- [nexus] Development

GitOrigin-RevId: ccfd55db266862ed70f1299aaf62500765b03cc4
2024-12-23 18:17:45 +01:00 · 2022-09-06 19:33:57 +03:00 · 2022-09-06 19:33:57 +03:00 · 7c1bb06b1b
commit 7c1bb06b1b
parent 51ae1fc5d9
42 changed files with 300 additions and 322 deletions
--- a/library/telegram/base.py
+++ b/library/telegram/base.py
@ -39,6 +39,7 @@ class BaseTelegramClient(AioThing):
        bot_token: Optional[str] = None,
        mtproxy: Optional[dict] = None,
        flood_sleep_threshold: int = 60,
+        catch_up: bool = False,
    ):
        super().__init__()
        if not app_id or not app_hash:
@ -50,6 +51,7 @@ class BaseTelegramClient(AioThing):
            self._get_session(database),
            app_id,
            app_hash,
+            catch_up=catch_up,
            flood_sleep_threshold=flood_sleep_threshold,
            **self._get_proxy(mtproxy=mtproxy),
        )
--- a/library/telegram/promotioner.py
+++ b/library/telegram/promotioner.py
@ -5,9 +5,17 @@ class Promotioner:
    """
    Promotioner is used to select promotion randomly based on weights of every promotion.
    """
-    def __init__(self, promotions: list[dict], default_promotion_index: int = 0):
+    def __init__(
+        self,
+        promotions: list[dict],
+        default_promotion_index: int = 0,
+        promotion_vars: dict = None,
+    ):
        self.promotions = promotions
        self.default_promotion_index = default_promotion_index
+        if not promotion_vars:
+            promotion_vars = {}
+        self.promotion_vars = promotion_vars
        self.partial_sums: list = [self.promotions[0]['weight']]
        for promotion in self.promotions[1:]:
            self.partial_sums.append(promotion['weight'] + self.partial_sums[-1])
@ -18,11 +26,11 @@ class Promotioner:
            if partial_sum <= pivot:
                continue
            if language in promotion['texts']:
-                return promotion['texts'][language]
+                return promotion['texts'][language].format(**self.promotion_vars)
            elif promotion.get('local', False):
                default_promotion = self.promotions[self.default_promotion_index]
                if language in default_promotion['texts']:
-                    return default_promotion['texts'][language]
-                return default_promotion['texts']['en']
+                    return default_promotion['texts'][language].format(**self.promotion_vars)
+                return default_promotion['texts']['en'].format(**self.promotion_vars)
            else:
-                return promotion['texts']['en']
+                return promotion['texts']['en'].format(**self.promotion_vars)
--- a/library/telegram/utils.py
+++ b/library/telegram/utils.py
@ -19,6 +19,7 @@ async def safe_execution(
    error_log=error_log,
    on_fail: Optional[Callable[[], Awaitable]] = None,
    level=logging.WARNING,
+    is_logging_enabled: bool = True
 ):
    try:
        try:
@ -34,13 +35,17 @@ async def safe_execution(
            errors.MessageIdInvalidError,
            errors.ChatAdminRequiredError,
        ) as e:
-            error_log(e, level=level)
+            if is_logging_enabled:
+                error_log(e, level=level)
+                traceback.print_exc()
        except Exception as e:
-            error_log(e, level=level)
-            traceback.print_exc()
+            if is_logging_enabled:
+                error_log(e, level=level)
+                traceback.print_exc()
            if on_fail:
                await on_fail()
    except events.StopPropagation:
        raise
    except Exception as e:
-        error_log(e, level=level)
+        if is_logging_enabled:
+            error_log(e, level=level)
--- a/nexus/actions/crossref_api.py
+++ b/nexus/actions/crossref_api.py
@ -76,6 +76,11 @@ def clean_issns(issns):
        return cleaned_issns


+def clean_isbns(isbns):
+    if isbns:
+        return isbns
+
+
 def extract_title(title, subtitle):
    return ': '.join(filter(lambda x: bool(x), [title.strip(), subtitle.strip()]))

@ -90,6 +95,7 @@ class ToScimagPbAction(BaseAction):
            doi=item['DOI'],
            issue=item.get('issue'),
            issns=clean_issns(item.get('ISSN')),
+            isbns=clean_isbns(item.get('ISBN')),
            language=item.get('language'),
            referenced_by_count=item.get('is-referenced-by-count'),
            references=extract_references(item.get('reference')),
--- a/nexus/bot/BUILD.bazel
+++ b/nexus/bot/BUILD.bazel
@ -42,6 +42,8 @@ py3_image(
        "//nexus/hub/aioclient",
        "//nexus/meta_api/aioclient",
        "//nexus/models/proto:proto_py",
+        "//nexus/promotions",
+        "//nexus/translations",
        "//nexus/views/telegram",
        requirement("izihawa_nlptools"),
        requirement("izihawa_utils"),
--- a/nexus/bot/application.py
+++ b/nexus/bot/application.py
@ -2,10 +2,11 @@ from aiokit import AioRootThing
 from idm.api.aioclient import IdmApiGrpcClient
 from izihawa_utils.importlib import import_object
 from library.telegram.base import BaseTelegramClient
-from nexus.bot.promotioner import Promotioner
+from library.telegram.promotioner import Promotioner
 from nexus.bot.user_manager import UserManager
 from nexus.hub.aioclient import HubGrpcClient
 from nexus.meta_api.aioclient import MetaApiGrpcClient
+from nexus.promotions import get_promotions


 class TelegramApplication(AioRootThing):
@ -18,6 +19,7 @@ class TelegramApplication(AioRootThing):
            bot_token=self.config['telegram']['bot_token'],
            database=self.config['telegram'].get('database'),
            mtproxy=self.config['telegram'].get('mtproxy'),
+            catch_up=self.config['telegram'].get('catch_up', False)
        )
        self.hub_client = HubGrpcClient(endpoint=self.config['hub']['endpoint'])
        self.starts.append(self.hub_client)
@ -28,7 +30,13 @@ class TelegramApplication(AioRootThing):
        self.meta_api_client = MetaApiGrpcClient(endpoint=self.config['meta_api']['endpoint'])
        self.starts.append(self.meta_api_client)

-        self.promotioner = Promotioner(promotions=self.config['promotions'])
+        self.promotioner = Promotioner(
+            promotions=get_promotions(),
+            promotion_vars=dict(
+                related_channel=self.config['telegram']['related_channel'],
+                twitter_contact_url=self.config['twitter']['contact_url'],
+            )
+        )
        self.user_manager = UserManager()
        self._handlers = []

--- a/nexus/bot/configs/init.py
+++ b/nexus/bot/configs/init.py
@ -7,7 +7,6 @@ def get_config():
        'nexus/bot/configs/base.yaml',
        'nexus/bot/configs/%s.yaml?' % env.type,
        'nexus/bot/configs/logging.yaml',
-        'nexus/bot/configs/promotions.yaml',
    ], env_prefix='NEXUS_BOT')


--- a/nexus/bot/configs/promotions.yaml
+++ b/nexus/bot/configs/promotions.yaml
@ -1,36 +0,0 @@
---
-
-promotions:
-  - texts:
-      en: 💬 The victory of humanity is inevitable
-    weight: 1
-  - texts:
-      en: 💬 Shall build Standard Template Construct
-    weight: 1
-  - texts:
-      en: 💬 Gaining knowledge is the only purpose of life
-    weight: 1
-  - texts:
-      en: 💬 Knowledge cannot belong
-    weight: 1
-  - texts:
-      en: 💬 Obey the path of discovery
-    weight: 1
-  - texts:
-      en: 💬 Research is the only and ultimate goal
-    weight: 1
-  - texts:
-      en: ✋ Have a subscription to paid articles? [Help researchers!](https://t.me/nexus_aaron)
-      ru: ✋ Есть доступ к платным статьям? [Помоги ученым!](https://t.me/nexus_aaron)
-    weight: 25
-  - texts:
-      en: ✋ Help us, become a seeder of books. Learn how in /seed
-      ru: ✋ Сохрани наследие, раздавай книги нуждающимся. Узнай как в /seed
-    weight: 25
-  - texts:
-      en: ⤴️ Stay tuned with us at @{related_channel} and [Twitter]({twitter_contact_url})
-      es: ⤴️ Mantente en contacto con nosotros en @{related_channel} y [Twitter]({twitter_contact_url})
-      it: ⤴️ Resta aggiornato con noi su @{related_channel} e [Twitter]({twitter_contact_url})
-      pb: ⤴️ Fique ligado conosco em @{related_channel} e [Twitter]({twitter_contact_url})
-      ru: ⤴️ Оставайся на связи с нами на @{related_channel} и в [Twitter]({twitter_contact_url})
-    weight: 25
--- a/nexus/bot/handlers/close.py
+++ b/nexus/bot/handlers/close.py
@ -2,6 +2,7 @@ import asyncio
 import time

 from library.telegram.base import RequestContext
+from library.telegram.utils import safe_execution
 from nexus.translations import t
 from telethon import events

@ -36,5 +37,6 @@ class CloseHandler(BaseCallbackQueryHandler):
                target_events.append(reply_message.delete())
            target_events.append(message.delete())
        else:
-            target_events.append(event.answer(t('DELETION_FORBIDDEN_DUE_TO_AGE')))
+            async with safe_execution(is_logging_enabled=False):
+                await event.answer(t('DELETION_FORBIDDEN_DUE_TO_AGE'))
        await asyncio.gather(*target_events)
--- a/nexus/bot/handlers/download.py
+++ b/nexus/bot/handlers/download.py
@ -1,4 +1,5 @@
 from library.telegram.base import RequestContext
+from library.telegram.utils import safe_execution
 from nexus.hub.proto.delivery_service_pb2 import \
    StartDeliveryResponse as StartDeliveryResponsePb
 from nexus.translations import t
@ -45,14 +46,16 @@ class DownloadHandler(BaseCallbackQueryHandler):
            bot_name=request_context.bot_name,
        )
        if start_delivery_response_pb.status == StartDeliveryResponsePb.Status.ALREADY_DOWNLOADING:
-            await event.answer(
-                f'{t("ALREADY_DOWNLOADING", request_context.chat.language)}',
-            )
+            async with safe_execution(is_logging_enabled=False):
+                await event.answer(
+                    f'{t("ALREADY_DOWNLOADING", request_context.chat.language)}',
+                )
            await remove_button(event, '⬇️', and_empty_too=True)
        elif start_delivery_response_pb.status == StartDeliveryResponsePb.Status.TOO_MANY_DOWNLOADS:
-            await event.answer(
-                f'{t("TOO_MANY_DOWNLOADS", request_context.chat.language)}',
-            )
+            async with safe_execution(is_logging_enabled=False):
+                await event.answer(
+                    f'{t("TOO_MANY_DOWNLOADS", request_context.chat.language)}',
+                )
        else:
            await remove_button(event, '⬇️', and_empty_too=True)
            self.application.user_manager.last_widget[request_context.chat.chat_id] = None
--- a/nexus/bot/handlers/profile.py
+++ b/nexus/bot/handlers/profile.py
@ -61,7 +61,7 @@ class ProfileHandler(BaseHandler):
            target_events.append(profile_reply_message.reply(rendered_widget, buttons=buttons, link_preview=False))
        else:
            target_events.append(event.reply(rendered_widget, buttons=buttons, link_preview=False))
-        return asyncio.gather(*target_events)
+        return await asyncio.gather(*target_events)


 class DigestHandler(BaseHandler):
@ -114,6 +114,7 @@ class DigestHandler(BaseHandler):
            document_holders=document_holders,
            bot_name=bot_name,
            header='✨ Nexus Discovery ✨',
+            promotioner=self.application.promotioner,
        )

        view, buttons = await document_list_widget.render()
--- a/nexus/bot/handlers/roll.py
+++ b/nexus/bot/handlers/roll.py
@ -39,10 +39,7 @@ class RollHandler(BaseHandler):

        if random_documents:
            holder = BaseHolder.create_from_document(random_documents[0])
-            promo = self.application.promotioner.choose_promotion(language).format(
-                related_channel=self.application.config['telegram']['related_channel'],
-                twitter_contact_url=self.application.config['twitter']['contact_url'],
-            )
+            promo = self.application.promotioner.choose_promotion(language)
            view = holder.view_builder(language).add_view(bot_name=bot_name).add_new_line(2).add(promo, escaped=True).build()
            buttons_builder = holder.buttons_builder(language)

@ -54,5 +51,5 @@ class RollHandler(BaseHandler):

            request_context.statbox(action='show', duration=time.time() - start_time)
            await event.respond(view, buttons=buttons_builder.build())
-        async with safe_execution(error_log=request_context.error_log, level=logging.DEBUG):
+        async with safe_execution(is_logging_enabled=False):
            await event.delete()
--- a/nexus/bot/handlers/search.py
+++ b/nexus/bot/handlers/search.py
@ -210,6 +210,7 @@ class SearchEditHandler(BaseSearchHandler):

        if request_context.is_group_mode() and not search_prefix:
            return
+
        if request_context.is_personal_mode() and search_prefix:
            query = event.raw_text

@ -288,7 +289,7 @@ class SearchPagingHandler(BaseCallbackQueryHandler):
        )

        serp, buttons = await search_widget.render(message_id=message_id)
-        return await asyncio.gather(
-            event.answer(),
-            message.edit(serp, buttons=buttons, link_preview=False)
-        )
+
+        await message.edit(serp, buttons=buttons, link_preview=False)
+        async with safe_execution(is_logging_enabled=False):
+            await event.answer()
--- a/nexus/bot/handlers/submit.py
+++ b/nexus/bot/handlers/submit.py
@ -47,13 +47,13 @@ class SubmitHandler(BaseHandler):
            reply_to = reply_message.id

        doi_hint = self.get_doi_hint(message=message, reply_message=reply_message)
-        doi_hint_priority = '⚡' in message.raw_text
+        skip_analysis = '⚡️' in message.raw_text
        user_id = message.sender_id
        request_context.statbox(
            action='analyzed',
            mode='submit',
            doi_hint=doi_hint,
-            doi_hint_priority=doi_hint_priority,
+            skip_analysis=skip_analysis,
            reply_to=reply_to,
        )

@ -71,12 +71,12 @@ class SubmitHandler(BaseHandler):
                    request_id=request_context.request_id,
                    session_id=session_id,
                    doi_hint=doi_hint,
-                    doi_hint_priority=doi_hint_priority,
+                    skip_analysis=skip_analysis,
                    uploader_id=user_id,
                )
            case 'application/zip':
-                try:
-                    if request_context.is_personal_mode():
+                if request_context.is_personal_mode():
+                    try:
                        file_data = await self.application.telegram_client.download_document(
                            document=event.document,
                            file=bytes,
@ -105,10 +105,8 @@ class SubmitHandler(BaseHandler):
                                    session_id=session_id,
                                    uploader_id=user_id,
                                )
-                    else:
-                        await event.reply(t('ZIP_FILES_ARE_NOT_SUPPORTED_IN_GROUP_MODE', request_context.chat.language))
-                finally:
-                    return await event.delete()
+                    finally:
+                        return await event.delete()
            case _:
                request_context.statbox(action='unknown_file_format')
                request_context.error_log(UnknownFileFormatError(format=event.document.mime_type))
--- a/nexus/bot/handlers/view.py
+++ b/nexus/bot/handlers/view.py
@ -85,10 +85,7 @@ class ViewHandler(BaseHandler):
            holder = BaseHolder.create(typed_document_pb=typed_document_pb)
            back_command = await self.compose_back_command(session_id=session_id, message_id=message_id, page=page)

-            promo = self.application.promotioner.choose_promotion(language).format(
-                related_channel=self.application.config['telegram']['related_channel'],
-                twitter_contact_url=self.application.config['twitter']['contact_url'],
-            )
+            promo = self.application.promotioner.choose_promotion(language)
            view_builder = holder.view_builder(language).add_view(
                bot_name=self.application.config['telegram']['bot_name']
            ).add_new_line(2).add(promo, escaped=True)
--- a/nexus/bot/widgets/search_widget.py
+++ b/nexus/bot/widgets/search_widget.py
@ -154,10 +154,7 @@ class SearchWidget(BaseSearchWidget):
                )

        promotion_language = self.query_language or self.chat.language
-        promo = self.application.promotioner.choose_promotion(promotion_language).format(
-            related_channel=self.application.config['telegram']['related_channel'],
-            twitter_contact_url=self.application.config['twitter']['contact_url'],
-        )
+        promo = self.application.promotioner.choose_promotion(promotion_language)
        serp = f'{serp}\n\n{promo}\n'

        buttons = None
--- a/nexus/hub/aioclient/aioclient.py
+++ b/nexus/hub/aioclient/aioclient.py
@ -58,7 +58,7 @@ class HubGrpcClient(BaseGrpcClient):
        bot_name: str,
        reply_to: Optional[int] = None,
        doi_hint: Optional[str] = None,
-        doi_hint_priority: bool = False,
+        skip_analysis: bool = False,
        request_id: Optional[str] = None,
        session_id: Optional[str] = None,
        uploader_id: Optional[int] = None
@ -68,7 +68,7 @@ class HubGrpcClient(BaseGrpcClient):
            bot_name=bot_name,
            reply_to=reply_to,
            doi_hint=doi_hint,
-            doi_hint_priority=doi_hint_priority,
+            skip_analysis=skip_analysis,
            uploader_id=uploader_id,
        )
        if isinstance(file, submitter_service_pb2.PlainFile):
--- a/nexus/hub/configs/pylon.yaml
+++ b/nexus/hub/configs/pylon.yaml
@ -4,6 +4,7 @@ pylon:
    - [cambridge]
    - [edinburg]
    - [southampton]
+  default_resolver_proxy_list: ~
  downloads_directory: /downloads
  proxies:
    - address: clash.default.svc.cluster.example.com:7890
@ -15,6 +16,9 @@ pylon:
    - address: clash.default.svc.cluster.example.com:8090
      name: southampton
      tags: ['southampton']
+    - address: socks5://clash.default.svc.cluster.example.com:7991
+      name: socks5
+      tags: ['socks5']
  sources:
    # LibGen.rocks
    - driver:
@ -331,6 +335,13 @@ pylon:
        args:
          format_string: 'https://journals.physiology.org/doi/pdf/{doi}?download=true'
        class: nexus.pylon.resolvers.TemplateResolver
+    # www.ahajournals.org
+    - matcher:
+        doi: ^10.1161/.*$
+      resolver:
+        args:
+          format_string: 'https://www.ahajournals.org/doi/pdf/{doi}?download=true'
+        class: nexus.pylon.resolvers.TemplateResolver
    # ajp.psychiatryonline.org
    - matcher:
        doi: ^10.1176/.*$
--- a/nexus/hub/proto/submitter_service.proto
+++ b/nexus/hub/proto/submitter_service.proto
@ -23,7 +23,7 @@ message SubmitRequest {
    string bot_name = 4;
    optional int64 reply_to = 5;
    optional string doi_hint = 6;
-    bool doi_hint_priority = 7;
+    bool skip_analysis = 7;
    int64 uploader_id = 8;
 }
 message SubmitResponse { }
--- a/nexus/hub/services/base.py
+++ b/nexus/hub/services/base.py
@ -1,5 +1,6 @@
 import asyncio

+from aiobaseclient.exceptions import BadRequestError
 from library.aiogrpctools.base import BaseService
 from library.telegram.common import close_button
 from nexus.views.telegram.common import vote_button
@ -16,8 +17,39 @@ def is_group_or_channel(chat_id: int):
    return chat_id < 0


+class ProcessedDocument:
+    def __init__(self, processed_document):
+        self.processed_document = processed_document
+
+    @staticmethod
+    async def setup(file_data, grobid_client, request_context):
+        try:
+            processed_document = await grobid_client.process_fulltext_document(pdf_file=file_data)
+        except BadRequestError as e:
+            request_context.statbox(action='unparsable_document')
+            request_context.error_log(e)
+            processed_document = {}
+        return ProcessedDocument(processed_document)
+
+    @property
+    def doi(self):
+        return self.processed_document.get('doi')
+
+    @property
+    def title(self):
+        return self.processed_document.get('title')
+
+    @property
+    def abstract(self):
+        return self.processed_document.get('abstract')
+
+    @property
+    def body(self):
+        return self.processed_document.get('body')
+
+
 class BaseHubService(BaseService):
-    async def item_found(self, bot_name, doi):
+    async def found_item(self, bot_name, doi):
        if mutual_aid_service := self.application.mutual_aid_services.get(bot_name):
            await mutual_aid_service.delete_request(doi)
        await self.application.idm_client.reschedule_subscriptions(
@ -36,13 +68,13 @@ class BaseHubService(BaseService):
            )
        ))

-    def set_fields_from_processed(self, document_pb, processed_document):
+    def set_fields_from_processed(self, document_pb, processed_document: ProcessedDocument):
        new_fields = []
-        if processed_document.get('abstract') and not document_pb.abstract:
-            document_pb.abstract = processed_document['abstract']
+        if processed_document.abstract and not document_pb.abstract:
+            document_pb.abstract = processed_document.abstract
            new_fields.append('abstract')
-        if processed_document.get('body') and not document_pb.content:
-            document_pb.content = processed_document['body']
+        if processed_document.body and not document_pb.content:
+            document_pb.content = processed_document.body
            new_fields.append('content')
        return new_fields

--- a/nexus/hub/services/delivery.py
+++ b/nexus/hub/services/delivery.py
@ -46,6 +46,7 @@ from pypika import (

 from .base import (
    BaseHubService,
+    ProcessedDocument,
    is_group_or_channel,
 )

@ -76,6 +77,7 @@ class DeliveryService(delivery_service_pb2_grpc.DeliveryServicer, BaseHubService
            proxies=pylon_config['proxies'],
            source_configs=pylon_config['sources'],
            default_driver_proxy_list=pylon_config['default_driver_proxy_list'],
+            default_resolver_proxy_list=pylon_config['default_resolver_proxy_list'],
            downloads_directory=pylon_config['downloads_directory'],
        )
        self.should_parse_with_grobid = should_parse_with_grobid
@ -216,11 +218,12 @@ class DownloadTask:
            error_log=request_context.error_log,
            on_fail=_on_fail,
        ):
+            filename = document_holder.get_filename()
            progress_bar_download = ProgressBar(
                telegram_client=self.application.telegram_clients[request_context.bot_name],
                request_context=request_context,
                banner=t("LOOKING_AT", request_context.chat.language),
-                header=f'⬇️ {document_holder.get_filename()}',
+                header=f'⬇️ {filename}',
                tail_text=t('TRANSMITTED_FROM', request_context.chat.language),
                throttle_secs=throttle_secs,
            )
@ -239,6 +242,10 @@ class DownloadTask:
                    )
                    if not document_holder.md5 and document_holder.get_extension() == 'pdf':
                        try:
+                            await progress_bar_download.send_message(
+                                t("PROCESSING_PAPER", request_context.chat.language).format(filename=filename),
+                                ignore_last_call=True
+                            )
                            file = clean_metadata(file, doi=document_holder.doi)
                            request_context.statbox(
                                action='cleaned',
@ -251,7 +258,7 @@ class DownloadTask:
                        request_context=request_context,
                        message=progress_bar_download.message,
                        banner=t("LOOKING_AT", request_context.chat.language),
-                        header=f'⬇️ {document_holder.get_filename()}',
+                        header=f'⬇️ {filename}',
                        tail_text=t('UPLOADED_TO_TELEGRAM', request_context.chat.language),
                        throttle_secs=throttle_secs
                    )
@ -264,7 +271,7 @@ class DownloadTask:
                        voting=not is_group_or_channel(self.request_context.chat.chat_id),
                    )
                    if self.document_holder.doi:
-                        await self.delivery_service.item_found(
+                        await self.delivery_service.found_item(
                            bot_name=request_context.bot_name,
                            doi=self.document_holder.doi,
                        )
@ -277,6 +284,7 @@ class DownloadTask:
                        document_holder=document_holder,
                        telegram_file_id=uploaded_message.file.id,
                        file=file,
+                        request_context=request_context,
                    ))
                else:
                    request_context.statbox(
@ -425,7 +433,7 @@ class DownloadTask:
        self.task.cancel()
        await self.task

-    async def store_new_data(self, bot_name, document_holder, telegram_file_id, file):
+    async def store_new_data(self, bot_name, document_holder, telegram_file_id, file, request_context):
        document_pb = document_holder.document_pb
        new_fields = []
        if self.delivery_service.should_store_hashes:
@ -448,7 +456,11 @@ class DownloadTask:
            and document_holder.index_alias == 'scimag'
        ):
            try:
-                processed_document = await self.application.grobid_client.process_fulltext_document(pdf_file=file)
+                processed_document = await ProcessedDocument.setup(
+                    file,
+                    grobid_client=self.application.grobid_client,
+                    request_context=request_context,
+                )
                new_fields += self.delivery_service.set_fields_from_processed(document_pb, processed_document)
            except BadRequestError as e:
                self.request_context.statbox(action='unparsable_document')
--- a/nexus/hub/services/submitter.py
+++ b/nexus/hub/services/submitter.py
@ -34,7 +34,10 @@ from nexus.views.telegram.base_holder import ScimagHolder
 from telethon.errors import ChatAdminRequiredError
 from telethon.extensions import BinaryReader

-from .base import BaseHubService
+from .base import (
+    BaseHubService,
+    ProcessedDocument,
+)


 async def operation_log(document_operation_pb):
@ -87,22 +90,48 @@ class PlainFile:


 def fuzzy_compare(a, b):
-    if a is None or b is None:
-        return False
    a = re.sub(r'[^a-z\d]', '', a.lower())
    b = re.sub(r'[^a-z\d]', '', b.lower())
    return SequenceMatcher(None, a, b).ratio() > 0.9


-async def delayed_task(task, seconds):
-    await asyncio.sleep(seconds)
-    await task
-
-
 class SubmitterService(submitter_service_pb2_grpc.SubmitterServicer, BaseHubService):
    async def start(self):
        submitter_service_pb2_grpc.add_SubmitterServicer_to_server(self, self.application.server)

+    def wrap_request_file(self, request, request_context):
+        match str(request.WhichOneof('file')):
+            case 'plain':
+                return PlainFile(request.plain)
+            case 'telegram':
+                return TelegramFile(self.application.telegram_clients[request_context.bot_name], request.telegram)
+            case _:
+                raise RuntimeError(f"Unknown file type {request.WhichOneof('file')}")
+
+    async def retrieve_metadata(self, doi, title, session_id, request_context):
+        if doi:
+            meta_search_response = await self.application.meta_api_client.meta_search(
+                index_aliases=['scimag', ],
+                query=doi,
+                collectors=[{'top_docs': {'limit': 1}}],
+                session_id=session_id,
+                request_id=request_context.request_id,
+                user_id=str(request_context.chat.chat_id),
+                query_tags=['submitter'],
+            )
+            scored_documents = meta_search_response.collector_outputs[0].top_docs.scored_documents
+            if len(scored_documents) == 1:
+                scimag_pb = scimag_pb2.Scimag(**json.loads(scored_documents[0].document))
+                if title is not None and not fuzzy_compare(scimag_pb.title, title):
+                    request_context.statbox(
+                        action='mismatched_title',
+                        doi=doi,
+                        processed_title=title,
+                        title=scimag_pb.title,
+                    )
+                    return None
+                return scimag_pb
+
    @aiogrpc_request_wrapper(log=False)
    async def submit(
        self,
@ -125,17 +154,10 @@ class SubmitterService(submitter_service_pb2_grpc.SubmitterServicer, BaseHubServ
        )

        buttons = None if request_context.is_group_mode() else [close_button()]
+        wrapped_file = self.wrap_request_file(request, request_context)

-        match str(request.WhichOneof('file')):
-            case 'plain':
-                file = PlainFile(request.plain)
-            case 'telegram':
-                file = TelegramFile(self.application.telegram_clients[request_context.bot_name], request.telegram)
-            case _:
-                raise RuntimeError(f"Unknown file type {request.WhichOneof('file')}")
-
-        if file.size > 30 * 1024 * 1024:
-            request_context.error_log(FileTooBigError(size=file.size))
+        if wrapped_file.size > 300 * 1024 * 1024:
+            request_context.error_log(FileTooBigError(size=wrapped_file.size))
            request_context.statbox(action='file_too_big')
            async with safe_execution(error_log=request_context.error_log):
                await self.application.telegram_clients[request_context.bot_name].send_message(
@ -149,109 +171,59 @@ class SubmitterService(submitter_service_pb2_grpc.SubmitterServicer, BaseHubServ
        try:
            processing_message = await self.application.telegram_clients[request_context.bot_name].send_message(
                request_context.chat.chat_id,
-                t("PROCESSING_PAPER", request_context.chat.language).format(filename=file.filename),
+                t("PROCESSING_PAPER", request_context.chat.language).format(filename=wrapped_file.filename),
                reply_to=request.reply_to,
            )
        except ChatAdminRequiredError:
            return submitter_service_pb2.SubmitResponse()

        try:
-            file_data = await file.read()
-            processed_document = None
-            pdf_doi = None
-            pdf_title = None
-            try:
-                processed_document = await self.application.grobid_client.process_fulltext_document(pdf_file=file_data)
-                pdf_doi = processed_document.get('doi')
-                pdf_title = processed_document.get('title')
-                request_context.add_default_fields(pdf_doi=pdf_doi)
-            except BadRequestError as e:
-                request_context.statbox(action='unparsable_document')
-                request_context.error_log(e)
-                if not request.doi_hint:
-                    await self.application.telegram_clients[request_context.bot_name].send_message(
-                        request_context.chat.chat_id,
-                        t('UNPARSABLE_DOCUMENT_ERROR', request_context.chat.language).format(
-                            filename=file.filename,
-                        ),
-                        buttons=buttons,
-                        reply_to=request.reply_to,
-                    )
-                    return submitter_service_pb2.SubmitResponse()
+            file_data = await wrapped_file.read()
+            if not request.skip_analysis:
+                processed_document = await ProcessedDocument.setup(
+                    file_data,
+                    grobid_client=self.application.grobid_client,
+                    request_context=request_context,
+                )
+            else:
+                processed_document = ProcessedDocument({})

-            if request.doi_hint and pdf_doi != request.doi_hint:
-                request_context.statbox(action='mismatched_doi', doi_hint_priority=request.doi_hint_priority)
-                if request.doi_hint_priority:
-                    pdf_doi = request.doi_hint
-
-            if not pdf_doi and not request.doi_hint:
+            if not processed_document.doi and not request.doi_hint:
                request_context.statbox(action='unparsable_doi')
                request_context.error_log(UnparsableDoiError())
                await self.application.telegram_clients[request_context.bot_name].send_message(
                    request_context.chat.chat_id,
                    t('UNPARSABLE_DOI_ERROR', request_context.chat.language).format(
-                        filename=file.filename,
+                        filename=wrapped_file.filename,
                    ),
                    buttons=buttons,
                    reply_to=request.reply_to,
                )
                return submitter_service_pb2.SubmitResponse()

-            scimag_pb = None
-
-            if pdf_doi:
-                meta_search_response = await self.application.meta_api_client.meta_search(
-                    index_aliases=['scimag',],
-                    query=pdf_doi,
-                    collectors=[{'top_docs': {'limit': 1}}],
-                    session_id=session_id,
-                    request_id=request_context.request_id,
-                    user_id=str(request_context.chat.chat_id),
-                    query_tags=['submitter'],
-                )
-                scored_documents = meta_search_response.collector_outputs[0].top_docs.scored_documents
-                if len(scored_documents) == 1:
-                    scimag_pb = scimag_pb2.Scimag(**json.loads(scored_documents[0].document))
-                    if not fuzzy_compare(scimag_pb.title, pdf_title):
-                        request_context.statbox(
-                            action='mismatched_title',
-                            doi=pdf_doi,
-                            pdf_title=pdf_title,
-                            title=scimag_pb.title,
-                        )
-                        scimag_pb = None
-
+            scimag_pb = await self.retrieve_metadata(
+                processed_document.doi,
+                processed_document.title,
+                session_id=session_id,
+                request_context=request_context,
+            )
            if not scimag_pb and request.doi_hint:
-                meta_search_response = await self.application.meta_api_client.meta_search(
-                    index_aliases=['scimag', ],
-                    query=request.doi_hint,
-                    collectors=[{'top_docs': {'limit': 1}}],
+                scimag_pb = await self.retrieve_metadata(
+                    request.doi_hint,
+                    processed_document.title,
                    session_id=session_id,
-                    request_id=request_context.request_id,
-                    user_id=str(request_context.chat.chat_id),
-                    query_tags=['submitter'],
+                    request_context=request_context,
                )
-                scored_documents = meta_search_response.collector_outputs[0].top_docs.scored_documents
-                if len(scored_documents) == 1:
-                    scimag_pb = scimag_pb2.Scimag(**json.loads(scored_documents[0].document))
-                    if not fuzzy_compare(scimag_pb.title, pdf_title):
-                        request_context.statbox(
-                            action='mismatched_title',
-                            doi=request.doi_hint,
-                            pdf_title=pdf_title,
-                            title=scimag_pb.title,
-                        )
-                        # ToDo: add trust mechanics

            if not scimag_pb:
                request_context.statbox(action='unavailable_metadata')
-                request_context.error_log(UnavailableMetadataError(doi=pdf_doi))
+                request_context.error_log(UnavailableMetadataError(doi=processed_document.doi))
                await self.application.telegram_clients[request_context.bot_name].send_message(
                    request_context.chat.chat_id,
                    t(
                        'UNAVAILABLE_METADATA_ERROR',
                        language=request_context.chat.language
-                    ).format(doi=pdf_doi or request.doi_hint),
+                    ).format(doi=processed_document.doi or request.doi_hint),
                    buttons=buttons,
                    reply_to=request.reply_to,
                )
@ -260,10 +232,7 @@ class SubmitterService(submitter_service_pb2_grpc.SubmitterServicer, BaseHubServ
            request_context.add_default_fields(doi=scimag_pb.doi, document_id=scimag_pb.id)
            try:
                file_data = clean_metadata(file_data, doi=scimag_pb.doi)
-                request_context.statbox(
-                    action='cleaned',
-                    len=len(file_data),
-                )
+                request_context.statbox(action='cleaned', len=len(file_data))
            except ValueError as e:
                request_context.error_log(e)
            uploaded_message = await self.send_file(
@ -276,13 +245,13 @@ class SubmitterService(submitter_service_pb2_grpc.SubmitterServicer, BaseHubServ

            if processed_document:
                sharience_pb = sharience_pb2.Sharience(
-                    abstract=processed_document.get('abstract', ''),
-                    content=processed_document.get('body', ''),
+                    abstract=processed_document.abstract or '',
+                    content=processed_document.body or '',
                    parent_id=scimag_pb.id,
                    uploader_id=request.uploader_id or request_context.chat.chat_id,
                    updated_at=int(time.time()),
                    md5=hashlib.md5(file_data).hexdigest(),
-                    filesize=file.size,
+                    filesize=wrapped_file.size,
                    ipfs_multihashes=await self.get_ipfs_hashes(file=file_data),
                )
                update_sharience_pb = operation_pb2.DocumentOperation(
@ -313,13 +282,13 @@ class SubmitterService(submitter_service_pb2_grpc.SubmitterServicer, BaseHubServ
            await operation_log(store_telegram_file_id_operation_pb)
            request_context.statbox(action='successfully_stored')

-            if file.message_id:
+            if wrapped_file.message_id:
                async with safe_execution(error_log=request_context.error_log, level=logging.DEBUG):
                    await self.application.telegram_clients[request_context.bot_name].delete_messages(
                        request_context.chat.chat_id,
-                        file.message_id,
+                        wrapped_file.message_id,
                    )
-            await self.item_found(bot_name=request_context.bot_name, doi=scimag_pb.doi)
+            await self.found_item(bot_name=request_context.bot_name, doi=scimag_pb.doi)
        finally:
            await processing_message.delete()
        return submitter_service_pb2.SubmitResponse()
--- a/nexus/meta_api/main.py
+++ b/nexus/meta_api/main.py
@ -45,7 +45,7 @@ from nexus.meta_api.word_transformers import (
 )


-def create_query_transformer(valid_fields, invalid_fields):
+def create_query_transformer(valid_fields, invalid_fields, order_by_valid_fields):
    return QueryProcessor(
        tree_transformers=[
            OrderByTreeTransformer(
@ -55,14 +55,7 @@ def create_query_transformer(valid_fields, invalid_fields):
                    'pr': 'page_rank',
                    'refc': 'referenced_by_count',
                },
-                valid_fields=frozenset([
-                    'id',
-                    'referenced_by_count',
-                    'issued_at',
-                    'page_rank',
-                    'pages',
-                    'updated_at'
-                ])
+                valid_fields=frozenset(order_by_valid_fields)
            ),
            FieldTreeTransformer(
                field_aliases={
@ -135,20 +128,35 @@ class GrpcServer(AioGrpcServer):
            'doi', 'ipfs_multihashes', 'issns', 'isbns', 'issued_at', 'language', 'original_id',
            'page_rank', 'referenced_by_count', 'references', 'tags', 'title', 'year',
        }
+        order_by_scimag_fields = {
+            'id',
+            'referenced_by_count',
+            'issued_at',
+            'page_rank',
+            'updated_at'
+        }
        scitech_fields = {
            'id', 'authors', 'doi', 'description', 'extension',
            'ipfs_multihashes', 'isbns', 'issued_at', 'language', 'original_id', 'pages',
            'tags', 'title', 'updated_at', 'year',
        }
+        order_by_scitech_fields = {
+            'id',
+            'issued_at',
+            'pages',
+            'updated_at'
+        }

        self.query_transformers = {
            'scimag': create_query_transformer(
                valid_fields=scimag_fields,
                invalid_fields=scitech_fields.difference(scimag_fields),
+                order_by_valid_fields=order_by_scimag_fields,
            ),
            'scitech': create_query_transformer(
                valid_fields=scitech_fields,
                invalid_fields=scimag_fields.difference(scitech_fields),
+                order_by_valid_fields=order_by_scitech_fields,
            )
        }
        self.summa_client = SummaClient(
--- a/nexus/meta_api/services/base.py
+++ b/nexus/meta_api/services/base.py
@ -31,7 +31,6 @@ class BaseService(LibraryBaseService):
        new_scored_documents = []
        for scored_document in scored_documents:
            document = json.loads(scored_document.document)
-            document = self.enrich_document_with_stat_provider(document)
            new_scored_documents.append(nexus_meta_api_search_service_pb.ScoredDocument(
                typed_document=typed_document_pb2.TypedDocument(
                    **{scored_document.index_alias: self.pb_registry[scored_document.index_alias](**document)},
@ -46,7 +45,6 @@ class BaseService(LibraryBaseService):
        new_scored_documents = []
        for position, document in enumerate(documents):
            document = json.loads(document)
-            document = self.enrich_document_with_stat_provider(document)
            new_scored_documents.append(nexus_meta_api_search_service_pb.ScoredDocument(
                typed_document=TypedDocumentPb(
                    **{index_alias: self.pb_registry[index_alias](**document)},
@ -55,14 +53,3 @@ class BaseService(LibraryBaseService):
                score=1.0,
            ))
        return new_scored_documents
-
-    def enrich_document_with_stat_provider(self, document):
-        if self.stat_provider:
-            original_id = (
-                document.get('original_id')
-                or document['id']
-            )
-            download_stats = self.stat_provider.get_download_stats(original_id)
-            if download_stats and download_stats.downloads_count:
-                document['downloads_count'] = download_stats.downloads_count
-        return document
--- a/nexus/meta_api/services/documents.py
+++ b/nexus/meta_api/services/documents.py
@ -57,19 +57,6 @@ class DocumentsService(DocumentsServicer, BaseService):

        document_pb = getattr(typed_document_pb, typed_document_pb.WhichOneof('document'))

-        if hasattr(document_pb, 'original_id') and document_pb.original_id:
-            original_document_pb = await self.get_document(
-                index_alias=request.index_alias,
-                document_id=document_pb.original_id,
-                context=context,
-                request_id=metadata['request-id'],
-                session_id=metadata['session-id'],
-            )
-            for to_remove in ('doi', 'fiction_id', 'filesize', 'libgen_id',):
-                original_document_pb.ClearField(to_remove)
-            original_document_pb.MergeFrom(document_pb)
-            document_pb = original_document_pb
-
        logging.getLogger('query').info({
            'action': 'get',
            'cache_hit': False,
--- a/nexus/meta_api/services/search.py
+++ b/nexus/meta_api/services/search.py
@ -270,6 +270,10 @@ class SearchService(SearchServicer, BaseService):
    @aiogrpc_request_wrapper(log=False)
    async def search(self, request, context, metadata):
        preprocessed_query = await self.process_query(query=request.query, languages=request.language, context=context)
+        logging.getLogger('debug').info({
+            'action': 'preprocess_query',
+            'preprocessed_query': str(preprocessed_query),
+        })
        index_aliases = self.resolve_index_aliases(
            request_index_aliases=request.index_aliases,
            processed_query=preprocessed_query,
@ -280,19 +284,27 @@ class SearchService(SearchServicer, BaseService):
        right_offset = left_offset + page_size

        search_requests = []
+        processed_queries = {}
        for index_alias in index_aliases:
-            processed_query = self.query_transformers[index_alias].apply_tree_transformers(preprocessed_query)
+            processed_queries[index_alias] = self.query_transformers[index_alias].apply_tree_transformers(preprocessed_query)
+            logging.getLogger('debug').info({
+                'action': 'process_query',
+                'index_alias': index_alias,
+                'processed_query': str(processed_queries[index_alias]),
+                'order_by': processed_queries[index_alias].context.order_by,
+                'has_invalid_fields': processed_queries[index_alias].context.has_invalid_fields,
+            })
            search_requests.append(
                SearchRequest(
                    index_alias=index_alias,
-                    query=processed_query.to_summa_query(),
+                    query=processed_queries[index_alias].to_summa_query(),
                    collectors=[
                        search_service_pb2.Collector(
                            top_docs=search_service_pb2.TopDocsCollector(
                                limit=50,
-                                scorer=self.scorer(processed_query, index_alias),
+                                scorer=self.scorer(processed_queries[index_alias], index_alias),
                                snippets=self.snippets[index_alias],
-                                explain=processed_query.context.explain,
+                                explain=processed_queries[index_alias].context.explain,
                            )
                        ),
                        search_service_pb2.Collector(count=search_service_pb2.CountCollector())
@ -322,9 +334,9 @@ class SearchService(SearchServicer, BaseService):
                        meta_search_response.collector_outputs[0].top_docs.scored_documents,
                    )
                    has_next = len(new_scored_documents) > right_offset
-                    if 'scimag' in index_aliases:
+                    if 'scimag' in processed_queries:
                        await self.check_if_need_new_documents_by_dois(
-                            requested_dois=processed_query.context.dois,
+                            requested_dois=processed_queries['scimag'].context.dois,
                            scored_documents=new_scored_documents,
                            should_request=attempt.retry_state.attempt_number == 1
                        )
@ -333,7 +345,7 @@ class SearchService(SearchServicer, BaseService):
            scored_documents=new_scored_documents[left_offset:right_offset],
            has_next=has_next,
            count=meta_search_response.collector_outputs[1].count.count,
-            query_language=processed_query.context.query_language,
+            query_language=preprocessed_query.context.query_language,
        )

        return search_response_pb
--- a/nexus/pipe/configs/base.yaml
+++ b/nexus/pipe/configs/base.yaml
@ -6,7 +6,7 @@ pipe:
    kafka-0.example.net
  schema:
    - consumers:
-        - class: nexus.pipe.consumers.CrossReferencesBulkConsumer
+        - class: nexus.pipe.consumers.CrossReferencesConsumer
          topics:
            - name: cross_references
              workers: 4
--- a/nexus/pipe/consumers/init.py
+++ b/nexus/pipe/consumers/init.py
@ -1,17 +1,11 @@
-from .cross_references_consumer import (
-    CrossReferencesBulkConsumer,
-    CrossReferencesConsumer,
-)
+from .cross_references_consumer import CrossReferencesConsumer
 from .document_operations_consumer import (
-    DocumentOperationsBulkConsumer,
    DocumentOperationsConsumer,
    DocumentOperationsJsonConsumer,
 )

 __all__ = [
-    'CrossReferencesBulkConsumer',
    'CrossReferencesConsumer',
    'DocumentOperationsConsumer',
-    'DocumentOperationsBulkConsumer',
    'DocumentOperationsJsonConsumer',
 ]
--- a/nexus/pipe/consumers/base.py
+++ b/nexus/pipe/consumers/base.py
@ -66,9 +66,10 @@ class BaseConsumer(AioRootThing):
                        except (ConflictError, InterruptProcessing) as e:
                            logging.getLogger('statbox').info(e)
        except (asyncio.CancelledError, ConsumerStoppedError):
-            pass
+            return
        finally:
            await consumer.stop()
+        asyncio.create_task(self.stop())

    async def start(self):
        logging.getLogger('statbox').info({
@ -87,8 +88,10 @@ class BaseConsumer(AioRootThing):

    async def stop(self):
        if self.consumer_task:
-            self.consumer_task.cancel()
-            await self.consumer_task
+            consumer_task = self.consumer_task
+            self.consumer_task = None
+            consumer_task.cancel()
+            await consumer_task


 class BasePbConsumer(BaseConsumer):
@ -108,52 +111,3 @@ class BaseJsonConsumer(BaseConsumer):
        message = json.loads(msg.value)
        ParseDict(message, pb, ignore_unknown_fields=True)
        return pb
-
-
-class BaseBulkConsumer(BaseConsumer):
-    auto_commit = False
-    bulk_size = 20
-    timeout = 1
-
-    async def consume(self, consumer):
-        try:
-            while self.started:
-                try:
-                    result = await consumer.getmany(timeout_ms=self.timeout * 1000, max_records=self.bulk_size)
-                except (ConsumerStoppedError, asyncio.CancelledError):
-                    break
-                collector = []
-                for tp, messages in result.items():
-                    if messages:
-                        for message in messages:
-                            preprocessed_msg = self.preprocess(message)
-                            if preprocessed_msg:
-                                collector.append(preprocessed_msg)
-                for processor in self.processors:
-                    filtered = filter(processor.filter, collector)
-                    try:
-                        await processor.process_bulk(filtered)
-                    except InterruptProcessing as e:
-                        logging.getLogger('statbox').info(e)
-                try:
-                    await consumer.commit()
-                except CommitFailedError as e:
-                    error_log(e)
-                    continue
-        finally:
-            await consumer.stop()
-
-    async def start(self):
-        logging.getLogger('statbox').info({
-            'action': 'start',
-            'group_id': self.group_id,
-            'topic_names': self.topic_names,
-        })
-        consumer = self.create_consumer()
-        await consumer.start()
-        logging.getLogger('statbox').info({
-            'action': 'started',
-            'group_id': self.group_id,
-            'topic_names': self.topic_names,
-        })
-        self.consumer_task = asyncio.create_task(self.consume(consumer))
--- a/nexus/pipe/consumers/cross_references_consumer.py
+++ b/nexus/pipe/consumers/cross_references_consumer.py
@ -1,15 +1,8 @@
 from nexus.models.proto.operation_pb2 import \
    CrossReferenceOperation as CrossReferenceOperationPb

-from .base import (
-    BaseBulkConsumer,
-    BasePbConsumer,
-)
+from .base import BasePbConsumer


 class CrossReferencesConsumer(BasePbConsumer):
    pb_class = CrossReferenceOperationPb
-
-
-class CrossReferencesBulkConsumer(BaseBulkConsumer, CrossReferencesConsumer):
-    pass
--- a/nexus/pipe/consumers/document_operations_consumer.py
+++ b/nexus/pipe/consumers/document_operations_consumer.py
@ -2,7 +2,6 @@ from nexus.models.proto.operation_pb2 import \
    DocumentOperation as DocumentOperationPb

 from .base import (
-    BaseBulkConsumer,
    BaseJsonConsumer,
    BasePbConsumer,
 )
@ -14,7 +13,3 @@ class DocumentOperationsConsumer(BasePbConsumer):

 class DocumentOperationsJsonConsumer(BaseJsonConsumer):
    pb_class = DocumentOperationPb
-
-
-class DocumentOperationsBulkConsumer(BaseBulkConsumer, DocumentOperationsConsumer):
-    pass
--- a/nexus/pylon/client.py
+++ b/nexus/pylon/client.py
@ -22,12 +22,14 @@ class PylonClient(AioThing):
        source_configs: Optional[List],
        proxies: Optional[List[str]] = None,
        downloads_directory: Optional[str] = None,
-        default_driver_proxy_list: [Optional[List]] = None
+        default_driver_proxy_list: [Optional[List]] = None,
+        default_resolver_proxy_list: [Optional[List]] = None,
    ):
        super().__init__()
        self.proxy_manager = ProxyManager(proxies)
        self.downloads_directory = downloads_directory
        self.default_driver_proxy_list = default_driver_proxy_list
+        self.default_resolver_proxy_list = default_resolver_proxy_list
        self.sources = []
        for source_config in source_configs:
            source = Source.from_config(
@ -35,6 +37,7 @@ class PylonClient(AioThing):
                source_config=source_config,
                downloads_directory=downloads_directory,
                default_driver_proxy_list=default_driver_proxy_list,
+                default_resolver_proxy_list=default_resolver_proxy_list,
            )
            self.sources.append(source)
            self.starts.append(source)
--- a/nexus/pylon/configs/pylon.yaml
+++ b/nexus/pylon/configs/pylon.yaml
@ -138,6 +138,7 @@ pylon:
      resolver:
        args:
          format_string: 'https://www.sciencedirect.com/science/article/pii/{selected}/pdfft?isDTMRedir=true&download=true'
+          resolve_timeout: 25.0
          selector: '(.resource.primary.URL | split("/"))[-1]'
          timeout: 40.0
        class: nexus.pylon.resolvers.DoiOrgRequestResolver
--- a/nexus/pylon/drivers/base.py
+++ b/nexus/pylon/drivers/base.py
@ -26,7 +26,7 @@ class BaseDriver(NetworkAgent):
            validator_cls = validator['class']
        validator_cls = import_object(validator_cls)

-        self.validator = validator_cls or BaseValidator
+        self.validator = validator_cls
        self.downloads_directory = downloads_directory

    def __str__(self):
--- a/nexus/pylon/drivers/browser.py
+++ b/nexus/pylon/drivers/browser.py
@ -3,7 +3,6 @@ import json
 import logging
 import os.path
 import shutil
-import sys
 import time
 from pathlib import Path
 from typing import (
--- a/nexus/pylon/matcher.py
+++ b/nexus/pylon/matcher.py
@ -1,4 +1,5 @@
 import re
+import sys


 class Matcher:
--- a/nexus/pylon/resolvers/doi_org_request.py
+++ b/nexus/pylon/resolvers/doi_org_request.py
@ -11,6 +11,11 @@ import jq
 from nexus.pylon.prepared_request import PreparedRequest
 from nexus.pylon.proxy_manager import ProxyManager
 from nexus.pylon.resolvers.base import BaseResolver
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_random,
+)


 class DoiOrgRequestResolver(BaseResolver):
@ -24,6 +29,7 @@ class DoiOrgRequestResolver(BaseResolver):
        proxy_manager: Optional[ProxyManager] = None,
    ):
        super().__init__(proxy_list=proxy_list, proxy_manager=proxy_manager)
+        self.text_selector = selector
        self.selector = jq.compile(selector)
        self.format_string = format_string
        self.timeout = timeout
@ -32,6 +38,11 @@ class DoiOrgRequestResolver(BaseResolver):
    def __str__(self):
        return f'{self.__class__.__name__}(selector = {self.selector}, format_string = {self.format_string})'

+    @retry(
+        reraise=True,
+        wait=wait_random(min=1, max=3),
+        stop=stop_after_attempt(4),
+    )
    async def resolve_through_doi_org(self, params):
        async with self.get_session() as session:
            doi_url = f'https://doi.org/{params["doi"]}'
@ -51,6 +62,7 @@ class DoiOrgRequestResolver(BaseResolver):
            logging.getLogger('error').error({
                'action': 'error',
                'mode': 'pylon',
+                'params': params,
                'error': str(e)
            })
            return
@ -60,3 +72,11 @@ class DoiOrgRequestResolver(BaseResolver):
                url=self.format_string.format(selected=selected),
                timeout=self.timeout,
            )
+        else:
+            logging.getLogger('debug').error({
+                'action': 'missed_selector',
+                'mode': 'pylon',
+                'params': params,
+                'selector': self.text_selector,
+                'format_string': self.format_string,
+            })
--- a/nexus/pylon/source.py
+++ b/nexus/pylon/source.py
@ -33,13 +33,17 @@ class Source(AioThing):
        source_config,
        downloads_directory: str,
        default_driver_proxy_list: List,
+        default_resolver_proxy_list: List,
    ) -> 'Source':
        matcher = Matcher(source_config['matcher'])

        resolver_cls = import_object(
            source_config.get('resolver', {}).get('class', 'nexus.pylon.resolvers.TemplateResolver')
        )
-        resolver_args = dict(proxy_manager=proxy_manager)
+        resolver_args = dict(
+            proxy_manager=proxy_manager,
+            proxy_list=default_resolver_proxy_list,
+        )
        resolver_args.update(**source_config.get('resolver', {}).get('args', {}))
        resolver = resolver_cls(**resolver_args)

--- a/nexus/translations/translations.yaml
+++ b/nexus/translations/translations.yaml
@ -4,7 +4,7 @@
 en:
  ABOUT_US: |
    **About us**
-    Among the most impactful inventions of humanity such as the wheel, semiconductors or the wing were the invention of writing and advent of mass printing. Uwe of printing press made possible wide spread of ideas and knowledge. This burst increased the number of educated people and in its turn people started the Age of Enlightenment and shaped modern civilization.
+    Among the most impactful inventions of humanity such as the wheel, semiconductors or the wing were the invention of writing and advent of mass printing. Use of printing press made possible wide spread of ideas and knowledge. This burst increased the number of educated people and in its turn people started the Age of Enlightenment and shaped modern civilization.

    Why printing provoked such dramatic changes in society?
    Sane beings and knowledge live in a natural symbiosis: knowledge are fruiting and growing while circulated among people and people do the same. We may even note that complexity of human civilization, its achievements and technological progress during all its history have been correlated with the amount of accumulated knowledge and possibility to share and navigate through it.
@ -14,7 +14,7 @@ en:
    However, there is one remaining issue: knowledge is usurped by large publishers, corporations such as Elsevier, Springer, Wiley, T&F etc. Held by copyright, the knowledge remains out of reach for many researchers and for many enterprises that could launch the new Age of Enlightenment.

    Knowledge is not just usurped, an entire system has been built which is putting in chains researchers, academia and readers:
-    - Large publishers promote and lobby approaches that encourage evaluation of researchers using the number of published articles in journals having high-impact. Everybody knows who owns journals of high "impact"
+    - Large publishers promote and lobby approaches that encourage evaluation of researchers using the number of published articles in journals having high-impact
    - Publishers make authors to pay for publishing tax-funded researches, then readers to pay for access to published tax-funded researches, while peer-reviewing is done for free by other researchers and this activity is considered as "honorable" in academia
    - Publishers such as Elsevier repeatedly carried and carry campaigns against open access

--- a/nexus/views/telegram/document_list_widget.py
+++ b/nexus/views/telegram/document_list_widget.py
@ -17,6 +17,7 @@ class DocumentListWidget:
        document_holders: List[BaseHolder],
        bot_name,
        header: Optional[str] = None,
+        promotioner=None,
        has_next: bool = False,
        session_id: Optional[str] = None,
        message_id: Optional[int] = None,
@ -29,6 +30,7 @@ class DocumentListWidget:
        self.document_holders = document_holders
        self.bot_name = bot_name
        self.header = header
+        self.promotioner = promotioner
        self.cmd = cmd
        self.has_next = has_next
        self.session_id = session_id
@ -57,6 +59,10 @@ class DocumentListWidget:
        if self.header:
            serp = f'**{self.header}**\n\n{serp}'

+        promotion_language = self.chat.language
+        promo = self.promotioner.choose_promotion(promotion_language)
+        serp = f'{serp}\n\n{promo}\n'
+
        buttons = []
        if self.cmd and self.message_id and self.session_id and (self.has_next or self.page > 0):
            buttons = [
--- a/nexus/views/telegram/progress_bar.py
+++ b/nexus/views/telegram/progress_bar.py
@ -100,9 +100,9 @@ class ProgressBar:
    async def show_banner(self):
        return await self.send_message(await self.render_banner(), ignore_last_call=True)

-    async def callback(self, done, total):
+    async def callback(self, done, total, ignore_last_call=False):
        self._set_progress(done, total)
-        return await self.send_message(await self.render_progress())
+        return await self.send_message(await self.render_progress(), ignore_last_call=ignore_last_call)


 class ThrottlerWrapper:
--- a/rules/python/requirements-lock.txt
+++ b/rules/python/requirements-lock.txt
@ -139,7 +139,7 @@ spacy-loggers==1.0.3
 SQLAlchemy==1.4.39
 sqlparse==0.4.2
 srsly==2.4.4
-Telethon==1.24.0
+Telethon==1.25.0
 tenacity==8.0.1
 termcolor==1.1.0
 textblob==0.17.1