2021-04-09 15:27:58 +03:00
|
|
|
import asyncio
|
|
|
|
|
2022-09-06 19:33:57 +03:00
|
|
|
from aiobaseclient.exceptions import BadRequestError
|
2021-04-09 15:27:58 +03:00
|
|
|
from library.aiogrpctools.base import BaseService
|
2022-09-02 18:44:56 +03:00
|
|
|
from library.telegram.common import close_button
|
2021-04-09 15:27:58 +03:00
|
|
|
from nexus.views.telegram.common import vote_button
|
|
|
|
from telethon.errors import rpcerrorlist
|
|
|
|
from telethon.tl.types import DocumentAttributeFilename
|
|
|
|
from tenacity import (
|
|
|
|
retry,
|
|
|
|
retry_if_exception_type,
|
|
|
|
stop_after_attempt,
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
def is_group_or_channel(chat_id: int) -> bool:
    """Tell whether ``chat_id`` denotes a group or a channel.

    The check is purely numeric: any identifier strictly below zero is
    treated as a group/channel, everything else (zero included) is not.
    """
    is_negative_id = chat_id < 0
    return is_negative_id
|
|
|
|
|
|
|
|
|
2022-09-06 19:33:57 +03:00
|
|
|
class ProcessedDocument:
    """Thin accessor around the result of full-text document processing.

    The wrapped object is only ever queried through ``.get(key)``, so it is
    expected to be a mapping (an empty dict is used when parsing failed).
    """

    def __init__(self, processed_document):
        # Raw processing result; exposed read-only through the properties below.
        self.processed_document = processed_document

    @staticmethod
    async def setup(file_data, grobid_client, request_context):
        """Process ``file_data`` and wrap the outcome in a ``ProcessedDocument``.

        ``grobid_client.process_fulltext_document`` is awaited with
        ``pdf_file=file_data``. When it raises ``BadRequestError`` the failure
        is reported through ``request_context`` (a ``statbox`` record with
        ``action='unparsable_document'`` plus ``error_log``) and an instance
        wrapping an empty dict is returned instead of propagating the error.
        Any other exception propagates to the caller.
        """
        try:
            parsed = await grobid_client.process_fulltext_document(pdf_file=file_data)
        except BadRequestError as error:
            request_context.statbox(action='unparsable_document')
            request_context.error_log(error)
            # Unparsable input degrades to "no extracted fields".
            return ProcessedDocument({})
        return ProcessedDocument(parsed)

    def _field(self, name):
        # Shared lookup for the properties; yields None when the key is absent.
        return self.processed_document.get(name)

    @property
    def doi(self):
        """Value stored under ``'doi'``, or ``None`` when absent."""
        return self._field('doi')

    @property
    def title(self):
        """Value stored under ``'title'``, or ``None`` when absent."""
        return self._field('title')

    @property
    def abstract(self):
        """Value stored under ``'abstract'``, or ``None`` when absent."""
        return self._field('abstract')

    @property
    def body(self):
        """Value stored under ``'body'``, or ``None`` when absent."""
        return self._field('body')
|
|
|
|
|
|
|
|
|
2021-04-09 15:27:58 +03:00
|
|
|
class BaseHubService(BaseService):
    """Helpers shared by hub services: post-discovery bookkeeping, IPFS
    hashing, copying parsed fields into a document and delivering files
    through a Telegram client.
    """

    async def found_item(self, bot_name, doi):
        """Run the follow-up actions for a document identified by ``doi``.

        If a mutual aid service is registered for ``bot_name`` its pending
        request for ``doi`` is deleted. Afterwards the subscriptions matching
        the query ``doi:<doi>`` are rescheduled through the IDM client.
        """
        mutual_aid_service = self.application.mutual_aid_services.get(bot_name)
        if mutual_aid_service:
            await mutual_aid_service.delete_request(doi)

        # NOTE(review): the schedule string looks like cron syntax for
        # "every minute" - confirm against the IDM service.
        await self.application.idm_client.reschedule_subscriptions(
            subscriptions_ids={'subscription_query': f'doi:{doi}'},
            new_schedule={'schedule': '*/1 * * * *'},
        )

    async def get_ipfs_hashes(self, file):
        """Return the ``'Hash'`` values the IPFS client reports for ``file``.

        Three ``add_bytes`` requests are issued concurrently, all with
        ``only_hash=True``; the resulting list keeps this order:
        CIDv1/blake2b-256, CIDv0/sha2-256, CIDv1/blake3.
        """
        # (cid_version, hash) pairs, in the order results are returned.
        hashing_variants = (
            (1, 'blake2b-256'),
            (0, 'sha2-256'),
            (1, 'blake3'),
        )
        responses = await asyncio.gather(*(
            self.application.ipfs_client.add_bytes(
                file,
                cid_version=cid_version,
                hash=hash_name,
                only_hash=True,
            )
            for cid_version, hash_name in hashing_variants
        ))
        return [response['Hash'] for response in responses]

    def set_fields_from_processed(self, document_pb, processed_document: ProcessedDocument):
        """Fill empty fields of ``document_pb`` from ``processed_document``.

        ``abstract`` is copied into ``document_pb.abstract`` and ``body`` into
        ``document_pb.content``, each only when the parsed value is truthy and
        the target field is currently falsy. Existing values are never
        overwritten.

        Returns the list of ``document_pb`` field names that were set.
        """
        # (attribute on processed_document, attribute on document_pb)
        field_mapping = (
            ('abstract', 'abstract'),
            ('body', 'content'),
        )
        new_fields = []
        for source_name, target_name in field_mapping:
            parsed_value = getattr(processed_document, source_name)
            if not parsed_value or getattr(document_pb, target_name):
                continue
            setattr(document_pb, target_name, parsed_value)
            new_fields.append(target_name)
        return new_fields

    @retry(
        reraise=True,
        stop=stop_after_attempt(3),
        retry=retry_if_exception_type((rpcerrorlist.TimeoutError, ValueError)),
    )
    async def send_file(
        self,
        document_holder,
        file,
        request_context,
        session_id,
        document_id=None,
        voting=True,
        close=False,
        progress_callback=None,
        chat_id=None,
        reply_to=None,
    ):
        """Send ``file`` to a chat with a caption and optional inline buttons.

        The call is attempted up to three times when it raises
        ``rpcerrorlist.TimeoutError`` or ``ValueError``; the last error is
        re-raised once attempts are exhausted.

        Args:
            document_holder: supplies the id, index alias, filename and the
                view builder used for the caption.
            file: payload forwarded unchanged to the Telegram client.
            request_context: provides the bot name, chat (language, chat id)
                and the ``statbox`` logger.
            session_id: embedded into every generated button.
            document_id: id used in the vote buttons; falls back to
                ``document_holder.id`` when ``None``.
            voting: add the 'broken' and 'ok' vote buttons.
            close: add a close button.
            progress_callback: forwarded to the Telegram client.
            chat_id: destination chat; when falsy the chat id of
                ``request_context`` is used.
            reply_to: forwarded to the Telegram client.

        Returns:
            The message object returned by the Telegram client.
        """
        if document_id is None:
            document_id = document_holder.id

        buttons = []
        if voting:
            buttons.extend(
                vote_button(
                    case=case,
                    index_alias=document_holder.index_alias,
                    document_id=document_id,
                    language=request_context.chat.language,
                    session_id=session_id,
                )
                for case in ('broken', 'ok')
            )
        if close:
            buttons.append(close_button(session_id=session_id))
        # The client receives None rather than an empty button list.
        buttons = buttons or None

        view_builder = document_holder.view_builder(request_context.chat.language)
        short_description = (
            view_builder
            .add_short_description()
            .add_doi_link(label=True, on_newline=True)
            .build()
        )
        related_channel = self.application.config['telegram']['related_channel']
        caption = f"{short_description}\n@{related_channel}"

        telegram_client = self.application.telegram_clients[request_context.bot_name]
        message = await telegram_client.send_file(
            attributes=[DocumentAttributeFilename(document_holder.get_filename())],
            buttons=buttons,
            caption=caption,
            entity=chat_id or request_context.chat.chat_id,
            file=file,
            progress_callback=progress_callback,
            reply_to=reply_to,
        )
        request_context.statbox(action='sent', voting=voting)
        return message
|