mirror of
https://github.com/nexus-stc/hyperboria
synced 2025-01-09 18:25:53 +01:00
fff80cd4e7
- fix(nexus): Preparing configs to be published - feat(nexus): Various fixes for opening left sources - fix(nexus): Fine-tune versions 1 internal commit(s) GitOrigin-RevId: 6c834cd3f4f5f18109a159a73503700dac63b0bb
199 lines
7.6 KiB
Python
199 lines
7.6 KiB
Python
import hashlib
|
|
import logging
|
|
import time
|
|
|
|
from aiogrobid import GrobidClient
|
|
from aiogrobid.exceptions import BadRequestError
|
|
from grpc import (
|
|
Server,
|
|
ServicerContext,
|
|
)
|
|
from izihawa_utils.pb_to_json import MessageToDict
|
|
from library.aiogrpctools.base import aiogrpc_request_wrapper
|
|
from library.telegram.base import (
|
|
BaseTelegramClient,
|
|
RequestContext,
|
|
)
|
|
from nexus.hub.exceptions import (
|
|
FileTooBigError,
|
|
UnavailableMetadataError,
|
|
UnparsableDoiError,
|
|
)
|
|
from nexus.hub.proto.submitter_service_pb2 import \
|
|
SubmitRequest as SubmitRequestPb
|
|
from nexus.hub.proto.submitter_service_pb2 import \
|
|
SubmitResponse as SubmitResponsePb
|
|
from nexus.hub.proto.submitter_service_pb2_grpc import (
|
|
SubmitterServicer,
|
|
add_SubmitterServicer_to_server,
|
|
)
|
|
from nexus.hub.user_manager import UserManager
|
|
from nexus.meta_api.aioclient import MetaApiGrpcClient
|
|
from nexus.models.proto.operation_pb2 import \
|
|
DocumentOperation as DocumentOperationPb
|
|
from nexus.models.proto.operation_pb2 import UpdateDocument as UpdateDocumentPb
|
|
from nexus.models.proto.sharience_pb2 import Sharience as ShariencePb
|
|
from nexus.models.proto.typed_document_pb2 import \
|
|
TypedDocument as TypedDocumentPb
|
|
from nexus.translations import t
|
|
from nexus.views.telegram.common import close_button
|
|
from nexus.views.telegram.scimag import ScimagView
|
|
from telethon.extensions import BinaryReader
|
|
|
|
from .base import BaseHubService
|
|
|
|
|
|
async def operation_log(document_operation_pb):
    """Write a document operation to the ``operation`` logger.

    The protobuf message is converted with ``MessageToDict`` and the
    resulting value is passed as the record message at INFO level.
    """
    operation_logger = logging.getLogger('operation')
    payload = MessageToDict(document_operation_pb)
    operation_logger.info(msg=payload)
|
|
|
|
|
|
class SubmitterService(SubmitterServicer, BaseHubService):
    """gRPC servicer that processes documents submitted through Telegram.

    A submitted file is downloaded from Telegram, parsed by GROBID to
    extract a DOI, matched against the ``scimag`` schema of the Meta API,
    sent back to the chat via ``send_file`` and finally recorded as a
    ``Sharience`` update operation in the ``operation`` log.
    """

    # Documents whose reported size exceeds this many bytes are rejected
    # before anything is downloaded.
    MAX_SUBMITTED_FILE_SIZE = 20 * 1024 * 1024

    def __init__(
        self,
        server: Server,
        service_name: str,
        bot_external_name: str,
        grobid_config: dict,
        ipfs_config: dict,
        meta_api_config: dict,
        telegram_client: BaseTelegramClient,
    ):
        """Create the service and its GROBID / Meta API clients.

        Args:
            server: gRPC server this servicer is registered on in ``start``.
            service_name: forwarded to the base service; also used as the
                ``bot_name`` of every request context.
            bot_external_name: forwarded to the base service.
            grobid_config: mapping whose ``'url'`` entry is the GROBID base URL.
            ipfs_config: forwarded to the base service.
            meta_api_config: mapping whose ``'url'`` entry is the Meta API
                base URL.
            telegram_client: client used for all Telegram interaction.
        """
        super().__init__(
            service_name=service_name,
            bot_external_name=bot_external_name,
            ipfs_config=ipfs_config,
            telegram_client=telegram_client,
        )
        self.server = server
        self.grobid_client = GrobidClient(base_url=grobid_config['url'])
        self.meta_api_client = MetaApiGrpcClient(base_url=meta_api_config['url'])
        self.telegram_client = telegram_client
        self.bot_external_name = bot_external_name
        self.user_manager = UserManager()
        # NOTE(review): `waits` comes from the base class; presumably these
        # clients are started/stopped together with the service - confirm.
        self.waits.extend([self.grobid_client, self.meta_api_client])

    async def start(self):
        """Register this servicer on the gRPC server."""
        add_SubmitterServicer_to_server(self, self.server)

    async def stop(self):
        """Close the IPFS client (an attribute not assigned in this class)."""
        await self.ipfs_client.close()

    @staticmethod
    def _submitted_filename(document) -> str:
        """Return the file name carried by ``document``'s attributes.

        The attribute list is scanned instead of assuming the file name is
        stored in the first attribute; an empty string is returned when no
        attribute provides a non-empty ``file_name``.
        """
        for attribute in document.attributes or ():
            file_name = getattr(attribute, 'file_name', None)
            if file_name:
                return file_name
        return ''

    async def _reply_with_error(self, request_context: RequestContext, text: str):
        """Send ``text`` to the requesting chat with a close button attached."""
        await self.telegram_client.send_message(
            request_context.chat.chat_id,
            text,
            buttons=[close_button()],
        )

    @aiogrpc_request_wrapper(log=False)
    async def submit(
        self,
        request: SubmitRequestPb,
        context: ServicerContext,
        metadata: dict,
    ) -> SubmitResponsePb:
        """Handle one submitted Telegram document.

        Args:
            request: carries the target ``chat`` and the serialized Telegram
                document (``telegram_document``).
            context: gRPC servicer context (unused here).
            metadata: request metadata; ``'session-id'`` and ``'request-id'``
                are read from it.

        Returns:
            An empty ``SubmitResponsePb`` in every handled outcome: success,
            file too big, unparsable document, missing DOI or unknown DOI.
            The user is informed about failures through Telegram messages.

        Raises:
            Any exception other than GROBID's ``BadRequestError`` leaves this
            coroutine after the "processing" message has been deleted.
        """
        session_id = metadata.get('session-id')
        request_context = RequestContext(
            bot_name=self.service_name,
            chat=request.chat,
            request_id=metadata.get('request-id'),
        )
        request_context.add_default_fields(
            mode='submit',
            session_id=session_id,
            **self.get_default_service_fields(),
        )
        chat_id = request_context.chat.chat_id
        language = request_context.chat.language

        document = BinaryReader(request.telegram_document).tgread_object()
        if document.size > self.MAX_SUBMITTED_FILE_SIZE:
            request_context.error_log(FileTooBigError(size=document.size))
            request_context.statbox(action='file_too_big')
            await self._reply_with_error(
                request_context,
                t('FILE_TOO_BIG_ERROR', language=language),
            )
            return SubmitResponsePb()

        filename = self._submitted_filename(document)
        processing_message = await self.telegram_client.send_message(
            chat_id,
            t("PROCESSING_PAPER", language=language).format(filename=filename),
        )
        try:
            file = await self.telegram_client.download_document(document=document, file=bytes)
            try:
                processed_document = await self.grobid_client.process_fulltext_document(pdf_file=file)
            except BadRequestError as e:
                request_context.statbox(action='unparsable_document')
                request_context.error_log(e)
                await self._reply_with_error(
                    request_context,
                    t('UNPARSABLE_DOCUMENT_ERROR', language=language).format(filename=filename),
                )
                return SubmitResponsePb()

            doi = processed_document.get('doi')
            if not doi:
                request_context.statbox(action='unparsable_doi')
                request_context.error_log(UnparsableDoiError())
                await self._reply_with_error(
                    request_context,
                    t('UNPARSABLE_DOI_ERROR', language=language).format(filename=filename),
                )
                return SubmitResponsePb()

            # Look the extracted DOI up among already known scimag records;
            # only the best match is needed.
            search_response_pb = await self.meta_api_client.search(
                schemas=('scimag',),
                query=doi,
                page=0,
                page_size=1,
                request_id=request_context.request_id,
                session_id=session_id,
                user_id=chat_id,
                language=language,
            )

            if not search_response_pb.scored_documents:
                request_context.statbox(action='unavailable_metadata')
                request_context.error_log(UnavailableMetadataError(doi=doi))
                await self._reply_with_error(
                    request_context,
                    t('UNAVAILABLE_METADATA_ERROR', language=language).format(doi=doi),
                )
                return SubmitResponsePb()

            document_view = ScimagView(search_response_pb.scored_documents[0].typed_document.scimag)
            uploaded_message = await self.send_file(
                document_view=document_view,
                file=file,
                request_context=request_context,
                session_id=session_id,
                voting=False,
            )
        finally:
            # The "processing" notice is removed on every path, including
            # the early returns and propagating exceptions above.
            await processing_message.delete()

        # Reached only on success: every failure branch above has returned.
        document_operation_pb = DocumentOperationPb(
            update_document=UpdateDocumentPb(
                typed_document=TypedDocumentPb(sharience=ShariencePb(
                    parent_id=document_view.id,
                    uploader_id=chat_id,
                    updated_at=int(time.time()),
                    md5=hashlib.md5(file).hexdigest(),
                    filesize=document.size,
                    ipfs_multihashes=await self.get_ipfs_hashes(file=file),
                    telegram_file_id=uploaded_message.file.id,
                )),
            ),
        )
        request_context.statbox(
            action='success',
            document_id=document_view.id,
            schema='scimag',
        )
        await operation_log(document_operation_pb)
        return SubmitResponsePb()
|