mirror of
https://github.com/nexus-stc/hyperboria
synced 2024-12-04 17:02:53 +01:00
43be16e4bc
- [nexus] Remove outdated protos - [nexus] Development - [nexus] Development - [nexus] Development - [nexus] Development - [nexus] Development - [nexus] Refactor views - [nexus] Update aiosumma - [nexus] Add tags - [nexus] Development - [nexus] Update repository - [nexus] Update repository - [nexus] Update dependencies - [nexus] Update dependencies - [nexus] Fixes for MetaAPI - [nexus] Support for new queries - [nexus] Adopt new versions of search - [nexus] Improving Nexus - [nexus] Various fixes - [nexus] Add profile - [nexus] Fixes for ingestion - [nexus] Refactorings and bugfixes - [idm] Add profile methods - [nexus] Fix stalled nexus-meta bugs - [nexus] Various bugfixes - [nexus] Restore IDM API functionality GitOrigin-RevId: a0842345a6dde5b321279ab5510a50c0def0e71a
126 lines
5.3 KiB
Python
126 lines
5.3 KiB
Python
import asyncio
|
|
import io
|
|
import re
|
|
import zipfile
|
|
|
|
from izihawa_nlptools.regex import DOI_REGEX
|
|
from library.telegram.base import RequestContext
|
|
from library.telegram.common import close_button
|
|
from nexus.bot.exceptions import UnknownFileFormatError
|
|
from nexus.hub.proto import submitter_service_pb2 as submitter_service_pb
|
|
from nexus.translations import t
|
|
from telethon import events
|
|
|
|
from .base import BaseHandler
|
|
|
|
|
|
class SubmitHandler(BaseHandler):
|
|
filter = events.NewMessage(
|
|
func=lambda e: e.document and e.document.mime_type in ('application/pdf', 'application/zip'),
|
|
incoming=True
|
|
)
|
|
is_group_handler = True
|
|
writing_handler = True
|
|
|
|
def get_doi_hint(self, message, reply_message):
|
|
doi_hint = None
|
|
if message.raw_text:
|
|
doi_regex = re.search(DOI_REGEX, message.raw_text)
|
|
if doi_regex:
|
|
doi_hint = doi_regex.group(1) + '/' + doi_regex.group(2)
|
|
if not doi_hint and reply_message:
|
|
doi_regex = re.search(DOI_REGEX, reply_message.raw_text)
|
|
if doi_regex:
|
|
doi_hint = doi_regex.group(1) + '/' + doi_regex.group(2)
|
|
return doi_hint
|
|
|
|
async def handler(self, event, request_context: RequestContext):
|
|
session_id = self.generate_session_id()
|
|
|
|
request_context.add_default_fields(session_id=session_id)
|
|
request_context.statbox(action='show', mode='submit', mime_type=event.document.mime_type)
|
|
|
|
reply_to = None
|
|
message = event
|
|
reply_message = await event.get_reply_message()
|
|
if reply_message:
|
|
reply_to = reply_message.id
|
|
|
|
doi_hint = self.get_doi_hint(message=message, reply_message=reply_message)
|
|
doi_hint_priority = '⚡' in message.raw_text
|
|
user_id = message.sender_id
|
|
request_context.statbox(
|
|
action='analyzed',
|
|
mode='submit',
|
|
doi_hint=doi_hint,
|
|
doi_hint_priority=doi_hint_priority,
|
|
reply_to=reply_to,
|
|
)
|
|
|
|
match event.document.mime_type:
|
|
case 'application/pdf':
|
|
return await self.application.hub_client.submit(
|
|
file=submitter_service_pb.TelegramFile(
|
|
document=bytes(event.document),
|
|
file_id=event.file.id,
|
|
message_id=event.id,
|
|
),
|
|
chat=request_context.chat,
|
|
bot_name=request_context.bot_name,
|
|
reply_to=reply_to,
|
|
request_id=request_context.request_id,
|
|
session_id=session_id,
|
|
doi_hint=doi_hint,
|
|
doi_hint_priority=doi_hint_priority,
|
|
uploader_id=user_id,
|
|
)
|
|
case 'application/zip':
|
|
try:
|
|
if request_context.is_personal_mode():
|
|
file_data = await self.application.telegram_client.download_document(
|
|
document=event.document,
|
|
file=bytes,
|
|
)
|
|
request_context.statbox(action='unpack', mode='submit', size=len(file_data))
|
|
with zipfile.ZipFile(io.BytesIO(file_data), 'r') as zf:
|
|
for filename in zf.namelist():
|
|
if not filename.lower().endswith('.pdf'):
|
|
continue
|
|
nested_file = zf.read(filename)
|
|
request_context.statbox(
|
|
action='unpacked_file',
|
|
mode='submit',
|
|
filename=filename,
|
|
size=len(nested_file),
|
|
)
|
|
await self.application.hub_client.submit(
|
|
file=submitter_service_pb.PlainFile(
|
|
data=nested_file,
|
|
filename=filename,
|
|
),
|
|
chat=request_context.chat,
|
|
bot_name=request_context.bot_name,
|
|
reply_to=reply_to,
|
|
request_id=request_context.request_id,
|
|
session_id=session_id,
|
|
uploader_id=user_id,
|
|
)
|
|
else:
|
|
await event.reply(t('ZIP_FILES_ARE_NOT_SUPPORTED_IN_GROUP_MODE', request_context.chat.language))
|
|
finally:
|
|
return await event.delete()
|
|
case _:
|
|
request_context.statbox(action='unknown_file_format')
|
|
request_context.error_log(UnknownFileFormatError(format=event.document.mime_type))
|
|
return await asyncio.gather(
|
|
event.reply(
|
|
t('UNKNOWN_FILE_FORMAT_ERROR', request_context.chat.language),
|
|
buttons=None if request_context.is_group_mode() else [close_button()],
|
|
),
|
|
event.delete(),
|
|
)
|
|
|
|
|
|
class EditSubmitHandler(SubmitHandler):
    """Run the submit flow for edited incoming messages.

    The filter matches any incoming edited message that carries a document;
    it performs no MIME type check of its own.
    """

    filter = events.MessageEdited(
        incoming=True,
        func=lambda edited: edited.document,
    )
|