mirror of
https://github.com/nexus-stc/hyperboria
synced 2024-12-04 17:02:53 +01:00
43be16e4bc
- [nexus] Remove outdated protos - [nexus] Development - [nexus] Development - [nexus] Development - [nexus] Development - [nexus] Development - [nexus] Refactor views - [nexus] Update aiosumma - [nexus] Add tags - [nexus] Development - [nexus] Update repository - [nexus] Update repository - [nexus] Update dependencies - [nexus] Update dependencies - [nexus] Fixes for MetaAPI - [nexus] Support for new queries - [nexus] Adopt new versions of search - [nexus] Improving Nexus - [nexus] Various fixes - [nexus] Add profile - [nexus] Fixes for ingestion - [nexus] Refactorings and bugfixes - [idm] Add profile methods - [nexus] Fix stalled nexus-meta bugs - [nexus] Various bugfixes - [nexus] Restore IDM API functionality GitOrigin-RevId: a0842345a6dde5b321279ab5510a50c0def0e71a
129 lines
4.9 KiB
Python
129 lines
4.9 KiB
Python
import io
|
|
import re
|
|
|
|
from library.telegram.base import RequestContext
|
|
from library.telegram.common import close_button
|
|
from library.telegram.utils import safe_execution
|
|
from nexus.translations import t
|
|
from nlptools.izihawa_nlptools.utils import cast_string_to_single_string
|
|
from telethon import events
|
|
from telethon.tl.types import DocumentAttributeFilename
|
|
|
|
from .base import BaseHandler
|
|
|
|
|
|
class SeedHandler(BaseHandler):
|
|
filter = events.NewMessage(
|
|
incoming=True,
|
|
pattern=re.compile(r'^/(r)?seed(?:@\w+)?'
|
|
r'(?:(?:\s+(\d+))?(?:\s+(\d+))?(\n+.*)?)?$'),
|
|
)
|
|
is_group_handler = False
|
|
|
|
async def handler(self, event: events.ChatAction, request_context: RequestContext):
|
|
session_id = self.generate_session_id()
|
|
request_context.add_default_fields(mode='seed', session_id=session_id)
|
|
|
|
random_seed = True if event.pattern_match.group(1) else False
|
|
|
|
if string_offset := event.pattern_match.group(2):
|
|
offset = int(string_offset.strip() or 0)
|
|
else:
|
|
offset = 0
|
|
|
|
if string_limit := event.pattern_match.group(3):
|
|
limit = min(int(string_limit.strip()), 10000)
|
|
else:
|
|
limit = offset
|
|
offset = 0
|
|
|
|
original_query = ''
|
|
if string_query := event.pattern_match.group(4):
|
|
original_query = string_query.strip()
|
|
query = f'+({original_query}) +ipfs_multihashes:[* TO *]'
|
|
else:
|
|
query = '+ipfs_multihashes:[* TO *]'
|
|
|
|
if not string_query and not string_limit and not string_offset:
|
|
request_context.statbox(action='help')
|
|
return await event.reply(t('SEED_HELP', language=request_context.chat.language), buttons=[close_button()])
|
|
|
|
wait_message = await event.respond(t('SEED_GENERATION', language=request_context.chat.language))
|
|
async with safe_execution(error_log=request_context.error_log):
|
|
await event.delete()
|
|
|
|
request_context.statbox(
|
|
action='request',
|
|
offset=offset,
|
|
limit=limit,
|
|
query=query,
|
|
)
|
|
|
|
if random_seed:
|
|
meta_search_response = await self.application.meta_api_client.meta_search(
|
|
index_aliases=['scitech', ],
|
|
query=query,
|
|
collectors=[{
|
|
'reservoir_sampling': {
|
|
'limit': limit,
|
|
'fields': ['ipfs_multihashes', 'doi', 'md5'],
|
|
}
|
|
}, {
|
|
'count': {}
|
|
}],
|
|
skip_cache_loading=True,
|
|
skip_cache_saving=True,
|
|
query_tags=['seed'],
|
|
)
|
|
documents = meta_search_response.collector_outputs[0].reservoir_sampling.random_documents
|
|
count = meta_search_response.collector_outputs[1].count.count
|
|
else:
|
|
meta_search_response = await self.application.meta_api_client.meta_search(
|
|
index_aliases=['scitech', ],
|
|
query=query,
|
|
collectors=[{
|
|
'top_docs': {
|
|
'limit': limit,
|
|
'offset': offset,
|
|
'scorer': {'eval_expr': '-updated_at'},
|
|
'fields': ['ipfs_multihashes', 'doi', 'md5'],
|
|
}
|
|
}, {
|
|
'count': {}
|
|
}],
|
|
query_tags=['seed'],
|
|
)
|
|
documents = meta_search_response.collector_outputs[0].top_docs.scored_documents
|
|
count = meta_search_response.collector_outputs[1].count.count
|
|
|
|
buffer = io.BytesIO()
|
|
for document in documents:
|
|
buffer.write(document.document.encode())
|
|
buffer.write(b'\n')
|
|
buffer.flush()
|
|
|
|
casted_query = cast_string_to_single_string(original_query)
|
|
if not casted_query:
|
|
casted_query = 'cids'
|
|
filename = f'{casted_query[:16]}-{offset}-{limit}-{count}.cids.txt'
|
|
oneliner = f'cat {filename} | jq -c -r ".ipfs_multihashes[0]" | xargs -I{{}} ipfs pin add {{}}'
|
|
|
|
query_head = f'`{original_query}`\n\n' if original_query else ''
|
|
offset_head = f'**Offset:** {offset}\n' if not random_seed else ''
|
|
await self.application.telegram_client.send_file(
|
|
attributes=[DocumentAttributeFilename(filename)],
|
|
buttons=[close_button()],
|
|
caption=f'{query_head}'
|
|
f'{offset_head}'
|
|
f'**Limit:** {limit}\n'
|
|
f'**Total:** {count}\n\n'
|
|
f'**One-liner:** \n'
|
|
f'`{oneliner}`',
|
|
entity=request_context.chat.chat_id,
|
|
file=buffer.getvalue(),
|
|
reply_to=event,
|
|
)
|
|
buffer.close()
|
|
async with safe_execution(error_log=request_context.error_log):
|
|
await self.application.telegram_client.delete_messages(request_context.chat.chat_id, [wait_message.id])
|