hyperboria/idm/api/services/profile.py

209 lines
8.1 KiB
Python

import asyncio
from collections import defaultdict
from idm.api.proto import (
profile_service_pb2,
profile_service_pb2_grpc,
subscription_manager_service_pb2,
subscription_manager_service_pb2_grpc,
)
from library.aiogrpctools.base import (
BaseService,
aiogrpc_request_wrapper,
)
from psycopg.rows import dict_row
from pypika import (
CustomFunction,
PostgreSQLQuery,
Table,
functions,
)
from pypika.pseudocolumns import PseudoColumn
class ProfileService(profile_service_pb2_grpc.ProfileServicer, BaseService):
    """gRPC servicer that assembles a chat's profile.

    A profile combines the chat's recent downloads (read from the ClickHouse
    ``telegram_statbox_log`` table), statistics derived from those downloads,
    the number of distinct uploads, the chat's subscriptions and the
    ``is_connectome_enabled`` flag stored in the ``chats`` table.
    """

    # pypika table handles used to build the PostgreSQL queries below.
    chats_table = Table('chats')
    scimag_table = Table('scimag')
    scitech_table = Table('scitech')
    series_table = Table('series')
    sharience_table = Table('sharience')
    subscriptions_table = Table('subscriptions')
    # SQL functions that pypika does not ship out of the box.
    ArrayAgg = CustomFunction('ARRAY_AGG', ['column'])
    Unnest = CustomFunction('UNNEST', ['column'])

    async def start(self):
        """Register this servicer on the application's gRPC server."""
        profile_service_pb2_grpc.add_ProfileServicer_to_server(self, self.application.server)

    async def get_downloaded_documents(self, chat_id, starting_from=0, last_n_documents=None):
        """Return short descriptions of documents downloaded by a chat.

        Args:
            chat_id: Identifier of the chat whose download log is read.
            starting_from: Unix timestamp; only events strictly after it count.
            last_n_documents: Maximum number of log rows to read, newest
                first. ``None`` means "effectively unlimited" (2**32 - 1).

        Returns:
            A list of ``ShortDocumentDescription`` messages in the order of
            the download log (most recent first). Log entries whose document
            is found in neither ``scimag`` nor ``scitech`` are skipped.

        Raises:
            TypeError, ValueError: If an argument cannot be converted to int.
        """
        if last_n_documents is None:
            last_n_documents = 2**32 - 1
        # The ClickHouse query is assembled as text, so force every
        # interpolated value to be an integer before it reaches the SQL.
        chat_id = int(chat_id)
        starting_from = int(starting_from)
        last_n_documents = int(last_n_documents)
        query = f'''
            select document_id from telegram_statbox_log
            where mode = 'download' and action = 'get'
            and chat_id = {chat_id} and event_datetime > FROM_UNIXTIME({starting_from})
            order by event_datetime desc limit {last_n_documents}
        '''
        document_ids = [
            row['document_id']
            async for row in self.application.clickhouse_client.iterate(query)
        ]
        if not document_ids:
            return []

        scimag_query = (
            PostgreSQLQuery
            .from_(self.scimag_table)
            .select(
                self.scimag_table.id,
                self.scimag_table.title,
                self.scimag_table.issns,
                self.scimag_table.tags,
            )
            .where(self.scimag_table.id.isin(document_ids))
        )
        scitech_query = (
            PostgreSQLQuery
            .from_(self.scitech_table)
            .select(
                self.scitech_table.id,
                self.scitech_table.title,
                # This query selects no ISSN column from scitech; an empty
                # array keeps both halves of the union shaped alike.
                PseudoColumn('array[]::text[]').as_('issns'),
                self.scitech_table.tags,
            )
            .where(self.scitech_table.id.isin(document_ids))
        )
        # UNION ALL, spelled out instead of pypika's `*` operator overload.
        document_query = scimag_query.union_all(scitech_query).get_sql()

        documents_dict = {}
        async for document_row in self.application.pool_holder['nexus'].iterate(
            document_query,
            row_factory=dict_row,
        ):
            documents_dict[document_row['id']] = profile_service_pb2.ShortDocumentDescription(
                id=document_row['id'],
                title=document_row['title'],
                issns=document_row['issns'],
                tags=document_row['tags'],
            )
        # Re-emit in download-log order; repeated downloads stay repeated.
        return [
            documents_dict[document_id]
            for document_id in document_ids
            if document_id in documents_dict
        ]

    async def get_chat_config(self, chat_id):
        """Return the chat's ``is_connectome_enabled`` flag.

        Returns:
            The value of the first matching ``chats`` row, or ``None`` when
            no row exists for ``chat_id``.
        """
        sql = (
            PostgreSQLQuery
            .from_(self.chats_table)
            .select(self.chats_table.is_connectome_enabled)
            .where(self.chats_table.chat_id == chat_id)
            .get_sql()
        )
        async for row in self.application.pool_holder['idm'].iterate(sql):
            return row[0]
        return None

    async def get_stats(self, downloaded_documents):
        """Compute the most frequent series and tags among downloads.

        Args:
            downloaded_documents: Iterable of objects exposing ``issns`` and
                ``tags`` iterables (as produced by
                ``get_downloaded_documents``).

        Returns:
            A ``(most_popular_series, most_popular_tags)`` tuple: up to 7
            ``Series`` messages resolved from the 14 most frequent ISSNs
            (ordered by series name), and up to 7 most frequent tags.
        """
        issns_counter = defaultdict(int)
        tags_counter = defaultdict(int)
        for download_document in downloaded_documents:
            for issn in download_document.issns:
                issns_counter[issn] += 1
            for tag in download_document.tags:
                tags_counter[tag] += 1
        # sorted() is stable, so equally frequent items keep first-seen order.
        most_popular_issns = sorted(issns_counter, key=issns_counter.get, reverse=True)[:14]
        most_popular_tags = sorted(tags_counter, key=tags_counter.get, reverse=True)[:7]

        most_popular_series = []
        if most_popular_issns:
            # Build the query with pypika so ISSN values are rendered as
            # properly escaped literals instead of being spliced between
            # hand-written quotes.
            series_name = self.series_table.field('name')
            series_sql = (
                PostgreSQLQuery
                .from_(self.series_table)
                .select(
                    series_name,
                    self.ArrayAgg(self.series_table.issn).as_('issns'),
                )
                .where(self.series_table.issn.isin(most_popular_issns))
                .groupby(series_name)
                .orderby(series_name)
                .limit(7)
            ).get_sql()
            async for row in self.application.pool_holder['nexus'].iterate(
                series_sql,
                row_factory=dict_row,
            ):
                most_popular_series.append(profile_service_pb2.Series(
                    name=row['name'],
                    issns=row['issns'],
                ))
        return most_popular_series, most_popular_tags

    async def get_uploads_count(self, chat_id):
        """Return the number of distinct ``parent_id`` values uploaded by a chat.

        Returns:
            The count from the ``sharience`` table, or ``0`` when the chat
            has no uploads (the grouped query yields no rows in that case).
        """
        sql = (
            PostgreSQLQuery.from_(self.sharience_table)
            .select(functions.Count(self.sharience_table.parent_id).distinct())
            .groupby(self.sharience_table.uploader_id)
            .where(self.sharience_table.uploader_id == chat_id)
        ).get_sql()
        async for row in self.application.pool_holder['nexus'].iterate(sql):
            return row[0]
        return 0

    async def get_subscriptions(self, chat_id):
        """Return all subscriptions of a chat, ordered by subscription id.

        Returns:
            A list of ``Subscription`` messages built from the
            ``subscriptions`` table rows whose ``chat_id`` matches.
        """
        subscriptions_sql = (
            PostgreSQLQuery.select(
                self.subscriptions_table.id,
                self.subscriptions_table.chat_id,
                self.subscriptions_table.subscription_query,
                self.subscriptions_table.schedule,
                self.subscriptions_table.is_oneshot,
                self.subscriptions_table.is_downloadable,
                self.subscriptions_table.valid_until,
                self.subscriptions_table.next_check_at,
                self.subscriptions_table.subscription_type,
            )
            .from_(self.subscriptions_table)
            .where(self.subscriptions_table.chat_id == chat_id)
            .orderby(self.subscriptions_table.id)
        ).get_sql()
        return [
            subscription_manager_service_pb2.Subscription(
                id=row['id'],
                chat_id=row['chat_id'],
                subscription_query=row['subscription_query'],
                schedule=row['schedule'],
                is_oneshot=row['is_oneshot'],
                is_downloadable=row['is_downloadable'],
                valid_until=row['valid_until'],
                next_check_at=row['next_check_at'],
                subscription_type=row['subscription_type'],
            )
            async for row in self.application.pool_holder['idm'].iterate(
                subscriptions_sql,
                row_factory=dict_row,
            )
        ]

    @aiogrpc_request_wrapper()
    async def get_profile(
        self,
        request: profile_service_pb2.GetProfileRequest,
        context,
        metadata,
    ) -> profile_service_pb2.GetProfileResponse:
        """Handle ``GetProfile``: gather the profile data and log the view.

        Downloads are fetched first because the statistics depend on them;
        the remaining lookups are independent and run concurrently. The
        list of downloaded documents is only included in the response when
        ``is_connectome_enabled`` is truthy for the chat, while
        ``downloads_count`` is always reported.
        """
        downloaded_documents = await self.get_downloaded_documents(
            chat_id=request.chat_id,
            starting_from=request.starting_from,
            last_n_documents=request.last_n_documents if request.HasField('last_n_documents') else None,
        )
        uploads_count, stats, subscriptions, is_connectome_enabled = await asyncio.gather(
            self.get_uploads_count(chat_id=request.chat_id),
            self.get_stats(downloaded_documents=downloaded_documents),
            self.get_subscriptions(chat_id=request.chat_id),
            self.get_chat_config(chat_id=request.chat_id),
        )
        most_popular_series, most_popular_tags = stats
        downloads_count = len(downloaded_documents)
        self.statbox(
            mode='profile',
            action='show',
            chat_id=request.chat_id,
            uploads_count=uploads_count,
            downloads_count=downloads_count,
            most_popular_tags=most_popular_tags,
            most_popular_series=[series.name for series in most_popular_series],
            is_connectome_enabled=is_connectome_enabled,
        )
        return profile_service_pb2.GetProfileResponse(
            most_popular_tags=most_popular_tags,
            most_popular_series=most_popular_series,
            subscriptions=subscriptions,
            uploads_count=uploads_count,
            downloads_count=downloads_count,
            downloaded_documents=downloaded_documents if is_connectome_enabled else [],
            is_connectome_enabled=is_connectome_enabled,
        )