- [nexus] Update schema

- [nexus] Remove outdated protos
  - [nexus] Development
  - [nexus] Development
  - [nexus] Development
  - [nexus] Development
  - [nexus] Development
  - [nexus] Refactor views
  - [nexus] Update aiosumma
  - [nexus] Add tags
  - [nexus] Development
  - [nexus] Update repository
  - [nexus] Update repository
  - [nexus] Update dependencies
  - [nexus] Update dependencies
  - [nexus] Fixes for MetaAPI
  - [nexus] Support for new queries
  - [nexus] Adopt new versions of search
  - [nexus] Improving Nexus
  - [nexus] Various fixes
  - [nexus] Add profile
  - [nexus] Fixes for ingestion
  - [nexus] Refactorings and bugfixes
  - [idm] Add profile methods
  - [nexus] Fix stalled nexus-meta bugs
  - [nexus] Various bugfixes
  - [nexus] Restore IDM API functionality

GitOrigin-RevId: a0842345a6dde5b321279ab5510a50c0def0e71a
This commit is contained in:
the-superpirate 2022-09-02 18:44:56 +03:00
parent 71ad7176ec
commit 43be16e4bc
526 changed files with 9909 additions and 44224 deletions

View File

@ -14,7 +14,6 @@ config_setting(
platform(
name = "linux_x86",
constraint_values = [
"@rules_rust//rust/platform:linux",
"@bazel_tools//platforms:linux",
"@bazel_tools//platforms:x86_64",
],

126
WORKSPACE
View File

@ -1,7 +1,4 @@
workspace(
name = "hyperboria",
managed_directories = {"@npm": ["rules/nodejs/node_modules"]},
)
workspace(name = "hyperboria")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
@ -9,9 +6,14 @@ http_archive(
name = "bazel_skylib",
sha256 = "ebdf850bfef28d923a2cc67ddca86355a449b5e4f38b0a70e584dc24e5984aa6",
strip_prefix = "bazel-skylib-f80bc733d4b9f83d427ce3442be2e07427b2cc8d",
urls = [
"https://github.com/bazelbuild/bazel-skylib/archive/f80bc733d4b9f83d427ce3442be2e07427b2cc8d.tar.gz",
],
urls = ["https://github.com/bazelbuild/bazel-skylib/archive/f80bc733d4b9f83d427ce3442be2e07427b2cc8d.tar.gz"],
)
http_archive(
name = "com_github_grpc_grpc",
sha256 = "291db3c4e030164421b89833ee761a2e6ca06b1d1f8e67953df762665d89439d",
strip_prefix = "grpc-1.46.1",
urls = ["https://github.com/grpc/grpc/archive/v1.46.1.tar.gz"],
)
# ToDo: wait for https://github.com/bazelbuild/rules_docker/pull/1638
@ -24,46 +26,18 @@ http_archive(
],
)
http_archive(
name = "build_bazel_rules_nodejs",
sha256 = "f7037c8e295fdc921f714962aee7c496110052511e2b14076bd8e2d46bc9819c",
urls = ["https://github.com/bazelbuild/rules_nodejs/releases/download/4.4.5/rules_nodejs-4.4.5.tar.gz"],
)
http_archive(
name = "io_bazel_rules_k8s",
sha256 = "a08850199d6900328ef899906717fb1dfcc6cde62701c63725748b2e6ca1d5d9",
strip_prefix = "rules_k8s-d05cbea5c56738ef02c667c10951294928a1d64a",
urls = [
"https://github.com/bazelbuild/rules_k8s/archive/d05cbea5c56738ef02c667c10951294928a1d64a.tar.gz",
],
)
http_archive(
name = "rules_rust",
sha256 = "30c1b40d77a262e3f7dba6e4267fe4695b5eb1e68debc6aa06c3e09d429ae19a",
strip_prefix = "rules_rust-0.1.0",
urls = [
"https://github.com/bazelbuild/rules_rust/archive/0.1.0.tar.gz",
],
)
http_archive(
name = "rules_jvm_external",
sha256 = "2a547d8d5e99703de8de54b6188ff0ed470b3bfc88e346972d1c8865e2688391",
strip_prefix = "rules_jvm_external-3.3",
urls = [
"https://github.com/bazelbuild/rules_jvm_external/archive/3.3.tar.gz",
],
urls = ["https://github.com/bazelbuild/rules_k8s/archive/d05cbea5c56738ef02c667c10951294928a1d64a.tar.gz"],
)
http_archive(
name = "rules_pkg",
sha256 = "b9a5bdfe4f8ce0dedf9387eadd9f4844c383118b3f4cc27b586626b7998141c3",
strip_prefix = "rules_pkg-4b0b9f4679484f107f750a60190ff5ec6b164a5f/pkg",
urls = [
"https://github.com/bazelbuild/rules_pkg/archive/4b0b9f4679484f107f750a60190ff5ec6b164a5f.tar.gz",
],
urls = ["https://github.com/bazelbuild/rules_pkg/archive/4b0b9f4679484f107f750a60190ff5ec6b164a5f.tar.gz"],
)
http_archive(
@ -75,9 +49,24 @@ http_archive(
http_archive(
name = "rules_python",
sha256 = "15f84594af9da06750ceb878abbf129241421e3abbd6e36893041188db67f2fb",
strip_prefix = "rules_python-0.7.0",
urls = ["https://github.com/bazelbuild/rules_python/archive/0.7.0.tar.gz"],
sha256 = "95525d542c925bc2f4a7ac9b68449fc96ca52cfba15aa883f7193cdf745c38ff",
strip_prefix = "rules_python-cccbfb920c8b100744c53c0c03900f1be4040fe8",
url = "https://github.com/ppodolsky/rules_python/archive/cccbfb920c8b100744c53c0c03900f1be4040fe8.tar.gz",
)
http_archive(
name = "org_chromium_chromium",
build_file_content = """exports_files(["chromedriver"])""",
strip_prefix = "ungoogled-chromium_103.0.5060.134_1.vaapi_linux",
urls = [
"https://github.com/macchrome/linchrome/releases/download/v103.0.5060.134-r1002911-portable-ungoogled-Lin64/ungoogled-chromium_103.0.5060.134_1.vaapi_linux.tar.xz",
],
)
http_archive(
name = "org_izihawa_summa",
strip_prefix = "summa-ab7ea3eba9846094d1792077d578ddb585d8e070",
url = "https://github.com/izihawa/summa/archive/ab7ea3eba9846094d1792077d578ddb585d8e070.tar.gz",
)
# Images Install
@ -97,12 +86,19 @@ load("//rules/go:install.bzl", "go_install")
go_install()
# Python
register_toolchains("//rules/python:py_toolchain")
load("@rules_python//python:repositories.bzl", "python_register_toolchains")
python_register_toolchains(
name = "python3_10",
python_version = "3.10",
)
load("@python3_10//:defs.bzl", "interpreter")
load("@rules_python//python:pip.bzl", "pip_parse")
pip_parse(
name = "pip_modules",
python_interpreter_target = interpreter,
requirements_lock = "//rules/python:requirements-lock.txt",
)
@ -126,43 +122,13 @@ load("@com_github_grpc_grpc//bazel:grpc_deps.bzl", "grpc_deps")
grpc_deps()
load("@com_github_grpc_grpc//bazel:grpc_extra_deps.bzl", "grpc_extra_deps")
load("@com_google_googleapis//:repository_rules.bzl", "switched_rules_by_language")
grpc_extra_deps()
# Java
load("//rules/java:artifacts.bzl", "maven_fetch_remote_artifacts")
maven_fetch_remote_artifacts()
# Rust
load("@rules_rust//rust:repositories.bzl", "rust_repositories")
rust_repositories(
edition = "2021",
version = "1.59.0",
)
load("//rules/rust:crates.bzl", "raze_fetch_remote_crates")
raze_fetch_remote_crates()
# NodeJS
load("@build_bazel_rules_nodejs//:index.bzl", "node_repositories", "yarn_install")
node_repositories(
package_json = ["//rules/nodejs:package.json"],
preserve_symlinks = True,
)
yarn_install(
name = "npm",
package_json = "//rules/nodejs:package.json",
symlink_node_modules = True,
use_global_yarn_cache = True,
yarn_lock = "//rules/nodejs:yarn.lock",
switched_rules_by_language(
name = "com_google_googleapis_imports",
cc = True,
grpc = True,
python = True,
)
# Packaging
@ -185,19 +151,13 @@ load("@io_bazel_rules_docker//repositories:py_repositories.bzl", "py_deps")
py_deps()
load("@io_bazel_rules_docker//java:image.bzl", java_image_repos = "repositories")
load("@io_bazel_rules_docker//python3:image.bzl", py3_image_repos = "repositories")
load("@io_bazel_rules_docker//nodejs:image.bzl", nodejs_image_repos = "repositories")
load("@io_bazel_rules_docker//rust:image.bzl", rust_image_repos = "repositories")
java_image_repos()
nodejs_image_repos()
py3_image_repos()
rust_image_repos()
# K8s
load("@io_bazel_rules_k8s//k8s:k8s.bzl", "k8s_defaults", "k8s_repositories")

View File

@ -1,41 +1,12 @@
---
services:
nexus-cognitron-web:
depends_on:
- nexus-meta-api-envoy
environment:
ENV_TYPE: production
NEXUS_COGNITRON_WEB_application.address: 0.0.0.0
NEXUS_COGNITRON_WEB_application.port: 3000
NEXUS_COGNITRON_WEB_ipfs.gateway.url: https://cloudflare-ipfs.com
NEXUS_COGNITRON_WEB_meta_api.url: http://localhost:8080
image: thesuperpirate/nexus-cognitron-web:latest
ports:
- '3000:3000'
nexus-meta-api:
depends_on:
- summa
environment:
ENV_TYPE: production
NEXUS_META_API_grpc.address: '0.0.0.0'
NEXUS_META_API_grpc.port: 9090
NEXUS_META_API_summa.url: 'http://summa:8082'
image: thesuperpirate/nexus-meta-api:latest
nexus-meta-api-envoy:
depends_on:
- nexus-meta-api
image: envoyproxy/envoy-dev:latest
ports:
- '8080:8080'
volumes:
- './envoy.yaml:/etc/envoy/envoy.yaml'
summa:
environment:
ENV_TYPE: production
SUMMA_debug: 'true'
SUMMA_http.address: '0.0.0.0'
SUMMA_http.port: '8082'
image: izihawa/summa:latest
volumes:
- '${DATA_PATH}:/summa/data'
version: "3"

View File

@ -1,59 +0,0 @@
---
admin:
access_log_path: /tmp/admin_access.log
address:
socket_address:
address: 0.0.0.0
port_value: 9901
static_resources:
clusters:
- connect_timeout: 5s
http2_protocol_options: {}
lb_policy: round_robin
load_assignment:
cluster_name: cluster_0
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: nexus-meta-api
port_value: 9090
name: meta_api_service
type: logical_dns
listeners:
- address:
socket_address:
address: 0.0.0.0
port_value: 8080
filter_chains:
- filters:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
codec_type: auto
http_filters:
- name: envoy.filters.http.grpc_web
- name: envoy.filters.http.cors
- name: envoy.filters.http.router
route_config:
name: local_route
virtual_hosts:
- cors:
allow_headers: keep-alive,user-agent,cache-control,content-type,content-transfer-encoding,request-id,x-accept-content-transfer-encoding,x-accept-response-streaming,x-user-agent,x-grpc-web,grpc-timeout
allow_methods: GET, PUT, DELETE, POST, OPTIONS
allow_origin_string_match:
- prefix: "*"
expose_headers: request-id,grpc-status,grpc-message
max_age: "1728000"
domains: ["*"]
name: local_service
routes:
- match: { prefix: "/" }
route:
cluster: meta_api_service
idle_timeout: 0s
max_stream_duration:
grpc_timeout_header_max: 0s
stat_prefix: ingress_http
name: listener_0

View File

@ -32,6 +32,9 @@ py3_image(
"configs/logging.yaml",
],
layers = [
requirement("aiochclient"),
requirement("aiohttp"),
requirement("croniter"),
requirement("grpcio"),
requirement("pypika"),
requirement("uvloop"),
@ -42,7 +45,7 @@ py3_image(
"//library/aiopostgres",
"//library/configurator",
"//library/telegram",
requirement("izihawa-utils"),
requirement("izihawa_utils"),
],
main = "main.py",
srcs_version = "PY3ONLY",

View File

@ -1,20 +1,25 @@
from typing import Optional
from typing import (
Optional,
TypedDict,
Union,
)
from aiogrpcclient import BaseGrpcClient
from idm.api.proto.chat_manager_service_pb2 import Chat as ChatPb
from idm.api.proto.chat_manager_service_pb2 import Chats as ChatsPb
from idm.api.proto.chat_manager_service_pb2 import (
CreateChatRequest,
GetChatRequest,
ListChatsRequest,
UpdateChatRequest,
from idm.api.proto import (
chat_manager_service_pb2,
chat_manager_service_pb2_grpc,
profile_service_pb2,
profile_service_pb2_grpc,
subscription_manager_service_pb2,
subscription_manager_service_pb2_grpc,
)
from idm.api.proto.chat_manager_service_pb2_grpc import ChatManagerStub
class IdmApiGrpcClient(BaseGrpcClient):
stub_clses = {
'chat_manager': ChatManagerStub,
'chat_manager': chat_manager_service_pb2_grpc.ChatManagerStub,
'profile': profile_service_pb2_grpc.ProfileStub,
'subscription_manager': subscription_manager_service_pb2_grpc.SubscriptionManagerStub,
}
async def create_chat(
@ -23,15 +28,16 @@ class IdmApiGrpcClient(BaseGrpcClient):
username: str,
language: str,
request_id: Optional[str] = None,
) -> ChatPb:
session_id: Optional[str] = None,
) -> chat_manager_service_pb2.Chat:
response = await self.stubs['chat_manager'].create_chat(
CreateChatRequest(
chat_manager_service_pb2.CreateChatRequest(
chat_id=chat_id,
username=username,
language=language,
),
metadata=(
('request-id', request_id),
('request-id', request_id), ('session-id', session_id),
),
)
return response
@ -40,11 +46,12 @@ class IdmApiGrpcClient(BaseGrpcClient):
self,
chat_id: int,
request_id: Optional[str] = None,
) -> ChatPb:
session_id: Optional[str] = None,
) -> chat_manager_service_pb2.Chat:
response = await self.stubs['chat_manager'].get_chat(
GetChatRequest(chat_id=chat_id),
chat_manager_service_pb2.GetChatRequest(chat_id=chat_id),
metadata=(
('request-id', request_id),
('request-id', request_id), ('session-id', session_id),
),
)
return response
@ -52,12 +59,13 @@ class IdmApiGrpcClient(BaseGrpcClient):
async def list_chats(
self,
request_id: Optional[str] = None,
session_id: Optional[str] = None,
banned_at_moment: Optional[str] = None,
) -> ChatsPb:
) -> chat_manager_service_pb2.Chats:
response = await self.stubs['chat_manager'].list_chats(
ListChatsRequest(banned_at_moment=banned_at_moment),
chat_manager_service_pb2.ListChatsRequest(banned_at_moment=banned_at_moment),
metadata=(
('request-id', request_id),
('request-id', request_id), ('session-id', session_id),
),
)
return response
@ -66,25 +74,113 @@ class IdmApiGrpcClient(BaseGrpcClient):
self,
chat_id: int,
request_id: Optional[str] = None,
session_id: Optional[str] = None,
language: Optional[str] = None,
is_system_messaging_enabled: Optional[bool] = None,
is_discovery_enabled: Optional[bool] = None,
is_connectome_enabled: Optional[bool] = None,
ban_until: Optional[int] = None,
ban_message: Optional[str] = None,
is_admin: Optional[bool] = None,
) -> ChatPb:
) -> chat_manager_service_pb2.Chat:
response = await self.stubs['chat_manager'].update_chat(
UpdateChatRequest(
chat_manager_service_pb2.UpdateChatRequest(
chat_id=chat_id,
language=language,
is_system_messaging_enabled=is_system_messaging_enabled,
is_discovery_enabled=is_discovery_enabled,
is_connectome_enabled=is_connectome_enabled,
ban_until=ban_until,
ban_message=ban_message,
is_admin=is_admin,
),
metadata=(
('request-id', request_id),
('request-id', request_id), ('session-id', session_id),
),
)
return response
async def get_profile(
    self,
    chat_id: int,
    starting_from: int = 0,
    last_n_documents: Optional[int] = None,
    request_id: Optional[str] = None,
    session_id: Optional[str] = None,
) -> profile_service_pb2.GetProfileResponse:
    """Invoke `get_profile` on the 'profile' stub and return its response.

    A `GetProfileRequest` is built from `chat_id`, `starting_from` and
    `last_n_documents`. `request_id` and `session_id` are sent as the
    'request-id' and 'session-id' call metadata entries.
    """
    # Resolve the RPC first so lookup errors surface before the request is built.
    rpc = self.stubs['profile'].get_profile
    profile_request = profile_service_pb2.GetProfileRequest(
        chat_id=chat_id,
        starting_from=starting_from,
        last_n_documents=last_n_documents,
    )
    call_metadata = (
        ('request-id', request_id),
        ('session-id', session_id),
    )
    return await rpc(profile_request, metadata=call_metadata)
async def subscribe(
    self,
    chat_id: int,
    subscription_query: str,
    schedule: str,
    is_oneshot: Optional[bool] = None,
    is_downloadable: Optional[bool] = None,
    valid_until: Optional[int] = None,
    subscription_type: subscription_manager_service_pb2.Subscription.Type
    = subscription_manager_service_pb2.Subscription.Type.CUSTOM,
    request_id: Optional[str] = None,
    session_id: Optional[str] = None,
) -> subscription_manager_service_pb2.SubscribeResponse:
    """Invoke `subscribe` on the 'subscription_manager' stub and return its response.

    All subscription arguments are forwarded unchanged as fields of a
    `SubscribeRequest`. `request_id` and `session_id` are sent as the
    'request-id' and 'session-id' call metadata entries.
    """
    # Resolve the RPC first so lookup errors surface before the request is built.
    rpc = self.stubs['subscription_manager'].subscribe
    subscribe_request = subscription_manager_service_pb2.SubscribeRequest(
        chat_id=chat_id,
        subscription_query=subscription_query,
        schedule=schedule,
        is_oneshot=is_oneshot,
        is_downloadable=is_downloadable,
        valid_until=valid_until,
        subscription_type=subscription_type,
    )
    call_metadata = (
        ('request-id', request_id),
        ('session-id', session_id),
    )
    return await rpc(subscribe_request, metadata=call_metadata)
async def get_single_chat_task(
    self,
    request_id: Optional[str] = None,
    session_id: Optional[str] = None,
) -> subscription_manager_service_pb2.GetSingleChatTaskResponse:
    """Invoke `get_single_chat_task` on the 'subscription_manager' stub.

    Sends an empty `GetSingleChatTaskRequest`. `request_id` and
    `session_id` are sent as the 'request-id' and 'session-id' call
    metadata entries. The stub's response is returned as-is.
    """
    # Resolve the RPC first so lookup errors surface before the request is built.
    rpc = self.stubs['subscription_manager'].get_single_chat_task
    empty_request = subscription_manager_service_pb2.GetSingleChatTaskRequest()
    call_metadata = (
        ('request-id', request_id),
        ('session-id', session_id),
    )
    return await rpc(empty_request, metadata=call_metadata)
async def reschedule_subscriptions(
    self,
    subscriptions_ids: dict,
    is_fired: bool = False,
    new_schedule: Optional[subscription_manager_service_pb2.NewSchedule] = None,
    request_id: Optional[str] = None,
    session_id: Optional[str] = None,
) -> subscription_manager_service_pb2.RescheduleSubscriptionsResponse:
    """Invoke `reschedule_subscriptions` on the 'subscription_manager' stub.

    `subscriptions_ids` is expanded as keyword arguments of the
    `RescheduleSubscriptionsRequest`, alongside `is_fired` and
    `new_schedule`. `request_id` and `session_id` are sent as the
    'request-id' and 'session-id' call metadata entries.
    """
    # Resolve the RPC first so lookup errors surface before the request is built.
    rpc = self.stubs['subscription_manager'].reschedule_subscriptions
    reschedule_request = subscription_manager_service_pb2.RescheduleSubscriptionsRequest(
        is_fired=is_fired,
        new_schedule=new_schedule,
        **subscriptions_ids,
    )
    call_metadata = (
        ('request-id', request_id),
        ('session-id', session_id),
    )
    return await rpc(reschedule_request, metadata=call_metadata)

View File

@ -3,7 +3,12 @@ application:
debug: true
service_name: idm-api
database:
port: 5432
idm:
drivername: postgresql
port: 5432
nexus:
drivername: postgresql
port: 5432
grpc:
address: 0.0.0.0
port: 82

View File

@ -12,10 +12,14 @@ logging:
traceback:
class: library.logging.formatters.TracebackFormatter
handlers:
console:
class: logging.StreamHandler
level: WARNING
stream: 'ext://sys.stderr'
debug:
class: library.logging.handlers.BaseFileHandler
formatter: default
filename: '{{ log_path }}/debug.log'
formatter: default
level: DEBUG
error:
class: library.logging.handlers.BaseFileHandler
@ -44,6 +48,7 @@ logging:
propagate: false
error:
handlers:
- console
- error
- traceback
- warning
@ -54,10 +59,12 @@ logging:
propagate: false
telethon:
handlers:
- warning
- error
propagate: false
root:
handlers:
- console
- debug
level: DEBUG
version: 1

View File

@ -1,8 +1,12 @@
import asyncio
import uvloop
from aiochclient import ChClient
from aiohttp import ClientSession
from idm.api.configs import get_config
from idm.api.services.chat_manager import ChatManagerService
from idm.api.services.profile import ProfileService
from idm.api.services.subscription_manager import SubscriptionManagerService
from library.aiogrpctools import AioGrpcServer
from library.aiopostgres.pool_holder import AioPostgresPoolHolder
from library.configurator import Configurator
@ -13,36 +17,50 @@ class GrpcServer(AioGrpcServer):
def __init__(self, config: Configurator):
super().__init__(address=config['grpc']['address'], port=config['grpc']['port'])
database = config['database']
self.pool_holder = AioPostgresPoolHolder(
conninfo=f'dbname={database["database"]} '
f'user={database["username"]} '
f'password={database["password"]} '
f'host={database["host"]}'
f'port={database["port"]}',
timeout=30,
max_size=4,
)
self.pool_holder = {
'idm': AioPostgresPoolHolder(
conninfo=f'dbname={database["idm"]["database"]} user={database["idm"]["username"]} '
f'password={database["idm"]["password"]} host={database["idm"]["host"]} port={database["idm"]["port"]}',
timeout=30,
max_size=4,
),
'nexus': AioPostgresPoolHolder(
conninfo=f'dbname={database["nexus"]["database"]} user={database["nexus"]["username"]} '
f'password={database["nexus"]["password"]} host={database["nexus"]["host"]} port={database["nexus"]["port"]}',
timeout=30,
max_size=4,
)
}
self.starts.extend([self.pool_holder['idm'], self.pool_holder['nexus']])
self.chat_manager_service = ChatManagerService(
server=self.server,
application=self,
service_name=config['application']['service_name'],
pool_holder=self.pool_holder,
)
self.waits.extend([self.chat_manager_service, self.pool_holder])
async def create_app(config: Configurator):
grpc_server = GrpcServer(config)
await grpc_server.start_and_wait()
self.subscription_manager_service = SubscriptionManagerService(
application=self,
service_name=config['application']['service_name'],
)
self.clickhouse_session = ClientSession()
self.clickhouse_client = ChClient(
self.clickhouse_session,
url=config['clickhouse']['host'],
user=config['clickhouse']['username'],
password=config['clickhouse']['password'],
)
self.profile_service = ProfileService(
application=self,
service_name=config['application']['service_name'],
)
self.starts.extend([self.chat_manager_service, self.profile_service, self.subscription_manager_service])
def main():
config = get_config()
configure_logging(config)
if config['metrics']['enabled']:
from library.metrics_server import MetricsServer
MetricsServer(config['metrics']).fork_process()
asyncio.set_event_loop(uvloop.new_event_loop())
asyncio.get_event_loop().run_until_complete(create_app(config))
loop = uvloop.new_event_loop()
asyncio.set_event_loop(loop)
grpc_server = GrpcServer(config)
loop.run_until_complete(grpc_server.start_and_wait())
if __name__ == '__main__':

View File

@ -7,6 +7,8 @@ proto_library(
name = "proto",
srcs = [
"chat_manager_service.proto",
"profile_service.proto",
"subscription_manager_service.proto",
],
deps = [
"@com_google_protobuf//:wrappers_proto",

View File

@ -12,6 +12,7 @@ message Chat {
bool is_admin = 8;
int64 created_at = 10;
int64 updated_at = 11;
bool is_connectome_enabled = 12;
}
message Chats {
@ -40,6 +41,7 @@ message UpdateChatRequest {
optional int32 ban_until = 5;
optional string ban_message = 6;
optional bool is_admin = 7;
optional bool is_connectome_enabled = 8;
}
service ChatManager {

View File

@ -1,503 +0,0 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: idm/api/proto/chat_manager_service.proto
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='idm/api/proto/chat_manager_service.proto',
package='idm.api.proto',
syntax='proto3',
serialized_options=None,
create_key=_descriptor._internal_create_key,
serialized_pb=b'\n(idm/api/proto/chat_manager_service.proto\x12\ridm.api.proto\"\xe3\x01\n\x04\x43hat\x12\x0f\n\x07\x63hat_id\x18\x01 \x01(\x03\x12\x10\n\x08username\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12#\n\x1bis_system_messaging_enabled\x18\x04 \x01(\x08\x12\x1c\n\x14is_discovery_enabled\x18\x05 \x01(\x08\x12\x11\n\tban_until\x18\x06 \x01(\x05\x12\x13\n\x0b\x62\x61n_message\x18\x07 \x01(\t\x12\x10\n\x08is_admin\x18\x08 \x01(\x08\x12\x15\n\ris_subscribed\x18\t \x01(\x08\x12\x12\n\ncreated_at\x18\n \x01(\x03\"+\n\x05\x43hats\x12\"\n\x05\x63hats\x18\x01 \x03(\x0b\x32\x13.idm.api.proto.Chat\"H\n\x11\x43reateChatRequest\x12\x0f\n\x07\x63hat_id\x18\x01 \x01(\x03\x12\x10\n\x08username\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\"!\n\x0eGetChatRequest\x12\x0f\n\x07\x63hat_id\x18\x01 \x01(\x03\"F\n\x10ListChatsRequest\x12\x1d\n\x10\x62\x61nned_at_moment\x18\x01 \x01(\x05H\x00\x88\x01\x01\x42\x13\n\x11_banned_at_moment\"\xc2\x02\n\x11UpdateChatRequest\x12\x0f\n\x07\x63hat_id\x18\x01 \x01(\x03\x12\x15\n\x08language\x18\x02 \x01(\tH\x00\x88\x01\x01\x12(\n\x1bis_system_messaging_enabled\x18\x03 \x01(\x08H\x01\x88\x01\x01\x12!\n\x14is_discovery_enabled\x18\x04 \x01(\x08H\x02\x88\x01\x01\x12\x16\n\tban_until\x18\x05 \x01(\x05H\x03\x88\x01\x01\x12\x18\n\x0b\x62\x61n_message\x18\x06 \x01(\tH\x04\x88\x01\x01\x12\x15\n\x08is_admin\x18\x07 \x01(\x08H\x05\x88\x01\x01\x42\x0b\n\t_languageB\x1e\n\x1c_is_system_messaging_enabledB\x17\n\x15_is_discovery_enabledB\x0c\n\n_ban_untilB\x0e\n\x0c_ban_messageB\x0b\n\t_is_admin2\xa6\x02\n\x0b\x43hatManager\x12\x46\n\x0b\x63reate_chat\x12 .idm.api.proto.CreateChatRequest\x1a\x13.idm.api.proto.Chat\"\x00\x12@\n\x08get_chat\x12\x1d.idm.api.proto.GetChatRequest\x1a\x13.idm.api.proto.Chat\"\x00\x12\x45\n\nlist_chats\x12\x1f.idm.api.proto.ListChatsRequest\x1a\x14.idm.api.proto.Chats\"\x00\x12\x46\n\x0bupdate_chat\x12 .idm.api.proto.UpdateChatRequest\x1a\x13.idm.api.proto.Chat\"\x00\x62\x06proto3'
)
_CHAT = _descriptor.Descriptor(
name='Chat',
full_name='idm.api.proto.Chat',
filename=None,
file=DESCRIPTOR,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[
_descriptor.FieldDescriptor(
name='chat_id', full_name='idm.api.proto.Chat.chat_id', index=0,
number=1, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='username', full_name='idm.api.proto.Chat.username', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=b"".decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='language', full_name='idm.api.proto.Chat.language', index=2,
number=3, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=b"".decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='is_system_messaging_enabled', full_name='idm.api.proto.Chat.is_system_messaging_enabled', index=3,
number=4, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='is_discovery_enabled', full_name='idm.api.proto.Chat.is_discovery_enabled', index=4,
number=5, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='ban_until', full_name='idm.api.proto.Chat.ban_until', index=5,
number=6, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='ban_message', full_name='idm.api.proto.Chat.ban_message', index=6,
number=7, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=b"".decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='is_admin', full_name='idm.api.proto.Chat.is_admin', index=7,
number=8, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='is_subscribed', full_name='idm.api.proto.Chat.is_subscribed', index=8,
number=9, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='created_at', full_name='idm.api.proto.Chat.created_at', index=9,
number=10, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=60,
serialized_end=287,
)
_CHATS = _descriptor.Descriptor(
name='Chats',
full_name='idm.api.proto.Chats',
filename=None,
file=DESCRIPTOR,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[
_descriptor.FieldDescriptor(
name='chats', full_name='idm.api.proto.Chats.chats', index=0,
number=1, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=289,
serialized_end=332,
)
_CREATECHATREQUEST = _descriptor.Descriptor(
name='CreateChatRequest',
full_name='idm.api.proto.CreateChatRequest',
filename=None,
file=DESCRIPTOR,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[
_descriptor.FieldDescriptor(
name='chat_id', full_name='idm.api.proto.CreateChatRequest.chat_id', index=0,
number=1, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='username', full_name='idm.api.proto.CreateChatRequest.username', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=b"".decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='language', full_name='idm.api.proto.CreateChatRequest.language', index=2,
number=3, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=b"".decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=334,
serialized_end=406,
)
_GETCHATREQUEST = _descriptor.Descriptor(
name='GetChatRequest',
full_name='idm.api.proto.GetChatRequest',
filename=None,
file=DESCRIPTOR,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[
_descriptor.FieldDescriptor(
name='chat_id', full_name='idm.api.proto.GetChatRequest.chat_id', index=0,
number=1, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=408,
serialized_end=441,
)
_LISTCHATSREQUEST = _descriptor.Descriptor(
name='ListChatsRequest',
full_name='idm.api.proto.ListChatsRequest',
filename=None,
file=DESCRIPTOR,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[
_descriptor.FieldDescriptor(
name='banned_at_moment', full_name='idm.api.proto.ListChatsRequest.banned_at_moment', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
_descriptor.OneofDescriptor(
name='_banned_at_moment', full_name='idm.api.proto.ListChatsRequest._banned_at_moment',
index=0, containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[]),
],
serialized_start=443,
serialized_end=513,
)
# Descriptor for the proto3 message `idm.api.proto.UpdateChatRequest`.
# Seven fields are declared (numbers 1-7): chat_id, language,
# is_system_messaging_enabled, is_discovery_enabled, ban_until, ban_message
# and is_admin. Every field except `chat_id` has a matching oneof whose name
# is the field name with a leading underscore; those oneofs start with empty
# field lists and are filled in by the wiring statements after the descriptors.
_UPDATECHATREQUEST = _descriptor.Descriptor(
name='UpdateChatRequest',
full_name='idm.api.proto.UpdateChatRequest',
filename=None,
file=DESCRIPTOR,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[
_descriptor.FieldDescriptor(
name='chat_id', full_name='idm.api.proto.UpdateChatRequest.chat_id', index=0,
number=1, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='language', full_name='idm.api.proto.UpdateChatRequest.language', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=b"".decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='is_system_messaging_enabled', full_name='idm.api.proto.UpdateChatRequest.is_system_messaging_enabled', index=2,
number=3, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='is_discovery_enabled', full_name='idm.api.proto.UpdateChatRequest.is_discovery_enabled', index=3,
number=4, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='ban_until', full_name='idm.api.proto.UpdateChatRequest.ban_until', index=4,
number=5, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='ban_message', full_name='idm.api.proto.UpdateChatRequest.ban_message', index=5,
number=6, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=b"".decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='is_admin', full_name='idm.api.proto.UpdateChatRequest.is_admin', index=6,
number=7, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
# One oneof per field other than chat_id, in field order (indexes 0-5).
oneofs=[
_descriptor.OneofDescriptor(
name='_language', full_name='idm.api.proto.UpdateChatRequest._language',
index=0, containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[]),
_descriptor.OneofDescriptor(
name='_is_system_messaging_enabled', full_name='idm.api.proto.UpdateChatRequest._is_system_messaging_enabled',
index=1, containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[]),
_descriptor.OneofDescriptor(
name='_is_discovery_enabled', full_name='idm.api.proto.UpdateChatRequest._is_discovery_enabled',
index=2, containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[]),
_descriptor.OneofDescriptor(
name='_ban_until', full_name='idm.api.proto.UpdateChatRequest._ban_until',
index=3, containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[]),
_descriptor.OneofDescriptor(
name='_ban_message', full_name='idm.api.proto.UpdateChatRequest._ban_message',
index=4, containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[]),
_descriptor.OneofDescriptor(
name='_is_admin', full_name='idm.api.proto.UpdateChatRequest._is_admin',
index=5, containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[]),
],
# Offsets of this message inside the serialized file descriptor.
serialized_start=516,
serialized_end=838,
)
# Cross-link the descriptors now that all of them exist.
# `Chats.chats` is a message-typed field whose element type is `Chat`.
_CHATS.fields_by_name['chats'].message_type = _CHAT
# Attach every underscore-prefixed oneof to its single member field, in both
# directions: the oneof lists the field, and the field records its oneof.
_LISTCHATSREQUEST.oneofs_by_name['_banned_at_moment'].fields.append(
_LISTCHATSREQUEST.fields_by_name['banned_at_moment'])
_LISTCHATSREQUEST.fields_by_name['banned_at_moment'].containing_oneof = _LISTCHATSREQUEST.oneofs_by_name['_banned_at_moment']
_UPDATECHATREQUEST.oneofs_by_name['_language'].fields.append(
_UPDATECHATREQUEST.fields_by_name['language'])
_UPDATECHATREQUEST.fields_by_name['language'].containing_oneof = _UPDATECHATREQUEST.oneofs_by_name['_language']
_UPDATECHATREQUEST.oneofs_by_name['_is_system_messaging_enabled'].fields.append(
_UPDATECHATREQUEST.fields_by_name['is_system_messaging_enabled'])
_UPDATECHATREQUEST.fields_by_name['is_system_messaging_enabled'].containing_oneof = _UPDATECHATREQUEST.oneofs_by_name['_is_system_messaging_enabled']
_UPDATECHATREQUEST.oneofs_by_name['_is_discovery_enabled'].fields.append(
_UPDATECHATREQUEST.fields_by_name['is_discovery_enabled'])
_UPDATECHATREQUEST.fields_by_name['is_discovery_enabled'].containing_oneof = _UPDATECHATREQUEST.oneofs_by_name['_is_discovery_enabled']
_UPDATECHATREQUEST.oneofs_by_name['_ban_until'].fields.append(
_UPDATECHATREQUEST.fields_by_name['ban_until'])
_UPDATECHATREQUEST.fields_by_name['ban_until'].containing_oneof = _UPDATECHATREQUEST.oneofs_by_name['_ban_until']
_UPDATECHATREQUEST.oneofs_by_name['_ban_message'].fields.append(
_UPDATECHATREQUEST.fields_by_name['ban_message'])
_UPDATECHATREQUEST.fields_by_name['ban_message'].containing_oneof = _UPDATECHATREQUEST.oneofs_by_name['_ban_message']
_UPDATECHATREQUEST.oneofs_by_name['_is_admin'].fields.append(
_UPDATECHATREQUEST.fields_by_name['is_admin'])
_UPDATECHATREQUEST.fields_by_name['is_admin'].containing_oneof = _UPDATECHATREQUEST.oneofs_by_name['_is_admin']
# Publish the six message descriptors on the file descriptor by name, then
# register the file descriptor with the symbol database.
DESCRIPTOR.message_types_by_name['Chat'] = _CHAT
DESCRIPTOR.message_types_by_name['Chats'] = _CHATS
DESCRIPTOR.message_types_by_name['CreateChatRequest'] = _CREATECHATREQUEST
DESCRIPTOR.message_types_by_name['GetChatRequest'] = _GETCHATREQUEST
DESCRIPTOR.message_types_by_name['ListChatsRequest'] = _LISTCHATSREQUEST
DESCRIPTOR.message_types_by_name['UpdateChatRequest'] = _UPDATECHATREQUEST
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
# Build one concrete message class per descriptor. Each class derives from
# `_message.Message`, carries its descriptor in `DESCRIPTOR`, reports
# 'idm.api.proto.chat_manager_service_pb2' as its module, and is registered
# with the symbol database immediately after creation.
Chat = _reflection.GeneratedProtocolMessageType('Chat', (_message.Message,), {
'DESCRIPTOR' : _CHAT,
'__module__' : 'idm.api.proto.chat_manager_service_pb2'
# @@protoc_insertion_point(class_scope:idm.api.proto.Chat)
})
_sym_db.RegisterMessage(Chat)
Chats = _reflection.GeneratedProtocolMessageType('Chats', (_message.Message,), {
'DESCRIPTOR' : _CHATS,
'__module__' : 'idm.api.proto.chat_manager_service_pb2'
# @@protoc_insertion_point(class_scope:idm.api.proto.Chats)
})
_sym_db.RegisterMessage(Chats)
CreateChatRequest = _reflection.GeneratedProtocolMessageType('CreateChatRequest', (_message.Message,), {
'DESCRIPTOR' : _CREATECHATREQUEST,
'__module__' : 'idm.api.proto.chat_manager_service_pb2'
# @@protoc_insertion_point(class_scope:idm.api.proto.CreateChatRequest)
})
_sym_db.RegisterMessage(CreateChatRequest)
GetChatRequest = _reflection.GeneratedProtocolMessageType('GetChatRequest', (_message.Message,), {
'DESCRIPTOR' : _GETCHATREQUEST,
'__module__' : 'idm.api.proto.chat_manager_service_pb2'
# @@protoc_insertion_point(class_scope:idm.api.proto.GetChatRequest)
})
_sym_db.RegisterMessage(GetChatRequest)
ListChatsRequest = _reflection.GeneratedProtocolMessageType('ListChatsRequest', (_message.Message,), {
'DESCRIPTOR' : _LISTCHATSREQUEST,
'__module__' : 'idm.api.proto.chat_manager_service_pb2'
# @@protoc_insertion_point(class_scope:idm.api.proto.ListChatsRequest)
})
_sym_db.RegisterMessage(ListChatsRequest)
UpdateChatRequest = _reflection.GeneratedProtocolMessageType('UpdateChatRequest', (_message.Message,), {
'DESCRIPTOR' : _UPDATECHATREQUEST,
'__module__' : 'idm.api.proto.chat_manager_service_pb2'
# @@protoc_insertion_point(class_scope:idm.api.proto.UpdateChatRequest)
})
_sym_db.RegisterMessage(UpdateChatRequest)
# Service descriptor for `idm.api.proto.ChatManager`. It declares four
# methods (indexes 0-3): create_chat, get_chat and update_chat all return a
# `Chat`, while list_chats returns `Chats`. The descriptor is registered with
# the symbol database and published on the file descriptor afterwards.
_CHATMANAGER = _descriptor.ServiceDescriptor(
name='ChatManager',
full_name='idm.api.proto.ChatManager',
file=DESCRIPTOR,
index=0,
serialized_options=None,
create_key=_descriptor._internal_create_key,
serialized_start=841,
serialized_end=1135,
methods=[
_descriptor.MethodDescriptor(
name='create_chat',
full_name='idm.api.proto.ChatManager.create_chat',
index=0,
containing_service=None,
input_type=_CREATECHATREQUEST,
output_type=_CHAT,
serialized_options=None,
create_key=_descriptor._internal_create_key,
),
_descriptor.MethodDescriptor(
name='get_chat',
full_name='idm.api.proto.ChatManager.get_chat',
index=1,
containing_service=None,
input_type=_GETCHATREQUEST,
output_type=_CHAT,
serialized_options=None,
create_key=_descriptor._internal_create_key,
),
_descriptor.MethodDescriptor(
name='list_chats',
full_name='idm.api.proto.ChatManager.list_chats',
index=2,
containing_service=None,
input_type=_LISTCHATSREQUEST,
output_type=_CHATS,
serialized_options=None,
create_key=_descriptor._internal_create_key,
),
_descriptor.MethodDescriptor(
name='update_chat',
full_name='idm.api.proto.ChatManager.update_chat',
index=3,
containing_service=None,
input_type=_UPDATECHATREQUEST,
output_type=_CHAT,
serialized_options=None,
create_key=_descriptor._internal_create_key,
),
])
_sym_db.RegisterServiceDescriptor(_CHATMANAGER)
DESCRIPTOR.services_by_name['ChatManager'] = _CHATMANAGER
# @@protoc_insertion_point(module_scope)
# @@protoc_insertion_point(module_scope)

View File

@ -1,166 +0,0 @@
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
from idm.api.proto import \
chat_manager_service_pb2 as \
idm_dot_api_dot_proto_dot_chat__manager__service__pb2
class ChatManagerStub(object):
    """Client-side stub exposing one unary-unary callable per ChatManager RPC."""

    def __init__(self, channel):
        """Bind the four ChatManager RPCs to the given channel.

        Args:
            channel: A grpc.Channel.
        """
        # Short alias for the generated message module; purely cosmetic.
        pb2 = idm_dot_api_dot_proto_dot_chat__manager__service__pb2

        self.create_chat = channel.unary_unary(
            '/idm.api.proto.ChatManager/create_chat',
            request_serializer=pb2.CreateChatRequest.SerializeToString,
            response_deserializer=pb2.Chat.FromString,
        )
        self.get_chat = channel.unary_unary(
            '/idm.api.proto.ChatManager/get_chat',
            request_serializer=pb2.GetChatRequest.SerializeToString,
            response_deserializer=pb2.Chat.FromString,
        )
        self.list_chats = channel.unary_unary(
            '/idm.api.proto.ChatManager/list_chats',
            request_serializer=pb2.ListChatsRequest.SerializeToString,
            response_deserializer=pb2.Chats.FromString,
        )
        self.update_chat = channel.unary_unary(
            '/idm.api.proto.ChatManager/update_chat',
            request_serializer=pb2.UpdateChatRequest.SerializeToString,
            response_deserializer=pb2.Chat.FromString,
        )
class ChatManagerServicer(object):
    """Server-side base class for ChatManager.

    Every RPC method here marks the call as UNIMPLEMENTED on the context and
    raises NotImplementedError; concrete services override the methods.
    """

    def create_chat(self, request, context):
        """Default create_chat handler: report UNIMPLEMENTED and raise."""
        message = 'Method not implemented!'
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details(message)
        raise NotImplementedError(message)

    def get_chat(self, request, context):
        """Default get_chat handler: report UNIMPLEMENTED and raise."""
        message = 'Method not implemented!'
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details(message)
        raise NotImplementedError(message)

    def list_chats(self, request, context):
        """Default list_chats handler: report UNIMPLEMENTED and raise."""
        message = 'Method not implemented!'
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details(message)
        raise NotImplementedError(message)

    def update_chat(self, request, context):
        """Default update_chat handler: report UNIMPLEMENTED and raise."""
        message = 'Method not implemented!'
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details(message)
        raise NotImplementedError(message)
def add_ChatManagerServicer_to_server(servicer, server):
    """Register the servicer's four RPC methods on the server.

    Args:
        servicer: Object providing create_chat, get_chat, list_chats and update_chat.
        server: Server object exposing ``add_generic_rpc_handlers``.
    """
    pb2 = idm_dot_api_dot_proto_dot_chat__manager__service__pb2

    # Handlers are inserted in the same order as the service declares them.
    handlers = {}
    handlers['create_chat'] = grpc.unary_unary_rpc_method_handler(
        servicer.create_chat,
        request_deserializer=pb2.CreateChatRequest.FromString,
        response_serializer=pb2.Chat.SerializeToString,
    )
    handlers['get_chat'] = grpc.unary_unary_rpc_method_handler(
        servicer.get_chat,
        request_deserializer=pb2.GetChatRequest.FromString,
        response_serializer=pb2.Chat.SerializeToString,
    )
    handlers['list_chats'] = grpc.unary_unary_rpc_method_handler(
        servicer.list_chats,
        request_deserializer=pb2.ListChatsRequest.FromString,
        response_serializer=pb2.Chats.SerializeToString,
    )
    handlers['update_chat'] = grpc.unary_unary_rpc_method_handler(
        servicer.update_chat,
        request_deserializer=pb2.UpdateChatRequest.FromString,
        response_serializer=pb2.Chat.SerializeToString,
    )

    service_handler = grpc.method_handlers_generic_handler('idm.api.proto.ChatManager', handlers)
    server.add_generic_rpc_handlers((service_handler,))
# This class is part of an EXPERIMENTAL API.
class ChatManager(object):
    """Static helpers that perform a single ChatManager RPC via ``grpc.experimental.unary_unary``.

    Each method forwards its arguments positionally; note that ``insecure`` is
    passed before ``call_credentials`` in the underlying call even though the
    method signatures list them the other way round.
    """

    @staticmethod
    def create_chat(request, target, options=(), channel_credentials=None, call_credentials=None,
                    insecure=False, compression=None, wait_for_ready=None, timeout=None, metadata=None):
        """Invoke the create_chat RPC against ``target`` and return its result."""
        pb2 = idm_dot_api_dot_proto_dot_chat__manager__service__pb2
        return grpc.experimental.unary_unary(
            request,
            target,
            '/idm.api.proto.ChatManager/create_chat',
            pb2.CreateChatRequest.SerializeToString,
            pb2.Chat.FromString,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
        )

    @staticmethod
    def get_chat(request, target, options=(), channel_credentials=None, call_credentials=None,
                 insecure=False, compression=None, wait_for_ready=None, timeout=None, metadata=None):
        """Invoke the get_chat RPC against ``target`` and return its result."""
        pb2 = idm_dot_api_dot_proto_dot_chat__manager__service__pb2
        return grpc.experimental.unary_unary(
            request,
            target,
            '/idm.api.proto.ChatManager/get_chat',
            pb2.GetChatRequest.SerializeToString,
            pb2.Chat.FromString,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
        )

    @staticmethod
    def list_chats(request, target, options=(), channel_credentials=None, call_credentials=None,
                   insecure=False, compression=None, wait_for_ready=None, timeout=None, metadata=None):
        """Invoke the list_chats RPC against ``target`` and return its result."""
        pb2 = idm_dot_api_dot_proto_dot_chat__manager__service__pb2
        return grpc.experimental.unary_unary(
            request,
            target,
            '/idm.api.proto.ChatManager/list_chats',
            pb2.ListChatsRequest.SerializeToString,
            pb2.Chats.FromString,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
        )

    @staticmethod
    def update_chat(request, target, options=(), channel_credentials=None, call_credentials=None,
                    insecure=False, compression=None, wait_for_ready=None, timeout=None, metadata=None):
        """Invoke the update_chat RPC against ``target`` and return its result."""
        pb2 = idm_dot_api_dot_proto_dot_chat__manager__service__pb2
        return grpc.experimental.unary_unary(
            request,
            target,
            '/idm.api.proto.ChatManager/update_chat',
            pb2.UpdateChatRequest.SerializeToString,
            pb2.Chat.FromString,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
        )

View File

@ -0,0 +1,37 @@
syntax = "proto3";
package idm.api.proto;
import "idm/api/proto/subscription_manager_service.proto";
// Profile service: a single unary RPC that takes a GetProfileRequest and
// returns a GetProfileResponse.
service Profile {
rpc get_profile(GetProfileRequest) returns (GetProfileResponse) {};
}
// Request for Profile.get_profile.
message GetProfileRequest {
// Chat whose profile is requested.
int64 chat_id = 1;
// Lower time bound for downloads; the profile service passes it to
// FROM_UNIXTIME, so it is presumably a Unix timestamp in seconds.
int32 starting_from = 2;
// Optional cap on the number of downloaded documents considered.
optional int32 last_n_documents = 3;
}
// A named series together with the ISSNs associated with it.
message Series {
repeated string issns = 1;
string name = 2;
}
// Compact description of a document: id, title, tags and ISSNs.
message ShortDocumentDescription {
int64 id = 1;
string title = 2;
repeated string tags = 3;
repeated string issns = 4;
}
// Response of Profile.get_profile.
message GetProfileResponse {
int64 downloads_count = 1;
int64 uploads_count = 2;
repeated string similar_users_logins = 3;
repeated string most_popular_tags = 4;
// Subscription is imported from subscription_manager_service.proto.
repeated Subscription subscriptions = 5;
repeated Series most_popular_series = 6;
repeated ShortDocumentDescription downloaded_documents = 7;
bool is_connectome_enabled = 8;
}

View File

@ -0,0 +1,60 @@
syntax = "proto3";
package idm.api.proto;
// Subscription management service with three unary RPCs: fetch the
// subscriptions of a single chat as a task, create a subscription, and
// reschedule existing subscriptions.
service SubscriptionManager {
rpc get_single_chat_task(GetSingleChatTaskRequest) returns (GetSingleChatTaskResponse) {};
rpc subscribe(SubscribeRequest) returns (SubscribeResponse) {};
rpc reschedule_subscriptions(RescheduleSubscriptionsRequest) returns (RescheduleSubscriptionsResponse) {}
}
// A stored subscription of a chat.
message Subscription {
// Kind of subscription.
enum Type {
CUSTOM = 0;
DIGEST = 1;
DOI = 2;
}
int64 id = 1;
int64 chat_id = 2;
string subscription_query = 3;
// Schedule expression; the subscription service validates it with croniter.
string schedule = 4;
bool is_oneshot = 5;
bool is_downloadable = 6;
// Expiry moment; the service compares it against int(time.time()), so it is
// presumably a Unix timestamp in seconds.
optional uint32 valid_until = 7;
// Moment of the next scheduled check (presumably Unix seconds as well).
uint32 next_check_at = 8;
Type subscription_type = 9;
}
// Replacement schedule carried by RescheduleSubscriptionsRequest.
message NewSchedule {
bool is_persistent = 1;
string schedule = 2;
}
// Request for SubscriptionManager.reschedule_subscriptions.
message RescheduleSubscriptionsRequest {
// Selects the subscriptions to reschedule: either one id or a query string.
oneof subscriptions_ids {
int64 subscription_id = 1;
string subscription_query = 2;
}
bool is_fired = 3;
optional NewSchedule new_schedule = 4;
}
// Empty response of reschedule_subscriptions.
message RescheduleSubscriptionsResponse {}
// Empty request of get_single_chat_task.
message GetSingleChatTaskRequest {}
// Response of get_single_chat_task: a list of subscriptions plus a chat id.
message GetSingleChatTaskResponse {
repeated Subscription subscriptions = 1;
int64 chat_id = 2;
}
// Request for SubscriptionManager.subscribe.
// Field numbers 6 and 8 are not used by this message.
message SubscribeRequest {
int64 chat_id = 1;
string subscription_query = 2;
string schedule = 3;
bool is_oneshot = 4;
bool is_downloadable = 5;
optional uint32 valid_until = 7;
Subscription.Type subscription_type = 9;
}
// Empty response of subscribe.
message SubscribeResponse {}

View File

@ -1,4 +1,4 @@
import logging
import sys
from grpc import StatusCode
from idm.api.proto.chat_manager_service_pb2 import Chat as ChatPb
@ -7,11 +7,11 @@ from idm.api.proto.chat_manager_service_pb2_grpc import (
ChatManagerServicer,
add_ChatManagerServicer_to_server,
)
from izihawa_utils.pb_to_json import MessageToDict
from library.aiogrpctools.base import (
BaseService,
aiogrpc_request_wrapper,
)
from psycopg.rows import dict_row
from pypika import (
PostgreSQLQuery,
Table,
@ -21,13 +21,8 @@ from pypika import (
# gRPC servicer for chat records kept in the `chats` table.
# NOTE(review): this span appears to be a rendered diff. The `@ -a,b +c,d @`
# lines are hunk headers, and removed and added source lines sit next to each
# other without +/- markers (e.g. two consecutive registration calls in
# `start`). Read it as before/after pairs, not as runnable code.
class ChatManagerService(ChatManagerServicer, BaseService):
chats_table = Table('chats')
def __init__(self, server, service_name, pool_holder):
super().__init__(service_name=service_name)
self.server = server
self.pool_holder = pool_holder
# Registers this servicer on a gRPC server (`self.server` in one variant,
# `self.application.server` in the other).
async def start(self):
add_ChatManagerServicer_to_server(self, self.server)
add_ChatManagerServicer_to_server(self, self.application.server)
# create_chat: inserts a chat row, doing nothing when `chat_id` already
# exists, then returns the stored chat via `_get_chat`.
@aiogrpc_request_wrapper()
async def create_chat(self, request, context, metadata):
@ -37,6 +32,7 @@ class ChatManagerService(ChatManagerServicer, BaseService):
username=request.username,
is_system_messaging_enabled=True,
is_discovery_enabled=True,
is_connectome_enabled=False,
)
query = (
PostgreSQLQuery
@ -47,6 +43,7 @@ class ChatManagerService(ChatManagerServicer, BaseService):
self.chats_table.username,
self.chats_table.is_system_messaging_enabled,
self.chats_table.is_discovery_enabled,
self.chats_table.is_connectome_enabled,
)
.insert(
chat.chat_id,
@ -54,57 +51,89 @@ class ChatManagerService(ChatManagerServicer, BaseService):
chat.username,
chat.is_system_messaging_enabled,
chat.is_discovery_enabled,
chat.is_connectome_enabled,
)
.on_conflict('chat_id')
.do_nothing()
).get_sql()
async with self.pool_holder.pool.acquire() as session:
await session.execute(query)
return await self._get_chat(session=session, chat_id=request.chat_id, context=context)
await self.application.pool_holder['idm'].execute(query)
return await self._get_chat(chat_id=request.chat_id, context=context)
# _get_chat: selects the chat with the given `chat_id` and builds a ChatPb
# from the row; aborts the call with NOT_FOUND when no row matches.
async def _get_chat(self, session, chat_id, context):
query = (
async def _get_chat(self, chat_id, context):
sql = (
PostgreSQLQuery
.from_(self.chats_table)
.select('*')
.select(
self.chats_table.chat_id,
self.chats_table.username,
self.chats_table.language,
self.chats_table.is_system_messaging_enabled,
self.chats_table.is_discovery_enabled,
self.chats_table.is_connectome_enabled,
self.chats_table.ban_until,
self.chats_table.ban_message,
self.chats_table.is_admin,
self.chats_table.created_at,
self.chats_table.updated_at,
)
.where(self.chats_table.chat_id == chat_id)
).get_sql()
result = await session.execute(query)
chat = await result.fetchone()
if chat is None:
chats = [ChatPb(**row) async for row in self.application.pool_holder['idm'].iterate(sql, row_factory=dict_row)]
if not chats:
await context.abort(StatusCode.NOT_FOUND, 'not_found')
return ChatPb(**chat)
return chats[0]
# get_chat: RPC entry point that delegates to `_get_chat`.
@aiogrpc_request_wrapper()
@aiogrpc_request_wrapper(log=False)
async def get_chat(self, request, context, metadata):
async with self.pool_holder.pool.acquire() as session:
return await self._get_chat(session=session, chat_id=request.chat_id, context=context)
return await self._get_chat(chat_id=request.chat_id, context=context)
# list_chats: returns at most 10 chats whose `ban_until` is greater than
# `request.banned_at_moment`.
@aiogrpc_request_wrapper()
@aiogrpc_request_wrapper(log=False)
async def list_chats(self, request, context, metadata):
query = (
sql = (
PostgreSQLQuery
.from_(self.chats_table)
.select('*')
.select(
self.chats_table.chat_id,
self.chats_table.username,
self.chats_table.language,
self.chats_table.is_system_messaging_enabled,
self.chats_table.is_discovery_enabled,
self.chats_table.is_connectome_enabled,
self.chats_table.ban_until,
self.chats_table.ban_message,
self.chats_table.is_admin,
self.chats_table.created_at,
self.chats_table.updated_at,
)
.where(self.chats_table.ban_until > request.banned_at_moment)
.limit(10)
).get_sql()
async with self.pool_holder.pool.acquire() as session:
results = await session.execute(query)
chats = await results.fetchall()
return ChatsPb(
chats=list(map(lambda x: ChatPb(**x), chats))
)
return ChatsPb(chats=[ChatPb(**row) async for row in self.application.pool_holder['idm'].iterate(sql, row_factory=dict_row)])
# update_chat: builds an UPDATE that sets every request field which belongs
# to a oneof and is present on the request, filtered by `chat_id`, and
# returns the updated row as a ChatPb. One variant aborts with NOT_FOUND when
# the statement returns no row.
# NOTE(review): if no such field is present, no SET clause is added; confirm
# what SQL pypika emits for that case.
@aiogrpc_request_wrapper()
async def update_chat(self, request, context, metadata):
query = PostgreSQLQuery.update(self.chats_table)
sql = PostgreSQLQuery.update(self.chats_table)
for field in request.DESCRIPTOR.fields:
if field.containing_oneof and request.HasField(field.name):
field_value = getattr(request, field.name)
query = query.set(field.name, field_value)
query = query.where(self.chats_table.chat_id == request.chat_id).returning('*').get_sql()
async with self.pool_holder.pool.acquire() as session:
result = await session.execute(query)
chat = await result.fetchone()
return ChatPb(**chat)
sql = sql.set(field.name, field_value)
sql = sql.where(self.chats_table.chat_id == request.chat_id).returning(
self.chats_table.chat_id,
self.chats_table.username,
self.chats_table.language,
self.chats_table.is_system_messaging_enabled,
self.chats_table.is_discovery_enabled,
self.chats_table.is_connectome_enabled,
self.chats_table.ban_until,
self.chats_table.ban_message,
self.chats_table.is_admin,
self.chats_table.created_at,
self.chats_table.updated_at,
).get_sql()
rows = []
async for row in self.application.pool_holder['idm'].iterate(sql, row_factory=dict_row):
rows.append(row)
if not rows:
return await context.abort(StatusCode.NOT_FOUND, 'not_found')
return ChatPb(**rows[0])

204
idm/api/services/profile.py Normal file
View File

@ -0,0 +1,204 @@
import asyncio
from collections import defaultdict
from idm.api.proto import (
profile_service_pb2,
profile_service_pb2_grpc,
subscription_manager_service_pb2,
subscription_manager_service_pb2_grpc,
)
from library.aiogrpctools.base import (
BaseService,
aiogrpc_request_wrapper,
)
from psycopg.rows import dict_row
from pypika import (
CustomFunction,
PostgreSQLQuery,
Table,
functions,
)
from pypika.pseudocolumns import PseudoColumn
class ProfileService(profile_service_pb2_grpc.ProfileServicer, BaseService):
    """gRPC servicer that assembles a chat's profile.

    The profile combines recently downloaded documents, the most frequent tags
    and series among them, the number of uploads, the chat's subscriptions and
    the chat's `is_connectome_enabled` flag.
    """

    # pypika handles for the tables queried below.
    chats_table = Table('chats')
    scimag_table = Table('scimag')
    scitech_table = Table('scitech')
    sharience_table = Table('sharience')
    subscriptions_table = Table('subscriptions')
    Unnest = CustomFunction('UNNEST', ['column'])

    async def start(self):
        """Register this servicer on the application's gRPC server."""
        profile_service_pb2_grpc.add_ProfileServicer_to_server(self, self.application.server)

    async def get_downloaded_documents(self, chat_id, starting_from=0, last_n_documents=None):
        """Return short descriptions of documents downloaded by a chat, newest first.

        Args:
            chat_id: Chat whose download log is read.
            starting_from: Lower time bound, passed to ``FROM_UNIXTIME``.
            last_n_documents: Maximum number of log rows to read; ``None`` means
                effectively unlimited (2**32 - 1).

        Returns:
            A list of ``ShortDocumentDescription`` in download-log order. Ids that
            are found in neither `scimag` nor `scitech` are dropped.
        """
        if last_n_documents is None:
            last_n_documents = 2**32 - 1
        # The statement is assembled as text, so the numeric parameters are
        # coerced with int(): anything non-numeric raises instead of reaching SQL.
        query = f'''
        select document_id from telegram_statbox_log
        where mode = 'download' and action = 'get'
        and chat_id = {int(chat_id)} and event_datetime > FROM_UNIXTIME({int(starting_from)})
        order by event_datetime desc limit {int(last_n_documents)}
        '''
        document_ids = []
        async for row in self.application.clickhouse_client.iterate(query):
            document_ids.append(row['document_id'])
        if not document_ids:
            return []

        # `*` between two pypika queries combines them into one statement;
        # `scitech` has no ISSNs, so an empty text array stands in for the column.
        document_query = (
            PostgreSQLQuery
            .from_(self.scimag_table)
            .select(
                self.scimag_table.id,
                self.scimag_table.title,
                self.scimag_table.issns,
                self.scimag_table.tags,
            )
            .where(self.scimag_table.id.isin(document_ids))
            * PostgreSQLQuery
            .from_(self.scitech_table)
            .select(
                self.scitech_table.id,
                self.scitech_table.title,
                PseudoColumn('array[]::text[]').as_('issns'),
                self.scitech_table.tags,
            )
            .where(self.scitech_table.id.isin(document_ids))
        ).get_sql()

        documents_dict = {}
        async for document_row in self.application.pool_holder['nexus'].iterate(document_query, row_factory=dict_row):
            documents_dict[document_row['id']] = profile_service_pb2.ShortDocumentDescription(
                id=document_row['id'],
                title=document_row['title'],
                issns=document_row['issns'],
                tags=document_row['tags'],
            )

        # Re-emit in download-log order, skipping ids that were not resolved.
        documents = []
        for document_id in document_ids:
            document = documents_dict.get(document_id)
            if document:
                documents.append(document)
        return documents

    async def get_chat_config(self, chat_id):
        """Return the chat's `is_connectome_enabled` value, or ``None`` if the chat has no row."""
        async for row in self.application.pool_holder['idm'].iterate(
            PostgreSQLQuery
            .from_(self.chats_table)
            .select(self.chats_table.is_connectome_enabled)
            .where(self.chats_table.chat_id == chat_id)
            .get_sql()
        ):
            return row[0]

    async def get_stats(self, downloaded_documents):
        """Compute the most frequent series and tags among downloaded documents.

        Args:
            downloaded_documents: Iterable of objects with ``issns`` and ``tags``.

        Returns:
            A tuple ``(most_popular_series, most_popular_tags)``: the `series` rows
            overlapping the 7 most frequent ISSNs, and the 7 most frequent tags.
        """
        issns_counter = defaultdict(int)
        tags_counter = defaultdict(int)
        for download_document in downloaded_documents:
            for issn in download_document.issns:
                issns_counter[issn] += 1
            for tag in download_document.tags:
                tags_counter[tag] += 1
        most_popular_issns = sorted(issns_counter, key=issns_counter.get, reverse=True)[:7]
        most_popular_tags = sorted(tags_counter, key=tags_counter.get, reverse=True)[:7]

        most_popular_series = []
        if not most_popular_issns:
            # Nothing can overlap an empty ISSN set, so skip the query entirely.
            return most_popular_series, most_popular_tags

        # Render each ISSN as an SQL string literal, doubling embedded single
        # quotes. The earlier code interpolated the Python list repr through an
        # f-string and then called .format() on the result, so this quoting was
        # never applied and braces in the data could make .format() raise.
        issns_literal = ','.join("'" + issn.replace("'", "''") + "'" for issn in most_popular_issns)
        series_sql = f"select name, issns from series where issns && array[{issns_literal}]::text[]"
        async for row in self.application.pool_holder['nexus'].iterate(series_sql, row_factory=dict_row):
            most_popular_series.append(profile_service_pb2.Series(
                name=row['name'],
                issns=row['issns'],
            ))
        return most_popular_series, most_popular_tags

    async def get_uploads_count(self, chat_id):
        """Return the number of distinct `parent_id` values uploaded by the chat (0 if none)."""
        sql = (
            PostgreSQLQuery.from_(self.sharience_table)
            .select(functions.Count(self.sharience_table.parent_id).distinct())
            .groupby(self.sharience_table.uploader_id)
            .where(self.sharience_table.uploader_id == chat_id)
        ).get_sql()
        async for row in self.application.pool_holder['nexus'].iterate(sql):
            return row[0]
        # The grouped query yields no row when the chat has no uploads.
        return 0

    async def get_subscriptions(self, chat_id):
        """Return the chat's subscriptions as ``Subscription`` messages, ordered by id."""
        subscriptions_sql = (
            PostgreSQLQuery.select(
                self.subscriptions_table.id,
                self.subscriptions_table.chat_id,
                self.subscriptions_table.subscription_query,
                self.subscriptions_table.schedule,
                self.subscriptions_table.is_oneshot,
                self.subscriptions_table.is_downloadable,
                self.subscriptions_table.valid_until,
                self.subscriptions_table.next_check_at,
                self.subscriptions_table.subscription_type,
            )
            .from_(self.subscriptions_table)
            .where(self.subscriptions_table.chat_id == chat_id)
            .orderby(self.subscriptions_table.id)
        ).get_sql()
        subscriptions = []
        async for row in self.application.pool_holder['idm'].iterate(subscriptions_sql, row_factory=dict_row):
            subscriptions.append(subscription_manager_service_pb2.Subscription(
                id=row['id'],
                chat_id=row['chat_id'],
                subscription_query=row['subscription_query'],
                schedule=row['schedule'],
                is_oneshot=row['is_oneshot'],
                is_downloadable=row['is_downloadable'],
                valid_until=row['valid_until'],
                next_check_at=row['next_check_at'],
                subscription_type=row['subscription_type'],
            ))
        return subscriptions

    @aiogrpc_request_wrapper()
    async def get_profile(
        self,
        request: profile_service_pb2.GetProfileRequest,
        context,
        metadata,
    ) -> profile_service_pb2.GetProfileResponse:
        """Build the profile for ``request.chat_id``.

        Downloads are loaded first because the statistics depend on them; the
        remaining lookups run concurrently. The document list is only included
        in the response when `is_connectome_enabled` is truthy, while
        `downloads_count` is always reported.
        """
        downloaded_documents = await self.get_downloaded_documents(
            chat_id=request.chat_id,
            starting_from=request.starting_from,
            last_n_documents=request.last_n_documents if request.HasField('last_n_documents') else None,
        )
        uploads_count, stats, subscriptions, is_connectome_enabled = await asyncio.gather(
            self.get_uploads_count(chat_id=request.chat_id),
            self.get_stats(downloaded_documents=downloaded_documents),
            self.get_subscriptions(chat_id=request.chat_id),
            self.get_chat_config(chat_id=request.chat_id),
        )
        most_popular_series, most_popular_tags = stats
        self.statbox(
            mode='profile',
            action='show',
            chat_id=request.chat_id,
            uploads_count=uploads_count,
            downloads_count=len(downloaded_documents),
            most_popular_tags=most_popular_tags,
            most_popular_series=[series.name for series in most_popular_series],
            is_connectome_enabled=is_connectome_enabled,
        )
        return profile_service_pb2.GetProfileResponse(
            most_popular_tags=most_popular_tags,
            most_popular_series=most_popular_series,
            subscriptions=subscriptions,
            uploads_count=uploads_count,
            downloads_count=len(downloaded_documents),
            downloaded_documents=downloaded_documents if is_connectome_enabled else [],
            is_connectome_enabled=is_connectome_enabled,
        )

View File

@ -0,0 +1,202 @@
import logging
import time
from croniter import croniter
from grpc import StatusCode
from idm.api.proto import (
subscription_manager_service_pb2,
subscription_manager_service_pb2_grpc,
)
from library.aiogrpctools.base import (
BaseService,
aiogrpc_request_wrapper,
)
from psycopg.rows import dict_row
from pypika import (
PostgreSQLQuery,
Table,
)
class SubscriptionManagerService(subscription_manager_service_pb2_grpc.SubscriptionManagerServicer, BaseService):
chats_table = Table('chats')
subscriptions_table = Table('subscriptions')
async def start(self):
    """Register this servicer on the application's gRPC server."""
    grpc_server = self.application.server
    subscription_manager_service_pb2_grpc.add_SubscriptionManagerServicer_to_server(self, grpc_server)
@aiogrpc_request_wrapper(log=False)
async def get_single_chat_task(
    self,
    request: subscription_manager_service_pb2.GetSingleChatTaskRequest,
    context,
    metadata,
) -> subscription_manager_service_pb2.GetSingleChatTaskResponse:
    """Return the due subscriptions of the chat with the earliest pending check.

    The subquery picks the single (chat_id, next_check_at) pair with the
    smallest non-null `next_check_at` among unexpired subscriptions of chats
    that have discovery enabled. The outer query then returns every
    subscription of that chat whose `next_check_at` is less than the picked
    value plus 5.

    Returns:
        A ``GetSingleChatTaskResponse`` with the matching subscriptions and
        their chat id. When nothing matches, ``subscriptions`` is empty and
        ``chat_id`` is left unset.
    """
    subquery = (
        PostgreSQLQuery
        .from_(self.subscriptions_table)
        .select(
            self.subscriptions_table.chat_id,
            self.subscriptions_table.next_check_at,
        )
        .inner_join(self.chats_table)
        .using('chat_id')
        # `== True` is intentional: pypika turns the comparison into SQL.
        .where(self.chats_table.is_discovery_enabled == True)
        .where(self.subscriptions_table.next_check_at.notnull())
        .where(self.subscriptions_table.valid_until > int(time.time()))
        .orderby(self.subscriptions_table.next_check_at).limit(1)
    )
    query = (
        PostgreSQLQuery.select(
            self.subscriptions_table.id,
            self.subscriptions_table.chat_id,
            self.subscriptions_table.subscription_query,
            self.subscriptions_table.schedule,
            self.subscriptions_table.is_oneshot,
            self.subscriptions_table.is_downloadable,
            self.subscriptions_table.next_check_at,
            self.subscriptions_table.valid_until,
            self.subscriptions_table.subscription_type,
        )
        .from_(self.subscriptions_table)
        .inner_join(subquery)
        .using('chat_id')
        .where(self.subscriptions_table.next_check_at < subquery.next_check_at + 5)
        .orderby(self.subscriptions_table.next_check_at)
    ).get_sql()
    subscriptions = []
    chat_id = None
    async for row in self.application.pool_holder['idm'].iterate(query, row_factory=dict_row):
        # Every row belongs to the chat chosen by the subquery.
        chat_id = row['chat_id']
        subscriptions.append(subscription_manager_service_pb2.Subscription(**row))
    return subscription_manager_service_pb2.GetSingleChatTaskResponse(
        subscriptions=subscriptions,
        chat_id=chat_id,
    )
@aiogrpc_request_wrapper(log=False)
async def subscribe(
    self,
    request: subscription_manager_service_pb2.SubscribeRequest,
    context,
    metadata,
) -> subscription_manager_service_pb2.SubscribeResponse:
    """Insert a subscription, or refresh `valid_until` of an existing one.

    A non-empty ``request.schedule`` must be accepted by croniter; otherwise
    the call is aborted with INVALID_ARGUMENT. When ``valid_until`` is not set
    on the request, 2 ** 31 - 1 is stored. On a conflict on
    (chat_id, subscription_query) only `valid_until` is updated.
    """
    if request.HasField('valid_until'):
        valid_until = request.valid_until
    else:
        valid_until = 2 ** 31 - 1

    next_check_at = None
    schedule = request.schedule
    if schedule:
        if not croniter.is_valid(schedule):
            return await context.abort(StatusCode.INVALID_ARGUMENT, schedule)
        next_check_at = croniter(schedule).next(ret_type=float)

    table = self.subscriptions_table
    insert_builder = PostgreSQLQuery.into(table).columns(
        table.chat_id,
        table.subscription_query,
        table.schedule,
        table.is_oneshot,
        table.is_downloadable,
        table.valid_until,
        table.next_check_at,
        table.subscription_type,
    )
    insert_builder = insert_builder.insert(
        request.chat_id,
        request.subscription_query,
        schedule,
        request.is_oneshot,
        request.is_downloadable,
        valid_until,
        next_check_at,
        request.subscription_type,
    )
    # Upsert: an existing (chat_id, subscription_query) pair keeps its row and
    # only has `valid_until` overwritten.
    upsert_builder = (
        insert_builder
        .on_conflict(table.chat_id, table.subscription_query)
        .do_update(table.valid_until, valid_until)
    )
    await self.application.pool_holder['idm'].execute(upsert_builder.get_sql())
    return subscription_manager_service_pb2.SubscribeResponse()
@aiogrpc_request_wrapper(log=False)
async def reschedule_subscriptions(
self,
request: subscription_manager_service_pb2.RescheduleSubscriptionsRequest,
context,
metadata,
) -> subscription_manager_service_pb2.RescheduleSubscriptionsResponse:
response_pb = subscription_manager_service_pb2.RescheduleSubscriptionsResponse()
match str(request.WhichOneof('subscriptions_ids')):
case 'subscription_id':
select_condition = self.subscriptions_table.id == request.subscription_id
case 'subscription_query':
select_condition = self.subscriptions_table.subscription_query == request.subscription_query
case _:
raise RuntimeError(f"Unknown file type {request.WhichOneof('subscriptions_ids')}")
if request.HasField('new_schedule'):
schedule = request.new_schedule.schedule
next_check_at = None
if request.new_schedule.schedule:
if not croniter.is_valid(schedule):
return await context.abort(StatusCode.INVALID_ARGUMENT, schedule)
next_check_at = int(croniter(schedule).next(ret_type=float))
update_sql = (
PostgreSQLQuery.update(self.subscriptions_table)
.where(select_condition)
.set(self.subscriptions_table.next_check_at, next_check_at)
)
if request.new_schedule.is_persistent:
update_sql = update_sql.set(self.subscriptions_table.schedule, schedule)
update_sql = update_sql.get_sql()
await self.application.pool_holder['idm'].execute(update_sql)
logging.getLogger('statbox').info({
'action': 'rescheduled',
'mode': 'reschedule_subscriptions',
'sql': update_sql,
})
else:
select_sql = (
PostgreSQLQuery
.from_(self.subscriptions_table).select(
self.subscriptions_table.id,
self.subscriptions_table.schedule,
self.subscriptions_table.is_oneshot)
.where(select_condition)
)
async for row in self.application.pool_holder['idm'].iterate(select_sql.get_sql(), row_factory=dict_row):
if row['is_oneshot'] and request.is_fired:
delete_sql = (
PostgreSQLQuery
.from_(self.subscriptions_table)
.delete()
.where(self.subscriptions_table.id == row['id'])
).get_sql()
await self.application.pool_holder['idm'].execute(delete_sql)
logging.getLogger('statbox').info({
'action': 'delete',
'mode': 'reschedule_subscriptions',
'subscription_id': row['id'],
'is_oneshot': row['is_oneshot'],
'is_fired': request.is_fired,
})
else:
next_check_at = int(croniter(row['schedule']).next(ret_type=float))
update_sql = (
PostgreSQLQuery
.update(self.subscriptions_table)
.where(self.subscriptions_table.id == row['id'])
.set(self.subscriptions_table.next_check_at, next_check_at)
).get_sql()
await self.application.pool_holder['idm'].execute(update_sql)
logging.getLogger('statbox').info({
'action': 'rescheduled',
'mode': 'reschedule_subscriptions',
'sql': update_sql,
})
return response_pb

View File

@ -11,8 +11,8 @@ def images_install():
container_pull(
name = "ubuntu",
digest = "sha256:d0b4808a158b42b6efb3ae93abb567b1cb6ee097221813c0315390de0fa320b9",
digest = "sha256:c27987afd3fd8234bcf7a81e46cf86c2c4c10ef06e80f0869c22c6ff22b29f9d",
registry = "index.docker.io",
repository = "library/ubuntu",
tag = "21.10",
tag = "22.04",
)

View File

@ -14,7 +14,7 @@ download_pkgs(
"libgomp1",
"libgoogle-perftools-dev",
"libprotobuf23",
"libssl1.1",
"libssl3",
],
)
@ -38,8 +38,8 @@ download_pkgs(
name = "download-base-python-image",
image_tar = ":base-production-image.tar",
packages = [
"python3.9",
"python3.9-distutils",
"python3",
"python3-distutils",
],
)
@ -54,10 +54,9 @@ install_pkgs(
container_image(
name = "base-python-image",
base = ":install-base-python-image",
entrypoint = ["/usr/bin/python3.9"],
entrypoint = ["/usr/bin/python3"],
symlinks = {
"/usr/bin/python": "/usr/bin/python3.9",
"/usr/bin/python3": "/usr/bin/python3.9",
"/usr/bin/python": "/usr/bin/python3",
},
visibility = ["//visibility:public"],
)

View File

@ -13,33 +13,50 @@ from library.logging import error_log
class AioGrpcServer(AioRootThing):
def __init__(self, address, port):
def __init__(self, address, port, max_message_length: int = 300 * 1024 * 1024, termination_timeout: float = 1.0):
super().__init__()
self.address = address
self.port = port
self.server = aio.server()
self.termination_timeout = termination_timeout
self.server = aio.server(
options=[
('grpc.max_send_message_length', max_message_length),
('grpc.max_receive_message_length', max_message_length),
]
)
self.server.add_insecure_port(f'{address}:{port}')
async def start(self):
logging.getLogger('debug').info({
'action': 'starting',
logging.getLogger('debug').debug({
'action': 'start',
'address': self.address,
'mode': 'grpc',
'port': self.port,
'extras': [x.__class__.__name__ for x in self.starts + self.waits]
'extras': [x.__class__.__name__ for x in self.starts]
})
await self.server.start()
await self.server.wait_for_termination()
r = await self.server.start()
logging.getLogger('debug').debug({
'action': 'started',
'address': self.address,
'mode': 'grpc',
'port': self.port,
})
return r
async def stop(self):
logging.getLogger('debug').info({
'action': 'stopping',
logging.getLogger('debug').debug({
'action': 'stop',
'mode': 'grpc',
})
await self.server.stop(None)
r = await self.server.stop(self.termination_timeout)
logging.getLogger('debug').debug({
'action': 'stopped',
'mode': 'grpc',
})
return r
def log_config(self, config):
logging.getLogger('debug').info(
logging.getLogger('debug').debug(
'\n' + yaml.safe_dump(config.get_files()),
)
@ -47,8 +64,9 @@ class AioGrpcServer(AioRootThing):
class BaseService(AioThing):
error_mapping = {}
def __init__(self, service_name):
def __init__(self, application, service_name):
super().__init__()
self.application = application
self.service_name = service_name
self.class_name = camel_to_snake(self.__class__.__name__)

View File

@ -1,27 +1,87 @@
import asyncio
import logging
from typing import Optional
import psycopg
from aiokit import AioThing
from izihawa_utils.exceptions import BaseError
from psycopg.rows import tuple_row
from psycopg_pool import AsyncConnectionPool
class OperationalError(BaseError):
level = logging.WARNING
code = 'operational_error'
class AioPostgresPoolHolder(AioThing):
def __init__(self, conninfo, timeout=30, min_size=1, max_size=4):
def __init__(self, conninfo, timeout=30, min_size=1, max_size=1, is_recycling=True):
super().__init__()
self.pool = None
self.fn = lambda: AsyncConnectionPool(
conninfo=conninfo,
timeout=timeout,
min_size=min_size,
max_size=max_size,
max_size=max_size + int(is_recycling),
)
self.is_recycling = is_recycling
self.recycling_task = None
self.timeout = timeout
async def _get_connection(self):
    """Take a connection from the pool and watch its socket for readability.

    Returns:
        (event, connection): the event is set by the running loop whenever
        the connection's file descriptor becomes readable.
    """
    readable = asyncio.Event()
    connection = await self.pool.getconn()
    loop = asyncio.get_running_loop()
    loop.add_reader(connection.fileno(), readable.set)
    return readable, connection
async def recycling(self):
    """Background task: hold one pooled connection and watch it for failure.

    The connection obtained from `_get_connection` is probed with
    `SELECT 1` every time its socket becomes readable. When the probe fails
    with `psycopg.OperationalError` the connection is returned to the pool,
    the pool is asked to check its connections, and a fresh connection is
    taken for watching. Cancellation ends the task silently; the watched
    connection is always unregistered from the loop and returned to the pool.
    """
    logging.getLogger('debug').debug({
        'action': 'start_recycling',
        'mode': 'pool',
        'stats': self.pool.get_stats(),
    })
    loop = asyncio.get_running_loop()
    ev, conn = await self._get_connection()
    # Remember the descriptor now: it is needed to unregister the reader even
    # if the connection is already broken by the time we get there.
    fd = conn.fileno()
    try:
        while True:
            try:
                await asyncio.wait_for(ev.wait(), self.timeout)
            except asyncio.TimeoutError:
                continue
            try:
                await conn.execute("SELECT 1")
            except psycopg.OperationalError:
                loop.remove_reader(fd)
                # Forget the connection before handing it back so that the
                # `finally` clause cannot return it a second time if taking
                # the replacement fails.
                broken, conn = conn, None
                await self.pool.putconn(broken)
                await self.pool.check()
                ev, conn = await self._get_connection()
                fd = conn.fileno()
            else:
                # The probe succeeded: re-arm the event, otherwise it stays
                # set forever and this loop spins issuing `SELECT 1`.
                ev.clear()
    except asyncio.CancelledError:
        # Deliberately swallowed: `stop` awaits this task after cancelling it.
        pass
    finally:
        if conn is not None:
            # Do not leave a reader registered for a connection that goes
            # back to the pool.
            loop.remove_reader(fd)
            await self.pool.putconn(conn)
        logging.getLogger('debug').debug({
            'action': 'stopped_recycling',
            'mode': 'pool',
            'stats': self.pool.get_stats(),
        })
async def start(self):
    """Create the pool on first use, wait for it, then launch recycling.

    Does nothing when a pool already exists. The recycling task is started
    only when `is_recycling` is set.
    """
    if self.pool:
        return
    self.pool = self.fn()
    await self.pool.wait()
    if not self.is_recycling:
        return
    self.recycling_task = asyncio.create_task(self.recycling())
async def stop(self):
    """Cancel the recycling task (if any), then close and drop the pool.

    Does nothing when there is no pool.
    """
    if not self.pool:
        return
    task = self.recycling_task
    if task:
        task.cancel()
        await task
        self.recycling_task = None
    debug_logger = logging.getLogger('debug')
    debug_logger.debug({
        'action': 'close',
        'mode': 'pool',
        'stats': self.pool.get_stats(),
    })
    await self.pool.close()
    self.pool = None

View File

@ -1,22 +0,0 @@
load("@build_bazel_rules_nodejs//:index.bzl", "js_library")
js_library(
name = "base-client",
package_name = "base-client",
srcs = ["base-client.js"],
visibility = ["//visibility:public"],
deps = [
"//library/js:utils",
"@npm//axios",
],
)
js_library(
name = "utils",
package_name = "utils",
srcs = ["utils.js"],
visibility = ["//visibility:public"],
deps = [
"@npm//lodash",
],
)

View File

@ -1,52 +0,0 @@
import { removeUndefined, toCamel, toSnake } from 'utils'
import Axios from 'axios'
/**
 * Thin wrapper around an Axios instance.
 *
 * Outgoing request bodies are stripped of undefined values and converted to
 * snake_case keys; incoming response bodies are converted to camelCase keys
 * and unwrapped (the interceptor resolves with `response.data`).
 */
export default class BaseClient {
  /**
   * @param {Object} [options]
   * @param {string} options.baseUrl base URL passed to Axios as `baseURL`
   * @param {Object|Function|null} [options.headers] extra headers, or a function returning them, merged into every request
   * @param {Function|null} [options.beforeRequest] called before each request is sent
   * @param {Function|null} [options.afterRequest] called after each response or error
   * @param {Function|null} [options.errorHandler] receives the error; its return value replaces the rejection
   * @param {boolean} [options.withCredentials]
   */
  constructor ({ baseUrl, headers = null, beforeRequest = null, afterRequest = null, errorHandler = null, withCredentials = false } = {}) {
    const camelizeBody = (data) => {
      return toCamel(data)
    }
    const client = Axios.create({
      baseURL: baseUrl,
      withCredentials: withCredentials,
      headers: {
        'X-Bypass-Cache': 1,
        'Accept-Language': 'en'
      },
      transformResponse: Axios.defaults.transformResponse.concat([camelizeBody])
    })
    client.defaults.withCredentials = withCredentials
    this.nativeClient = client

    const notifyFinished = () => {
      if (afterRequest) {
        afterRequest()
      }
    }

    const prepareRequest = (config) => {
      if (config.data) {
        config.data = toSnake(removeUndefined(config.data))
      }
      if (config.headers) {
        // `headers` may be a factory so that values can change between requests.
        const extraHeaders = typeof headers === 'function' ? headers() : headers
        config.headers = Object.assign(config.headers, extraHeaders)
      }
      if (beforeRequest) {
        beforeRequest()
      }
      return config
    }

    const unwrapResponse = (response) => {
      notifyFinished()
      return response.data
    }

    const handleFailure = (error) => {
      notifyFinished()
      return errorHandler ? errorHandler(error) : Promise.reject(error)
    }

    client.interceptors.request.use(prepareRequest)
    client.interceptors.response.use(unwrapResponse, handleFailure)
  }
}

View File

@ -1,145 +0,0 @@
import lodash from 'lodash'
/**
 * Split `array` into consecutive lines of at most `lineSize` items.
 *
 * @param {Array} array items to distribute
 * @param {number} lineSize maximum number of items per line
 * @returns {Array<Array>} the lines, in order; the last one may be shorter
 */
export const alignToLines = function (array, lineSize) {
  const total = array.length
  const lines = []
  let offset = 0
  while (offset < total) {
    const line = []
    for (let column = 0; column < lineSize && offset + column < total; column++) {
      line.push(array[offset + column])
    }
    lines.push(line)
    offset += lineSize
  }
  return lines
}
/**
 * Recursively delete every own enumerable property whose value is `undefined`.
 * The object is modified in place and also returned.
 *
 * @param {Object} obj object to clean
 * @returns {Object} the same `obj`
 */
export function removeUndefined (obj) {
  for (const key of Object.keys(obj)) {
    const value = obj[key]
    if (value && typeof value === 'object') {
      removeUndefined(value)
      continue
    }
    if (value === undefined) {
      delete obj[key]
    }
  }
  return obj
}
/**
 * Return a copy of `o` with the keys of nested plain objects transformed by `func`.
 *
 * @param {*} o value to convert; anything that is not an array or a plain object is returned as is
 * @param {number} depth number of nesting levels to descend; 0 stops, a negative value never reaches 0 and so does not limit the descent
 * @param {Function} func key transformation, e.g. `lodash.snakeCase`
 * @param {Array} [exclude] array items / object keys that are copied untouched, without descending
 * @returns {*} the converted value
 */
function castObjectKeys (o, depth, func, exclude) {
  if (depth === 0) {
    return o
  }
  // Plain membership test: the module imports lodash only, so the jQuery
  // global `$` used here before is not guaranteed to exist.
  const isExcluded = (candidate) =>
    exclude !== undefined && exclude !== null &&
    Array.prototype.indexOf.call(exclude, candidate) > -1
  if (lodash.isArray(o)) {
    return o.map(x => {
      if (isExcluded(x)) {
        return x
      } else {
        return castObjectKeys(x, depth - 1, func, exclude)
      }
    })
  } else if (lodash.isPlainObject(o)) {
    const castedObject = {}
    for (const key in o) {
      if (isExcluded(key)) {
        castedObject[key] = o[key]
      } else {
        castedObject[func(key)] = castObjectKeys(o[key], depth - 1, func, exclude)
      }
    }
    return castedObject
  } else {
    return o
  }
}
/**
 * Convert object keys to snake_case.
 * A falsy `depth` (including 0) is replaced by -1.
 */
export const toSnake = (o, depth, exclude) => {
  const levels = depth || -1
  return castObjectKeys(o, levels, lodash.snakeCase, exclude)
}

/**
 * Convert object keys to camelCase.
 * A falsy `depth` (including 0) is replaced by -1.
 */
export const toCamel = (o, depth, exclude) => {
  const levels = depth || -1
  return castObjectKeys(o, levels, lodash.camelCase, exclude)
}

/**
 * Convert object keys to kebab-case.
 * A falsy `depth` (including 0) is replaced by -1.
 */
export const toKebab = (o, depth, exclude) => {
  const levels = depth || -1
  return castObjectKeys(o, levels, lodash.kebabCase, exclude)
}
/**
 * Serialize a flat object into a URL query string (without the leading `?`).
 *
 * Keys are converted to snake_case. Array values are expanded into
 * `key-<index>=<item>` pairs. Keys and values are percent-encoded so that
 * characters such as `&`, `=` or spaces cannot break the resulting string.
 *
 * @param {Object} o parameters; it is passed through JSON first, which drops
 *   properties whose value is `undefined`
 * @returns {string} pairs joined with `&`
 */
export const queryString = function (o) {
  o = JSON.parse(JSON.stringify(o))
  const pairs = []
  const encode = (value) => encodeURIComponent(String(value))
  for (const key in o) {
    const value = o[key]
    if (value === undefined) {
      continue
    }
    // `toSnake` converts the keys of objects and leaves a plain string
    // untouched, so the key itself has to be converted directly.
    const name = lodash.snakeCase(key)
    if (Array.isArray(value)) {
      value.forEach((it, index) => {
        pairs.push(`${encode(`${name}-${index}`)}=${encode(it)}`)
      })
    } else {
      pairs.push(`${encode(name)}=${encode(value)}`)
    }
  }
  return pairs.join('&')
}
/**
 * Build a class that extends `baseClass` and mixes in the given classes.
 *
 * Prototype members and static members of every mixin are copied onto the
 * new class; on construction each mixin is instantiated with the same
 * arguments and its own instance properties are copied onto the new object.
 *
 * @param {Function} baseClass class to extend
 * @param {...Function} mixins classes whose members are copied
 * @returns {Function} the aggregated class
 */
export var aggregation = (baseClass, ...mixins) => {
  // Built-in function/class properties that must never be overwritten.
  const reserved = /^(?:constructor|prototype|arguments|caller|name|bind|call|apply|toString|length)$/

  const copyProps = (target, source) => {
    Object.getOwnPropertyNames(source)
      .concat(Object.getOwnPropertySymbols(source))
      .forEach((prop) => {
        // Symbol keys have no `.match`; they can never collide with the
        // reserved names, so they are always copied.
        if (typeof prop === 'symbol' || !reserved.test(prop)) {
          Object.defineProperty(target, prop, Object.getOwnPropertyDescriptor(source, prop))
        }
      })
  }

  class base extends baseClass {
    constructor (...args) {
      super(...args)
      mixins.forEach((Mixin) => {
        copyProps(this, (new Mixin(...args)))
      })
    }
  }

  mixins.forEach((mixin) => {
    copyProps(base.prototype, mixin.prototype)
    copyProps(base, mixin)
  })
  return base
}
/**
 * Upper-case the first character of `s` and leave the rest unchanged.
 *
 * @param {string} s
 * @returns {string}
 */
export const capitalizeFirstLetter = function (s) {
  const head = s.charAt(0)
  const tail = s.slice(1)
  return head.toUpperCase() + tail
}
/**
 * Merge the own properties of the given objects into a new object.
 *
 * When the first argument is a boolean it selects deep mode: in deep mode
 * values that are plain objects are merged recursively instead of being
 * replaced. Later arguments win over earlier ones.
 *
 * @returns {Object} a new object; the arguments are not modified
 */
export const extend = function () {
  const sources = Array.prototype.slice.call(arguments)
  const typeTag = (value) => Object.prototype.toString.call(value)

  let deep = false
  if (typeTag(sources[0]) === '[object Boolean]') {
    deep = sources.shift()
  }

  const extended = {}
  sources.forEach((source) => {
    for (const prop in source) {
      if (!Object.prototype.hasOwnProperty.call(source, prop)) {
        continue
      }
      if (deep && typeTag(source[prop]) === '[object Object]') {
        // Deep mode: merge nested plain objects key by key.
        extended[prop] = extend(true, extended[prop], source[prop])
      } else {
        extended[prop] = source[prop]
      }
    }
  })
  return extended
}
/**
 * Return a random integer between `min` and `max`, both inclusive.
 * `min` is rounded up and `max` is rounded down before drawing.
 *
 * @param {number} min
 * @param {number} max
 * @returns {number}
 */
export const getRandomInt = function (min, max) {
  const low = Math.ceil(min)
  const high = Math.floor(max)
  const span = high - low + 1
  return Math.floor(Math.random() * span) + low
}

View File

@ -7,9 +7,9 @@ py_library(
srcs_version = "PY3ONLY",
visibility = ["//visibility:public"],
deps = [
requirement("orjson"),
requirement("prometheus_client"),
requirement("izihawa_types"),
requirement("izihawa_utils"),
requirement("orjson"),
requirement("prometheus_client"),
],
)

View File

@ -33,7 +33,7 @@ def error_log(e, level=logging.ERROR, **fields):
elif fields:
e = {'error': repr(e), **fields}
logging.getLogger('error').log(
msg=e,
msg=str(e),
level=level
)

View File

@ -9,10 +9,12 @@ from typing import (
from aiokit import AioThing
from izihawa_utils.random import generate_request_id
from izihawa_utils.text import mask
from library.logging import error_log
from telethon import (
TelegramClient,
connection,
hints,
sessions,
)
from tenacity import ( # noqa
@ -22,6 +24,7 @@ from tenacity import ( # noqa
wait_fixed,
)
from .common import close_button
from .session_backend import AlchemySessionContainer
@ -42,6 +45,7 @@ class BaseTelegramClient(AioThing):
raise ValueError(
'Your API ID or Hash cannot be empty or None. Set up telegram.app_id and/or telegram.app_hash'
)
self.app_id = app_id
self._telegram_client = TelegramClient(
self._get_session(database),
app_id,
@ -53,6 +57,9 @@ class BaseTelegramClient(AioThing):
self.password = password
self.bot_token = bot_token
def __str__(self):
return f'BaseTelegramClient(app_id={self.app_id}, phone={mask(self.phone)}, bot_token={mask(self.bot_token)})'
def _get_session(self, database):
if database.get('drivername') == 'postgresql':
self.container = AlchemySessionContainer(
@ -80,21 +87,22 @@ class BaseTelegramClient(AioThing):
@retry(retry=retry_if_exception_type(ConnectionError), stop=stop_after_attempt(3), wait=wait_fixed(5))
async def start(self):
logging.getLogger('debug').info({'mode': 'telegram', 'action': 'starting'})
logging.getLogger('debug').debug({'mode': 'telegram', 'action': 'start'})
await self._telegram_client.start(
phone=lambda: self.phone,
bot_token=self.bot_token,
password=self.password,
code_callback=self.polling_file,
password=self.polling_file('/tmp/telegram_password'),
code_callback=self.polling_file('/tmp/telegram_code'),
)
logging.getLogger('debug').info({'mode': 'telegram', 'action': 'started'})
logging.getLogger('debug').debug({'mode': 'telegram', 'action': 'started'})
async def polling_file(self):
fname = '/tmp/telegram_code'
while not os.path.exists(fname):
await asyncio.sleep(5.0)
with open(fname, 'r') as code_file:
return code_file.read().strip()
def polling_file(self, fname):
async def f():
while not os.path.exists(fname):
await asyncio.sleep(5.0)
with open(fname, 'r') as code_file:
return code_file.read().strip()
return f
async def stop(self):
return await self.disconnect()
@ -125,6 +133,12 @@ class BaseTelegramClient(AioThing):
**kwargs,
)
def upload_file(self, file: hints.FileLike, file_name: str):
return self._telegram_client.upload_file(
file=file,
file_name=file_name,
)
def edit_message(self, *args, **kwargs):
return self._telegram_client.edit_message(*args, **kwargs)
@ -188,13 +202,21 @@ class RequestContext:
self.default_fields.update(fields)
def statbox(self, **kwargs):
logging.getLogger('statbox').info(
msg=dict(
**self.default_fields,
**kwargs,
),
)
logging.getLogger('statbox').info(msg=self.default_fields | kwargs)
def debug_log(self, **kwargs):
logging.getLogger('debug').debug(msg=self.default_fields | kwargs)
def error_log(self, e, level=logging.ERROR, **fields):
all_fields = {**self.default_fields, **fields}
all_fields = self.default_fields | fields
error_log(e, level=level, **all_fields)
def is_group_mode(self):
return self.chat.chat_id < 0
def is_personal_mode(self):
return self.chat.chat_id > 0
def personal_buttons(self):
if self.is_personal_mode():
return [close_button()]

View File

@ -0,0 +1,14 @@
from telethon import Button
def close_button(session_id: str = None):
    """Build the inline "close" button.

    The callback data is `/close_<session_id>` when a session id is given
    and plain `/close` otherwise.
    """
    data = f'/close_{session_id}' if session_id else '/close'
    return Button.inline(
        text='✖️',
        data=data,
    )

View File

@ -7,39 +7,40 @@ from typing import (
Optional,
)
from library.logging import error_log
from telethon import (
errors,
events,
)
from .base import RequestContext
@asynccontextmanager
async def safe_execution(
request_context: RequestContext,
error_log=error_log,
on_fail: Optional[Callable[[], Awaitable]] = None,
level=logging.WARNING,
):
try:
try:
yield
except events.StopPropagation:
raise
except errors.MessageNotModifiedError:
pass
except (
errors.UserIsBlockedError,
errors.QueryIdInvalidError,
errors.MessageDeleteForbiddenError,
errors.MessageIdInvalidError,
errors.MessageNotModifiedError,
errors.ChatAdminRequiredError,
) as e:
request_context.error_log(e, level=logging.WARNING)
error_log(e, level=level)
except Exception as e:
error_log(e, level=level)
traceback.print_exc()
request_context.error_log(e)
if on_fail:
await on_fail()
except events.StopPropagation:
raise
except Exception as e:
request_context.error_log(e)
error_log(e, level=level)

View File

@ -9,7 +9,6 @@
- ✅ [`ingest`](ingest) - retrieving metadata from external APIs and putting it onto Kafka
- ✅ [`meta_api`](meta_api) - rescoring and merging API for Summa backends
- ✅ [`models`](models) - shared Protobuf models
- ✅ [`nlptools`](nlptools) - text routines
- ✅ [`pipe`](pipe) - processing pipeline based on Kafka
- ✅ [`pylon`](pylon) - smart client for downloading files from the Internet/IPFS
- ✅ [`translations`](translations) - text translations used in `bot` and `hub`

View File

@ -20,6 +20,6 @@ py_library(
"//library/aiopostgres",
requirement("izihawa_types"),
"//nexus/models/proto:proto_py",
"//nexus/nlptools",
requirement("izihawa_nlptools"),
],
)

View File

@ -1,6 +1,4 @@
import time
from datetime import date
import numpy as np
from nexus.models.proto.scimag_pb2 import Scimag as ScimagPb
from .base import BaseAction
@ -20,14 +18,12 @@ def extract_dates(date_parts):
if not date_parts or not date_parts[0]:
return 0, None
year, month, day = date_parts[0] + [0] * (3 - len(date_parts[0]))
if year:
issued_at = int(time.mktime(date(
year=year,
month=month if month else 1,
day=day if day else 1,
).timetuple()))
return year, issued_at
return 0, None
if not year:
return 0, None
month = month if month else 1
day = day if day else 1
issued_at = np.datetime64(f'{year}-{month:02d}-{day:02d}').astype('datetime64[s]').astype(np.int64)
return year, issued_at
def extract_first(arr, default=''):
@ -71,17 +67,19 @@ def extract_references(references):
return dois
def clean_issns(issns):
    """Drop the placeholder ISSN `0000-0000` from a list of ISSNs.

    Returns None when `issns` is empty or None, otherwise a new list with
    the remaining ISSNs in their original order.
    """
    if not issns:
        return None
    return [issn for issn in issns if issn != '0000-0000']
def extract_title(title, subtitle):
    """Join the stripped title and subtitle with ': ', skipping empty parts."""
    parts = [part for part in (title.strip(), subtitle.strip()) if part]
    return ': '.join(parts)
class ToThinScimagPbAction(BaseAction):
async def do(self, item: dict) -> ScimagPb:
if 'DOI' not in item:
raise InterruptProcessing(document_id=None, reason='no_doi')
return ScimagPb(doi=item['DOI'])
class ToScimagPbAction(BaseAction):
async def do(self, item: dict) -> ScimagPb:
if 'DOI' not in item:
@ -91,9 +89,9 @@ class ToScimagPbAction(BaseAction):
container_title=extract_first(item.get('container-title')),
doi=item['DOI'],
issue=item.get('issue'),
issns=item.get('ISSN'),
issns=clean_issns(item.get('ISSN')),
language=item.get('language'),
ref_by_count=item.get('is-referenced-by-count'),
referenced_by_count=item.get('is-referenced-by-count'),
references=extract_references(item.get('reference')),
tags=item.get('subject'),
title=extract_title(extract_first(item.get('title')), extract_first(item.get('subtitle'))),

View File

@ -20,7 +20,7 @@ class ToPostgresAction(BaseAction):
f'password={database["password"]} '
f'host={database["host"]}',
)
self.waits.append(self.pool_holder)
self.starts.append(self.pool_holder)
async def do(self, document_operation_pb: DocumentOperationPb) -> DocumentOperationPb:
store_telegram_file_id_pb = document_operation_pb.store_telegram_file_id

View File

@ -1,16 +1,26 @@
import asyncio
import logging
from typing import (
Optional,
Set,
)
import orjson as json
from aiocrossref import CrossrefClient
from aiocrossref.exceptions import (
NotFoundError,
WrongContentTypeError,
)
from aiokafka import AIOKafkaProducer
from aiosumma import SummaClient
from izihawa_utils.common import filter_none
from izihawa_utils.pb_to_json import MessageToDict
from library.aiopostgres.pool_holder import AioPostgresPoolHolder
from nexus.actions import scimag_pb
from nexus.actions.base import BaseAction
from nexus.actions.common import canonize_doi
from nexus.actions.crossref_api import ToScimagPbAction
from nexus.actions.exceptions import InterruptProcessing
from nexus.models.proto.operation_pb2 import \
CrossReferenceOperation as CrossReferenceOperationPb
from nexus.models.proto.operation_pb2 import \
@ -21,11 +31,7 @@ from pypika import (
Table,
)
from pypika.terms import Array
from .. import scimag_pb
from ..base import BaseAction
from ..crossref_api import ToScimagPbAction
from ..exceptions import InterruptProcessing
from summa.proto import index_service_pb2 as index_service_pb
class ToPostgresAction(BaseAction):
@ -33,6 +39,7 @@ class ToPostgresAction(BaseAction):
db_multi_fields = {
'authors',
'ipfs_multihashes',
'isbns',
'issns',
'tags',
}
@ -40,6 +47,7 @@ class ToPostgresAction(BaseAction):
'id',
'abstract',
'container_title',
'content',
'doi',
'embedding',
'filesize',
@ -52,7 +60,8 @@ class ToPostgresAction(BaseAction):
'last_page',
'meta_language',
'md5',
'ref_by_count',
'page_rank',
'referenced_by_count',
'scimag_bulk_id',
'title',
'type',
@ -69,7 +78,7 @@ class ToPostgresAction(BaseAction):
f'password={database["password"]} '
f'host={database["host"]}',
)
self.waits.append(self.pool_holder)
self.starts.append(self.pool_holder)
def cast_field_value(self, field_name: str, field_value):
if field_name in self.db_multi_fields:
@ -82,18 +91,9 @@ class ToPostgresAction(BaseAction):
return scimag_pb.HasField(field_name)
return field_value
def generate_delete_sql(self, scimag_pb: ScimagPb):
return (
PostgreSQLQuery
.from_('scimag')
.where(self.scimag_table.id == scimag_pb.id)
.delete()
.get_sql()
)
def generate_insert_sql(self, scimag_pb: ScimagPb, fields: Optional[Set[str]] = None):
columns = []
inserts = []
params = []
fields = fields or self.db_fields
for field_name in fields:
@ -101,12 +101,12 @@ class ToPostgresAction(BaseAction):
field_value = getattr(scimag_pb, field_name)
field_name, field_value = self.cast_field_value(field_name, field_value)
columns.append(field_name)
inserts.append(field_value)
params.append(field_value)
query = PostgreSQLQuery.into(self.scimag_table).columns(*columns).insert(*inserts)
query = PostgreSQLQuery.into(self.scimag_table).columns(*columns).insert(*params)
if columns:
query = query.on_conflict('doi')
for field, val in zip(columns, inserts):
for field, val in zip(columns, params):
query = query.do_update(field, val)
return query.returning(self.scimag_table.id).get_sql()
@ -134,13 +134,10 @@ class ToPostgresAction(BaseAction):
fields = update_document_pb.fields or self.db_fields
if scimag_pb.id:
if not scimag_pb.is_deleted:
sql = self.generate_update_sql(
scimag_pb,
fields=fields,
)
else:
sql = self.generate_delete_sql(scimag_pb)
sql = self.generate_update_sql(
scimag_pb,
fields=fields,
)
await self.pool_holder.execute(sql)
else:
sql = self.generate_insert_sql(
@ -152,11 +149,83 @@ class ToPostgresAction(BaseAction):
return document_operation_pb
class ReferencesToKafkaAction(BaseAction):
def __init__(self, topic, brokers):
class ToSummaAction(BaseAction):
forbidden_types = {
'book-series',
'book-set',
'book-track',
'component',
'dataset',
'journal',
'journal-issue',
'journal-volume',
'other',
'peer-review',
'proceedings',
'report-series',
}
def __init__(self, kafka, summa):
super().__init__()
self.topic = topic
self.brokers = brokers
self.kafka = kafka
self.producer = None
self.summa_config = summa
self.summa_client = SummaClient(endpoint=summa['endpoint'])
async def start(self):
self.producer = self.get_producer()
await self.producer.start()
await self.summa_client.start()
async def stop(self):
await self.summa_client.stop()
if self.producer:
await self.producer.stop()
self.producer = None
def get_producer(self):
return AIOKafkaProducer(
loop=asyncio.get_running_loop(),
bootstrap_servers=self.kafka['bootstrap_servers'],
max_request_size=self.kafka['max_request_size'],
)
async def async_index(self, scimag_pb: ScimagPb):
for topic_name in self.kafka['topic_names']:
await self.producer.send_and_wait(
topic_name,
index_service_pb.IndexOperation(
index_document=index_service_pb.IndexDocumentOperation(
document=json.dumps(filter_none(MessageToDict(scimag_pb, preserving_proto_field_name=True))),
),
).SerializeToString(),
)
async def sync_index(self, scimag_pb: ScimagPb):
document = filter_none(MessageToDict(scimag_pb, preserving_proto_field_name=True))
logging.getLogger('statbox').info({'action': 'sync_index', 'document': document})
await self.summa_client.index_document(index_alias=self.summa_config['index_alias'], document=document)
await self.summa_client.commit_index(index_alias=self.summa_config['index_alias'])
async def do(self, document_operation_pb: DocumentOperationPb) -> DocumentOperationPb:
update_document_pb = document_operation_pb.update_document
scimag_pb = update_document_pb.typed_document.scimag
if scimag_pb.type in self.forbidden_types:
return document_operation_pb
if not scimag_pb.HasField('issued_at'):
scimag_pb.issued_at = -62135596800
if update_document_pb.full_text_index:
if update_document_pb.full_text_index_commit:
await self.sync_index(scimag_pb=scimag_pb)
else:
await self.async_index(scimag_pb=scimag_pb)
return document_operation_pb
class ReferencesToKafkaAction(BaseAction):
def __init__(self, kafka):
super().__init__()
self.kafka = kafka
self.producer = None
async def start(self):
@ -164,13 +233,14 @@ class ReferencesToKafkaAction(BaseAction):
await self.producer.start()
async def stop(self):
await self.producer.stop()
self.producer = None
if self.producer:
await self.producer.stop()
self.producer = None
def get_producer(self):
return AIOKafkaProducer(
loop=asyncio.get_running_loop(),
bootstrap_servers=self.brokers,
bootstrap_servers=self.kafka['bootstrap_servers'],
)
async def do(self, document_operation_pb: DocumentOperationPb) -> DocumentOperationPb:
@ -181,10 +251,12 @@ class ReferencesToKafkaAction(BaseAction):
source=scimag_pb.doi,
target=reference,
)
await self.producer.send_and_wait(
self.topic,
reference_operation.SerializeToString(),
)
for topic_name in self.kafka['topic_names']:
await self.producer.send_and_wait(
topic_name,
reference_operation.SerializeToString(),
)
return document_operation_pb
@ -192,7 +264,6 @@ class FillFromExternalSourceAction(BaseAction):
def __init__(self, crossref):
super().__init__()
self.crossref_client = CrossrefClient(
delay=1.0 / crossref['rps'],
max_retries=crossref.get('max_retries', 15),
proxy_url=crossref.get('proxy_url'),
retry_delay=crossref.get('retry_delay', 0.5),
@ -200,18 +271,31 @@ class FillFromExternalSourceAction(BaseAction):
user_agent=crossref.get('user_agent'),
ttl_dns_cache=crossref.get('ttl_dns_cache'),
)
self.doi_client = self.crossref_client
self.crossref_api_to_scimag_pb_action = ToScimagPbAction()
self.waits.append(self.crossref_client)
self.starts.append(self.crossref_client)
async def try_resolve(self, doi, look_at_doi_org=False):
try:
return await self.crossref_client.works(doi=doi)
except (WrongContentTypeError, NotFoundError) as e:
if look_at_doi_org:
doi_org_response = await self.doi_client.get(doi=doi)
if doi_org_response:
resolved_doi = canonize_doi(doi_org_response.get('published-print', {}).get('DOI'))
if resolved_doi:
try:
return await self.crossref_client.works(doi=resolved_doi)
except (WrongContentTypeError, NotFoundError) as e:
raise InterruptProcessing(document_id=doi, reason=str(e))
raise InterruptProcessing(document_id=doi, reason=str(e))
async def do(self, document_operation_pb: DocumentOperationPb) -> DocumentOperationPb:
update_document_pb = document_operation_pb.update_document
scimag_pb = update_document_pb.typed_document.scimag
if not update_document_pb.should_fill_from_external_source:
return document_operation_pb
scimag_pb = update_document_pb.typed_document.scimag
try:
crossref_api_response = await self.crossref_client.works(doi=scimag_pb.doi)
except (WrongContentTypeError, NotFoundError) as e:
raise InterruptProcessing(document_id=scimag_pb.doi, reason=str(e))
crossref_api_response = await self.try_resolve(doi=scimag_pb.doi)
new_scimag_pb = await self.crossref_api_to_scimag_pb_action.do(crossref_api_response)
scimag_pb.MergeFrom(new_scimag_pb)
return document_operation_pb
@ -221,9 +305,21 @@ class CleanAction(BaseAction):
def __init__(self):
super().__init__()
self.cleaner = scimag_pb.CleanAction()
self.waits.append(self.cleaner)
self.language_detect = scimag_pb.DetectLanguageAction()
self.starts.append(self.cleaner)
async def do(self, document_operation_pb: DocumentOperationPb) -> DocumentOperationPb:
update_document_pb = document_operation_pb.update_document
update_document_pb.typed_document.scimag.CopyFrom(await self.cleaner.do(update_document_pb.typed_document.scimag))
scimag_pb = update_document_pb.typed_document.scimag
scimag_pb = await self.cleaner.do(scimag_pb)
scimag_pb = await self.language_detect.do(scimag_pb)
if update_document_pb.fields and (scimag_pb.language or scimag_pb.meta_language):
fields = set(update_document_pb.fields)
if scimag_pb.language:
fields.add('language')
if scimag_pb.meta_language:
fields.add('meta_language')
del update_document_pb.fields[:]
update_document_pb.fields.extend(fields)
update_document_pb.typed_document.scimag.CopyFrom(scimag_pb)
return document_operation_pb

View File

@ -1,5 +1,12 @@
import logging
import asyncio
from typing import (
Optional,
Set,
)
import orjson as json
from aiokafka import AIOKafkaProducer
from izihawa_utils.common import filter_none
from izihawa_utils.pb_to_json import MessageToDict
from library.aiopostgres.pool_holder import AioPostgresPoolHolder
from nexus.models.proto.operation_pb2 import \
@ -10,7 +17,11 @@ from pypika import (
Table,
functions,
)
from pypika.terms import Array
from pypika.terms import (
Array,
NullValue,
)
from summa.proto import index_service_pb2 as index_service_pb
from .. import scitech_pb
from ..base import BaseAction
@ -46,6 +57,7 @@ class ToPostgresAction(BaseAction):
'title',
'updated_at',
'volume',
'periodical',
}
db_multi_fields = {
'authors',
@ -53,6 +65,15 @@ class ToPostgresAction(BaseAction):
'isbns',
'tags',
}
essential_fields = {
'title',
'authors',
'volume',
'periodical',
'series',
'pages',
'edition',
}
db_fields = db_single_fields | db_multi_fields
def __init__(self, database):
@ -63,7 +84,7 @@ class ToPostgresAction(BaseAction):
f'password={database["password"]} '
f'host={database["host"]}',
)
self.waits.append(self.pool_holder)
self.starts.append(self.pool_holder)
def cast_field_value(self, field_name, field_value):
if field_name in self.db_multi_fields:
@ -78,6 +99,72 @@ class ToPostgresAction(BaseAction):
return scitech_pb.HasField(field_name)
return field_value
def generate_insert_sql(self, scitech_pb: ScitechPb, fields: Optional[Set[str]] = None):
    """
    Build an `INSERT ... ON CONFLICT (libgen_id, doi) DO UPDATE` statement for `scitech_pb`.

    :param scitech_pb: document whose set fields are written
    :param fields: names of the fields to consider; `None` means every field in `self.db_fields`
    :return: SQL string that returns `id` and `original_id` of the affected row
    """
    if fields is None:
        # `None` is the declared default, but iterating it would raise TypeError.
        fields = self.db_fields

    columns = []
    inserts = []
    reset_original_id = False
    has_original_id = False
    has_is_deleted = False

    for field_name in fields:
        if self.is_field_set(scitech_pb, field_name):
            field_value = getattr(scitech_pb, field_name)
            field_name, field_value = self.cast_field_value(field_name, field_value)
            columns.append(field_name)
            inserts.append(field_value)
            if field_name == 'original_id':
                has_original_id = True
            elif field_name == 'is_deleted':
                has_is_deleted = True
            elif field_name in self.essential_fields:
                # Writing an essential field invalidates the stored `original_id` link.
                reset_original_id = True

    if reset_original_id and not has_original_id:
        columns.append('original_id')
        inserts.append(NullValue())
    if not has_is_deleted:
        # Rows written without an explicit flag are stored as not deleted.
        columns.append('is_deleted')
        inserts.append(False)

    query = (
        PostgreSQLQuery
        .into(self.scitech_table)
        .columns(*columns)
        .insert(*inserts)
    )
    if columns:
        # On conflict, every inserted column is overwritten with the new value.
        query = query.on_conflict('libgen_id', 'doi')
        for col, val in zip(columns, inserts):
            query = query.do_update(col, val)

    sql = query.returning('id', 'original_id').get_sql()
    return sql
def generate_update_sql(self, conditions, scitech_pb: ScitechPb, fields: Optional[Set[str]] = None):
    """
    Build an `UPDATE` statement that writes the set fields of `scitech_pb`.

    :param conditions: criterion used in the `WHERE` clause
    :param scitech_pb: document whose set fields are written
    :param fields: names of the fields to consider; `None` means every field in `self.db_fields`
    :return: SQL string that returns `id` and `original_id` of the updated row
    """
    if fields is None:
        # `None` is the declared default, but iterating it would raise TypeError.
        fields = self.db_fields

    query = PostgreSQLQuery.update(self.scitech_table)
    reset_original_id = False
    has_original_id = False
    # NOTE(review): this starts as True (it is False in generate_insert_sql), so the
    # `is_deleted` reset at the bottom never runs. Confirm whether that is intentional.
    has_is_deleted = True

    for field_name in fields:
        if self.is_field_set(scitech_pb, field_name):
            field_value = getattr(scitech_pb, field_name)
            field_name, field_value = self.cast_field_value(field_name, field_value)
            query = query.set(field_name, field_value)
            if field_name == 'original_id':
                has_original_id = True
            elif field_name == 'is_deleted':
                has_is_deleted = True
            elif field_name in self.essential_fields:
                # Writing an essential field invalidates the stored `original_id` link.
                reset_original_id = True

    if reset_original_id and not has_original_id:
        query = query.set('original_id', NullValue())
    if not has_is_deleted:
        query = query.set('is_deleted', False)

    sql = query.where(conditions).returning('id', 'original_id').get_sql()
    return sql
async def do(self, document_operation_pb: DocumentOperationPb) -> DocumentOperationPb:
update_document_pb = document_operation_pb.update_document
scitech_pb = update_document_pb.typed_document.scitech
@ -94,66 +181,82 @@ class ToPostgresAction(BaseAction):
conditions.append(self.scitech_table.doi == scitech_pb.doi)
# if scitech_pb.md5:
# conditions.append(self.scitech_table.md5 == UuidFunction(scitech_pb.md5))
if not conditions:
return
if conditions:
casted_conditions = conditions[0]
for condition in conditions[1:]:
casted_conditions = casted_conditions | condition
sql = (
PostgreSQLQuery
.from_(self.scitech_table)
.select(functions.Count('*'))
.where(casted_conditions)
.get_sql()
)
result = [row async for row in self.pool_holder.iterate(sql)]
count = result[0][0]
casted_conditions = conditions[0]
for condition in conditions[1:]:
casted_conditions = casted_conditions | condition
count_sql = (
PostgreSQLQuery
.from_(self.scitech_table)
.select(functions.Count('*'))
.where(casted_conditions)
.get_sql()
)
result = [row async for row in self.pool_holder.iterate(count_sql)]
count = result[0][0]
if count > 1:
raise ConflictError(scitech_pb, duplicates=[])
if count > 1:
raise ConflictError(scitech_pb, duplicates=[])
if count == 1:
query = PostgreSQLQuery.update(self.scitech_table)
for field_name in fields:
if self.is_field_set(scitech_pb, field_name):
field_value = getattr(scitech_pb, field_name)
field_name, field_value = self.cast_field_value(field_name, field_value)
query = query.set(field_name, field_value)
sql = query.where(casted_conditions).returning('id', 'original_id').get_sql()
else:
columns = []
inserts = []
for field_name in fields:
if self.is_field_set(scitech_pb, field_name):
field_value = getattr(scitech_pb, field_name)
field_name, field_value = self.cast_field_value(field_name, field_value)
columns.append(field_name)
inserts.append(field_value)
query = (
PostgreSQLQuery
.into(self.scitech_table)
.columns(*columns)
.insert(*inserts)
.on_conflict('libgen_id', 'doi')
if count == 1:
sql = self.generate_update_sql(conditions=casted_conditions, scitech_pb=scitech_pb, fields=fields)
result = [row async for row in self.pool_holder.iterate(sql)][0]
scitech_pb.id = result[0]
scitech_pb.original_id = result[1] or 0
else:
sql = self.generate_insert_sql(scitech_pb=scitech_pb, fields=fields)
result = [row async for row in self.pool_holder.iterate(sql)][0]
scitech_pb.id = result[0]
scitech_pb.original_id = result[1] or 0
return document_operation_pb
class ToSummaAction(BaseAction):
def __init__(self, kafka, summa):
    """
    Store the Kafka and Summa settings; the producer is created later in `start`.

    :param kafka: mapping read by `get_producer` and `do` (`bootstrap_servers`, `max_request_size`, `topic_names`)
    :param summa: Summa settings, stored as-is
    """
    super().__init__()
    self.summa = summa
    self.kafka = kafka
    # No producer exists until `start` is awaited.
    self.producer = None
async def start(self):
    """Create the Kafka producer via `get_producer`, keep it on the instance and start it."""
    producer = self.get_producer()
    self.producer = producer
    await producer.start()
async def stop(self):
    """Stop the Kafka producer if one was started and forget it."""
    producer = self.producer
    if not producer:
        return
    await producer.stop()
    self.producer = None
def get_producer(self):
    """Build (but do not start) an `AIOKafkaProducer` from `self.kafka`, bound to the running event loop."""
    kafka_config = self.kafka
    running_loop = asyncio.get_running_loop()
    bootstrap_servers = kafka_config['bootstrap_servers']
    max_request_size = kafka_config['max_request_size']
    return AIOKafkaProducer(
        loop=running_loop,
        bootstrap_servers=bootstrap_servers,
        max_request_size=max_request_size,
    )
async def do(self, document_operation_pb: DocumentOperationPb) -> DocumentOperationPb:
update_document_pb = document_operation_pb.update_document
scitech_pb = update_document_pb.typed_document.scitech
if update_document_pb.full_text_index:
for topic_name in self.kafka['topic_names']:
await self.producer.send_and_wait(
topic_name,
index_service_pb.IndexOperation(
index_document=index_service_pb.IndexDocumentOperation(
document=json.dumps(filter_none(MessageToDict(scitech_pb, preserving_proto_field_name=True))),
),
).SerializeToString(),
)
for col, val in zip(columns, inserts):
query = query.do_update(col, val)
sql = query.returning('id', 'original_id').get_sql()
try:
result = [row async for row in self.pool_holder.iterate(sql)]
except:
logging.getLogger('error').error({'sql': sql, 'scitech': MessageToDict(scitech_pb)})
raise
scitech_pb.id, scitech_pb.original_id = result[0][0], result[0][1] or 0
return document_operation_pb
return document_operation_pb
class CleanAction(BaseAction):
def __init__(self):
super().__init__()
self.cleaner = scitech_pb.CleanAction()
self.waits.append(self.cleaner)
self.starts.append(self.cleaner)
async def do(self, document_operation_pb: DocumentOperationPb) -> DocumentOperationPb:
update_document_pb = document_operation_pb.update_document

View File

@ -28,6 +28,8 @@ class ToPostgresAction(BaseAction):
'filesize',
'md5',
'updated_at',
'abstract',
'content',
}
db_fields = db_single_fields | db_multi_fields
@ -39,7 +41,7 @@ class ToPostgresAction(BaseAction):
f'password={database["password"]} '
f'host={database["host"]}',
)
self.waits.append(self.pool_holder)
self.starts.append(self.pool_holder)
def cast_field_value(self, field_name: str, field_value):
if field_name in self.db_multi_fields:

View File

@ -19,9 +19,9 @@ class ToPostgresAction(BaseAction):
f'user={database["username"]} '
f'password={database["password"]} '
f'host={database["host"]}',
max_size=2,
max_size=1,
)
self.waits.append(self.pool_holder)
self.starts.append(self.pool_holder)
def generate_insert_sql(self, document_id: int, value: int, voter_id: int):
query = PostgreSQLQuery.into(self.votes_table).columns(

View File

@ -85,6 +85,25 @@ def create_cu(libgen_id, coverurl, md5):
class ToScitechPbAction(BaseAction):
def process_tag(self, raw_tag) -> list:
    """
    Split a raw tag string into a sorted list of unique, lower-cased tags.

    Tags are separated by `;`, and each tag is further split on `--`.
    Empty pieces (e.g. from `'math--'` or `';;'`) are dropped.

    :param raw_tag: raw tag string; a missing or empty value yields an empty list
    :return: sorted list of distinct non-empty tags
    """
    if not raw_tag:
        return []
    tags = set()
    for tag in raw_tag.split(';'):
        for dash_tag in tag.lower().split('--'):
            dash_tag = dash_tag.strip()
            # A leading, trailing or doubled `--` produces empty pieces; they are not tags.
            if dash_tag:
                tags.add(dash_tag)
    return sorted(tags)
def process_isbns(self, identifier):
    """
    Split an identifier string into a list of ISBN strings.

    `;` and `,` both act as separators; dashes and surrounding whitespace
    are removed from each piece and empty pieces are skipped.

    :param identifier: raw identifier string
    :return: list of cleaned identifiers in their original order
    """
    isbns = []
    for chunk in identifier.replace(';', ',').split(','):
        candidate = chunk.replace('-', '').strip()
        if candidate:
            isbns.append(candidate)
    return isbns
async def do(self, item: dict) -> ScitechPb:
scitech_pb = ScitechPb(
authors=(item.get('author') or '').split('; '),
@ -94,25 +113,15 @@ class ToScitechPbAction(BaseAction):
extension=item.get('extension'),
filesize=safe_int(item['filesize']) or 0,
is_deleted=item.get('visible', '') != '',
isbns=list(filter(
lambda x: bool(x),
map(
lambda x: x.replace('-', '').strip(),
item['identifier'].replace(';', ',').split(',')
),
)),
isbns=self.process_isbns(item['identifier']),
language=LANGUAGE_TRANSLATION.get(item['language']),
libgen_id=int(item['id']),
md5=item['md5'].lower(),
pages=safe_int(item['pages']),
series=item.get('series'),
tags=list(filter(
lambda x: bool(x),
map(
lambda x: x.strip(),
item['tags'].split(';')
),
)),
volume=item.get('volumeinfo'),
periodical=item.get('periodical'),
tags=self.process_tag(item['tags']),
title=item['title'].replace('\0', '').strip(),
)
@ -124,6 +133,6 @@ class ToScitechPbAction(BaseAction):
year = safe_int(item['year'])
if year and year < 9999:
scitech_pb.year = year
# Subtract 1970
# Subtract 1970 because `np.datetime64(year, 'Y')` is not returning unixtime
scitech_pb.issued_at = np.datetime64(year, 'Y').astype('datetime64[s]').astype(np.int64) - 62167132800
return scitech_pb

View File

@ -1,14 +1,17 @@
import orjson as json
from izihawa_utils.common import filter_none
from nexus.models.proto.scimag_pb2 import Scimag as ScimagPb
from summa.proto.proto_grpc_py_pb import index_pb2 as index_pb
from summa.proto import index_service_pb2 as index_service_pb
from .base import BaseAction
class ToThinScimagPbAction(BaseAction):
    """Turns a dict into a `ScimagPb` that carries only the DOI."""

    async def do(self, item: dict) -> ScimagPb:
        # Only `item['doi']` is read; a missing key raises KeyError.
        doi = item['doi']
        return ScimagPb(doi=doi)
class ScimagToIndexOperationBytesAction(BaseAction):
    """Wraps a dict into a serialized `IndexOperation` carrying an `IndexDocumentOperation`."""

    async def do(self, item: dict) -> bytes:
        # The item is passed through `filter_none` and dumped with `json` (orjson in this module).
        document = json.dumps(filter_none(item))
        index_document = index_service_pb.IndexDocumentOperation(document=document)
        operation = index_service_pb.IndexOperation(index_document=index_document)
        return operation.SerializeToString()
class ScitechToIndexOperationBytesAction(BaseAction):
@ -26,9 +29,8 @@ class ScitechToIndexOperationBytesAction(BaseAction):
async def do(self, item: dict) -> bytes:
# if item['original_id'] is not None:
# item = {rc: item[rc] for rc in self.restricted_column_set}
return index_pb.IndexOperation(
index_document=index_pb.IndexDocumentOperation(
return index_service_pb.IndexOperation(
index_document=index_service_pb.IndexDocumentOperation(
document=json.dumps(filter_none(item)),
reindex=True,
),
).SerializeToString()

View File

@ -1,22 +1,35 @@
from html import unescape
from bs4 import BeautifulSoup
from izihawa_nlptools.language_detect import detect_language
from izihawa_nlptools.utils import (
despace,
despace_full,
)
from nexus.models.proto.operation_pb2 import \
DocumentOperation as DocumentOperationPb
from nexus.models.proto.operation_pb2 import UpdateDocument as UpdateDocumentPb
from nexus.models.proto.scimag_pb2 import Scimag as ScimagPb
from nexus.models.proto.typed_document_pb2 import \
TypedDocument as TypedDocumentPb
from nexus.nlptools.language_detect import detect_language
from nexus.nlptools.utils import (
despace,
despace_full,
)
from .base import BaseAction
from .common import canonize_doi
# Action that fills `meta_language` / `language` of a Scimag message from its text fields.
# NOTE(review): indentation is not visible in this view, so the exact nesting of the
# conditionals below could not be confirmed.
class DetectLanguageAction(BaseAction):
# Mutates the passed `scimag_pb` and returns the same object.
async def do(self, scimag_pb: ScimagPb) -> ScimagPb:
# Detection is attempted only when at least one of title/abstract/content is non-empty.
if scimag_pb.title or scimag_pb.abstract or scimag_pb.content:
detected_language = detect_language(f'{scimag_pb.title} {scimag_pb.abstract} {scimag_pb.content}')
if detected_language:
scimag_pb.meta_language = detected_language
# `language` is overwritten with the detected value only for documents that have content.
if scimag_pb.content:
scimag_pb.language = detected_language
# Fall back to `meta_language` when `language` is still empty.
if not scimag_pb.language:
scimag_pb.language = scimag_pb.meta_language
return scimag_pb
class CleanAction(BaseAction):
async def do(self, scimag_pb: ScimagPb) -> ScimagPb:
if scimag_pb.abstract:
@ -52,21 +65,20 @@ class CleanAction(BaseAction):
canonized_references = list(map(canonize_doi, scimag_pb.references))
del scimag_pb.references[:]
scimag_pb.references.extend(canonized_references)
if not scimag_pb.meta_language and (scimag_pb.title or scimag_pb.abstract):
detected_language = detect_language(f'{scimag_pb.title} {scimag_pb.abstract}')
if detected_language:
scimag_pb.meta_language = detected_language
if not scimag_pb.language:
scimag_pb.language = scimag_pb.meta_language
return scimag_pb
class ToDocumentOperationAction(BaseAction):
class ToDocumentOperationBytesAction(BaseAction):
def __init__(self, full_text_index: bool, should_fill_from_external_source: bool):
super().__init__()
self.full_text_index = full_text_index
self.should_fill_from_external_source = should_fill_from_external_source
async def do(self, item: ScimagPb) -> bytes:
document_operation_pb = DocumentOperationPb(
update_document=UpdateDocumentPb(
reindex=True,
should_fill_from_external_source=True,
full_text_index=self.full_text_index,
should_fill_from_external_source=self.should_fill_from_external_source,
typed_document=TypedDocumentPb(scimag=item),
),
)

View File

@ -1,21 +1,61 @@
from html import unescape
from bs4 import BeautifulSoup
from izihawa_nlptools.language_detect import detect_language
from izihawa_nlptools.utils import (
despace,
despace_full,
)
from nexus.models.proto.operation_pb2 import \
DocumentOperation as DocumentOperationPb
from nexus.models.proto.operation_pb2 import UpdateDocument as UpdateDocumentPb
from nexus.models.proto.scitech_pb2 import Scitech as ScitechPb
from nexus.models.proto.typed_document_pb2 import \
TypedDocument as TypedDocumentPb
from nexus.nlptools.language_detect import detect_language
from nexus.nlptools.utils import (
despace,
despace_full,
)
from .base import BaseAction
from .common import canonize_doi
# Maps lower-cased raw edition strings to a canonical edition number (as a string).
# Values that carry no edition number map to an empty string.
editions = {
    # first edition
    # NOTE(review): an empty edition string maps to '1' — confirm this is intended.
    '': '1',
    '1st': '1',
    '1st ed': '1',
    '1st ed.': '1',
    '1st ed. 2019': '1',
    '1st ed. 2020': '1',
    '1st edition': '1',
    '1st edition.': '1',
    'first edition': '1',
    'first edition.': '1',
    '1. aufl': '1',
    '1. aufl.': '1',
    '1. auflage': '1',
    # second edition
    '2nd': '2',
    '2nd ed': '2',
    '2nd ed.': '2',
    '2nd edition': '2',
    'second edition': '2',
    '2-е': '2',
    '2-е издание, переработанное и дополненное': '2',
    # third edition
    '3rd': '3',
    '3rd ed': '3',
    '3rd edition': '3',
    # fourth edition and above
    '4th': '4',
    '4th ed': '4',
    '5th': '5',
    '10th ed': '10',
    # values without an edition number
    '0': '',
    'none': '',
    'original': '',
    'reprint': '',
    'paperback': '',
    'hardcover': '',
    'ebook': '',
    'kindle edition': '',
    'illustrated edition': '',
    'annotated edition': '',
}
class CleanAction(BaseAction):
async def do(self, scitech_pb: ScitechPb) -> ScitechPb:
@ -29,6 +69,8 @@ class CleanAction(BaseAction):
line.replace_with(f'\n{line.text.strip()}\n')
scitech_pb.description = despace(description_soup.text.strip())
scitech_pb.periodical = despace_full(scitech_pb.periodical)
scitech_pb.volume = despace_full(scitech_pb.volume)
scitech_pb.series = despace_full(scitech_pb.series)
scitech_pb.title = despace_full(scitech_pb.title)
@ -42,16 +84,21 @@ class CleanAction(BaseAction):
scitech_pb.md5 = scitech_pb.md5.lower()
scitech_pb.extension = scitech_pb.extension.lower()
scitech_pb.doi = canonize_doi(scitech_pb.doi)
if scitech_pb.edition == 'None':
scitech_pb.edition = ''
if scitech_pb.edition is not None:
edition = scitech_pb.edition.lower()
scitech_pb.edition = editions.get(edition, edition)
return scitech_pb
class ToDocumentOperationPbAction(BaseAction):
class ToDocumentOperationBytesAction(BaseAction):
def __init__(self, full_text_index: bool):
super().__init__()
self.full_text_index = full_text_index
async def do(self, item: ScitechPb) -> bytes:
document_operation_pb = DocumentOperationPb(
update_document=UpdateDocumentPb(
reindex=True,
full_text_index=self.full_text_index,
typed_document=TypedDocumentPb(scitech=item),
),
)

View File

@ -25,8 +25,11 @@ py3_image(
requirement("aiodns"),
requirement("aiohttp"),
requirement("aiohttp_socks"),
requirement("dateparser"),
requirement("pandas"),
requirement("pytimeparse"),
requirement("python_socks"),
requirement("seaborn"),
requirement("tenacity"),
requirement("uvloop"),
"//idm/api/aioclient",
@ -39,8 +42,8 @@ py3_image(
"//nexus/hub/aioclient",
"//nexus/meta_api/aioclient",
"//nexus/models/proto:proto_py",
"//nexus/nlptools",
"//nexus/views/telegram",
requirement("izihawa_nlptools"),
requirement("izihawa_utils"),
],
)

View File

@ -19,7 +19,6 @@ class TelegramApplication(AioRootThing):
database=self.config['telegram'].get('database'),
mtproxy=self.config['telegram'].get('mtproxy'),
)
self.hub_client = HubGrpcClient(endpoint=self.config['hub']['endpoint'])
self.starts.append(self.hub_client)
self.idm_client = None
@ -39,8 +38,7 @@ class TelegramApplication(AioRootThing):
async def start(self):
self.set_handlers(self.telegram_client)
await self.telegram_client.start_and_wait()
await self.telegram_client.run_until_disconnected()
await self.telegram_client.start()
async def stop(self):
self.telegram_client.remove_event_handlers()

View File

@ -5,7 +5,6 @@ from library.configurator import Configurator
def get_config():
return Configurator([
'nexus/bot/configs/base.yaml',
'nexus/bot/configs/metrics.yaml?',
'nexus/bot/configs/%s.yaml?' % env.type,
'nexus/bot/configs/logging.yaml',
'nexus/bot/configs/promotions.yaml',

View File

@ -1,17 +1,17 @@
---
application:
# Amazon Recipient Email in /donate message
# Amazon Recipient Email in /howtohelp message
amazon_gift_card_recipient: pirate@ship.space
# Amazon URL for buying card in /donate message
# Amazon URL for buying card in /howtohelp message
amazon_gift_card_url: https://www.amazon.com/dp/B07TMNGSN4
bot_version: 1.6.0
# Bitcoin Donation address in /donate message
btc_donate_address: 3QbF3zRQVjn3qMJBSbmLC1gb6VUc555xkw
bot_version: 2.0.0
btc_donate_address: '3CLEdvAXtNqCNix6SQmyT5RscR6pzxGvg8'
# List of chat IDs that are allowed to bypass maintenance mode
bypass_maintenance: []
# Debugging mode
debug: true
eth_donate_address: '0x930B94dafE8f2dEf8C6b536d9F70A12604Af10C3'
# Enabled indices (passed to Nexus Meta API)
index_aliases:
- scitech
@ -21,25 +21,27 @@ application:
# and preventing creation of new users
is_read_only_mode: false
# Require subscription to `related_channel` before allowing to use the bot
is_subscription_required: true
# Libera Pay URL in /donate message
libera_pay_url:
is_subscription_required: false
maintenance_picture_url:
nexus_version: InterCom
nexus_version: Jabbah
# Default page size for SERP
page_size: 5
# Length of generated Request-Id used for tracking requests across all backends
# Length of generated Request-ID used for tracking requests across all backends
request_id_length: 12
# Length of generated Session-ID used in commands to tie user sessions together
session_id_length: 8
sol_donate_address: 'FcJG17cEyG8LnNkdJg8HCAQQZKxqpwTupD9fc3GXMqxD'
too_difficult_picture_url:
upgrade_maintenance_picture_url:
# Configuring behaviour of the bot in some cases
views:
settings:
has_connectome_button: true
has_discovery_button: true
has_language_buttons: true
has_system_messaging_button: true
xmr_donate_address: '42HZx5Cg1uQ2CtCrq7QabP23BN7gBrGu6U6QumkMmR4bKS61gcoP8xyNzP5cJCbjac9yaWFhLsDmM3adMWyBKBXn1d9WiUb'
xrp_donate_address: 'rw2ciyaNshpHe7bCHo4bRWq6pqqynnWKQg'
xrp_donate_tag: '1968122674'
hub:
endpoint:
idm:
@ -48,15 +50,13 @@ idm:
log_path: '/var/log/nexus-bot'
meta_api:
endpoint:
metrics:
enabled: false
telegram:
# Telegram App Hash from https://my.telegram.org/
app_hash: '{{ APP_HASH }}'
# Telegram App ID from https://my.telegram.org/
app_id: 00000
# External bot name shown in messages to users
bot_name: libgen_scihub_1_bot
bot_name: libgen_scihub_2_bot
bot_token:
# WARNING! Potentially buggy telethon option. Sometimes it goes mad and overloads users with tons of messages
# Collect missed messages at startup time and answer to them
@ -72,26 +72,35 @@ telegram:
- nexus.bot.handlers.ban.BanHandler
- nexus.bot.handlers.ban.BanlistHandler
- nexus.bot.handlers.ban.UnbanHandler
- nexus.bot.handlers.contact.ContactHandler
- nexus.bot.handlers.aboutus.AboutusHandler
- nexus.bot.handlers.copyright.CopyrightHandler
- nexus.bot.handlers.close.CloseHandler
- nexus.bot.handlers.donate.DonateHandler
- nexus.bot.handlers.download.DownloadHandler
- nexus.bot.handlers.emoji.EmojiHandler
- nexus.bot.handlers.howtohelp.HowToHelpHandler
- nexus.bot.handlers.help.HelpHandler
- nexus.bot.handlers.profile.ProfileHandler
- nexus.bot.handlers.profile.DigestHandler
- nexus.bot.handlers.rank.RankHandler
- nexus.bot.handlers.roll.RollHandler
- nexus.bot.handlers.seed.SeedHandler
- nexus.bot.handlers.settings.SettingsButtonsHandler
- nexus.bot.handlers.settings.SettingsHandler
- nexus.bot.handlers.shortlink.ShortlinkHandler
- nexus.bot.handlers.submit.SubmitHandler
- nexus.bot.handlers.submit.EditSubmitHandler
- nexus.bot.handlers.start.StartHandler
- nexus.bot.handlers.stop.StopHandler
- nexus.bot.handlers.top_missed.TopMissedHandler
- nexus.bot.handlers.trends.TrendsHelpHandler
- nexus.bot.handlers.trends.TrendsHandler
- nexus.bot.handlers.trends.TrendsEditHandler
- nexus.bot.handlers.view.ViewHandler
- nexus.bot.handlers.vote.VoteHandler
- nexus.bot.handlers.noop.NoopHandler
- nexus.bot.handlers.search.SearchHandler
- nexus.bot.handlers.search.SearchEditHandler
- nexus.bot.handlers.search.SearchPagingHandler
# Channel that will be shown in /help, /donate, /contact and in promotions
related_channel: '@nexus_search'
- nexus.bot.handlers.search.InlineSearchHandler
# Channel that will be shown in /help, /howtohelp and in promotions
related_channel: 'nexus_search'
twitter:
contact_url: https://twitter.com/the_superpirate

View File

@ -10,10 +10,14 @@ logging:
traceback:
class: library.logging.formatters.TracebackFormatter
handlers:
console:
class: logging.StreamHandler
level: WARNING
stream: 'ext://sys.stderr'
debug:
class: library.logging.handlers.BaseFileHandler
formatter: default
filename: '{{ log_path }}/debug.log'
formatter: default
level: DEBUG
error:
class: library.logging.handlers.BaseFileHandler
@ -29,7 +33,7 @@ logging:
class: library.logging.handlers.BaseFileHandler
filename: '{{ log_path }}/statbox.log'
formatter: default
level: DEBUG
level: INFO
traceback:
class: library.logging.handlers.BaseFileHandler
filename: '{{ log_path }}/traceback.log'
@ -56,14 +60,11 @@ logging:
propagate: false
error:
handlers:
- console
- error
- traceback
- warning
propagate: false
metrics:
handlers:
- error
propagate: false
operation:
handlers:
- operation
@ -74,10 +75,12 @@ logging:
propagate: false
telethon:
handlers:
- debug
- error
- warning
propagate: false
root:
handlers:
- console
- debug
level: DEBUG
version: 1

View File

@ -2,12 +2,35 @@
promotions:
- texts:
en: 🎁 Help us at /donate to accelerate knowledge unchaining
weight: 3.0
en: 💬 The victory of humanity is inevitable
weight: 1
- texts:
en: ⤴️ Stay tuned with us at {related_channel}
es: ⤴️ Mantente en contacto con nosotros en {related_channel}
it: ⤴️ Resta aggiornato con noi su {related_channel}
pb: ⤴️ Fique ligado conosco em {related_channel}
ru: ⤴️ Оставайся на связи с нами на {related_channel}
weight: 1.0
en: 💬 Shall build Standard Template Construct
weight: 1
- texts:
en: 💬 Gaining knowledge is the only purpose of life
weight: 1
- texts:
en: 💬 Knowledge cannot belong
weight: 1
- texts:
en: 💬 Obey the path of discovery
weight: 1
- texts:
en: 💬 Research is the only and ultimate goal
weight: 1
- texts:
en: ✋ Have a subscription to paid articles? [Help researchers!](https://t.me/nexus_aaron)
ru: ✋ Есть доступ к платным статьям? [Помоги ученым!](https://t.me/nexus_aaron)
weight: 25
- texts:
en: ✋ Help us, become a seeder of books. Learn how in /seed
ru: ✋ Сохрани наследие, раздавай книги нуждающимся. Узнай как в /seed
weight: 25
- texts:
en: ⤴️ Stay tuned with us at @{related_channel} and [Twitter]({twitter_contact_url})
es: ⤴️ Mantente en contacto con nosotros en @{related_channel} y [Twitter]({twitter_contact_url})
it: ⤴️ Resta aggiornato con noi su @{related_channel} e [Twitter]({twitter_contact_url})
pb: ⤴️ Fique ligado conosco em @{related_channel} e [Twitter]({twitter_contact_url})
ru: ⤴️ Оставайся на связи с нами на @{related_channel} и в [Twitter]({twitter_contact_url})
weight: 25

View File

@ -11,11 +11,6 @@ class BannedUserError(BaseError):
self.ban_timeout = ban_timeout
class MessageHasBeenDeletedError(BaseError):
level = logging.WARNING
code = 'message_has_been_deleted_error'
class UnknownFileFormatError(BaseError):
level = logging.WARNING
code = 'unknown_file_format_error'
@ -23,3 +18,12 @@ class UnknownFileFormatError(BaseError):
class UnknownIndexAliasError(BaseError):
code = 'unknown_index_alias_error'
class WidgetError(BaseError):
    """Warning-level error that carries a text and a set of buttons."""
    code = 'widget_error'
    level = logging.WARNING

    def __init__(self, text, buttons):
        # NOTE(review): BaseError.__init__ is not called here — confirm the base class
        # does not rely on its own initialisation.
        self.buttons = buttons
        self.text = text

View File

@ -1,27 +1,27 @@
from . import (
aboutus,
admin,
ban,
close,
contact,
copyright,
donate,
download,
emoji,
help,
howtohelp,
legacy,
noop,
rank,
roll,
search,
seed,
settings,
shortlink,
start,
stop,
submit,
top_missed,
view,
vote,
)
__all__ = ['admin', 'ban', 'contact', 'copyright', 'close', 'donate', 'download', 'emoji', 'help',
'legacy', 'noop', 'roll', 'search', 'settings',
'shortlink', 'start', 'stop', 'submit', 'top_missed', 'view', 'vote']
__all__ = ['aboutus', 'admin', 'ban', 'copyright', 'close', 'download', 'help', 'howtohelp',
'legacy', 'noop', 'rank', 'roll', 'search', 'seed', 'settings',
'shortlink', 'start', 'stop', 'submit', 'view', 'vote']

View File

@ -0,0 +1,22 @@
from library.telegram.base import RequestContext
from nexus.translations import t
from telethon import (
Button,
events,
)
from .base import BaseHandler
class AboutusHandler(BaseHandler):
    """Replies to the `/aboutus` command (optionally suffixed with `@botname`) with the `ABOUT_US` text."""
    is_group_handler = True
    filter = events.NewMessage(incoming=True, pattern='^/aboutus(@[A-Za-z0-9_]+)?$')

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        # Record the view in statbox before answering.
        request_context.statbox(action='show', mode='aboutus')
        about_text = t('ABOUT_US', request_context.chat.language)
        # The reply clears any reply keyboard and is sent without a link preview.
        await event.reply(about_text, buttons=Button.clear(), link_preview=False)

View File

@ -3,7 +3,8 @@ from datetime import (
timedelta,
)
from aiobaseclient.exceptions import ClientError
from grpc import StatusCode
from grpc.aio import AioRpcError
from library.telegram.base import RequestContext
from nexus.bot.widgets.banlist_widget import BanlistWidget
from pytimeparse.timeparse import timeparse
@ -13,7 +14,7 @@ from .admin import BaseAdminHandler
class BanHandler(BaseAdminHandler):
filter = events.NewMessage(incoming=True, pattern='^/ban ([0-9]+) ([A-Za-z0-9]+)\\s?(.*)?$')
filter = events.NewMessage(incoming=True, pattern='^/ban (-?[0-9]+) ([A-Za-z0-9]+)\\s?(.*)?$')
def parse_pattern(self, event: events.ChatAction):
chat_id = int(event.pattern_match.group(1))
@ -39,17 +40,16 @@ class BanHandler(BaseAdminHandler):
ban_until=ban_end_date.timestamp(),
banned_chat_id=chat_id,
)
except ClientError as e:
if e.code == 'nonexistent_entity_error':
await event.reply('Chat not found')
return
raise
await event.reply('User banned until ' + ban_end_date.strftime("%Y-%m-%d %H:%M") + ' UTC')
except AioRpcError as e:
if e.code() == StatusCode.NOT_FOUND:
return await event.reply('Chat not found')
else:
raise
return await event.reply('User banned until ' + ban_end_date.strftime("%Y-%m-%d %H:%M") + ' UTC')
class UnbanHandler(BaseAdminHandler):
filter = events.NewMessage(incoming=True, pattern='^/unban(?:_|\\s)([0-9]+)$')
filter = events.NewMessage(incoming=True, pattern='^/unban(?:_|\\s)(-?[0-9]+)$')
async def handler(self, event, request_context: RequestContext):
chat_id = int(event.pattern_match.group(1))
@ -64,13 +64,13 @@ class UnbanHandler(BaseAdminHandler):
action='unbanned',
unbanned_chat_id=chat_id,
)
except ClientError as e:
if e.code == 'nonexistent_entity_error':
await event.reply('Chat not found')
return
raise
except AioRpcError as e:
if e.code() == StatusCode.NOT_FOUND:
return await event.reply('Chat not found')
else:
raise
await event.reply('User unbanned')
return await event.reply('User unbanned')
class BanlistHandler(BaseAdminHandler):

View File

@ -2,7 +2,6 @@ import logging
import time
from abc import ABC
from datetime import datetime
from typing import Union
from grpc import StatusCode
from grpc.experimental.aio import AioRpcError
@ -11,18 +10,17 @@ from izihawa_utils.exceptions import BaseError
from izihawa_utils.random import random_string
from library.logging import error_log
from library.telegram.base import RequestContext
from library.telegram.common import close_button
from library.telegram.utils import safe_execution
from nexus.bot.application import TelegramApplication
from nexus.bot.exceptions import UnknownIndexAliasError
from nexus.models.proto.typed_document_pb2 import \
TypedDocument as TypedDocumentPb
from nexus.translations import t
from nexus.views.telegram.common import close_button
from nexus.views.telegram.scimag import ScimagView
from nexus.views.telegram.scitech import ScitechView
from telethon import (
TelegramClient,
events,
functions,
)
from telethon.errors import (
QueryIdInvalidError,
@ -82,10 +80,19 @@ class BaseHandler(ABC):
def short_index_alias_to_index_alias(self, short_index_alias: str) -> str:
return self.short_index_alias_to_index_alias_dict[short_index_alias]
async def get_last_messages_in_chat(self, event: events.ChatAction):
    """
    Request the messages whose ids follow `event.id` (`event.id + 1` through `event.id + 9`).

    :param event: event whose `id` is the starting point
    :return: `messages` of the response, or an empty list when the response is falsy
    """
    message_ids = list(range(event.id + 1, event.id + 10))
    request = functions.messages.GetMessagesRequest(id=message_ids)
    messages_holder = await self.application.telegram_client(request)
    return messages_holder.messages if messages_holder else []
async def get_typed_document_pb(
self,
index_alias: str,
document_id: int,
mode: str,
request_context: RequestContext,
session_id: str,
position: int,
@ -93,6 +100,7 @@ class BaseHandler(ABC):
return await self.application.meta_api_client.get(
index_alias=index_alias,
document_id=document_id,
mode=mode,
session_id=session_id,
position=position,
request_id=request_context.request_id,
@ -105,15 +113,15 @@ class BaseHandler(ABC):
position: int,
request_context: RequestContext,
session_id: str,
) -> ScimagView:
typed_document_pb = await self.get_typed_document_pb(
) -> TypedDocumentPb:
return await self.get_typed_document_pb(
index_alias='scimag',
document_id=document_id,
mode='view',
position=position,
request_context=request_context,
session_id=session_id,
)
return ScimagView(document_pb=typed_document_pb.scimag)
async def resolve_scitech(
self,
@ -121,30 +129,15 @@ class BaseHandler(ABC):
position: int,
request_context: RequestContext,
session_id: str,
) -> ScitechView:
typed_document_pb = await self.get_typed_document_pb(
) -> TypedDocumentPb:
return await self.get_typed_document_pb(
index_alias='scitech',
document_id=document_id,
mode='view',
position=position,
request_context=request_context,
session_id=session_id,
)
search_response_duplicates = await self.application.meta_api_client.search(
index_aliases=('scitech',),
query=f'original_id:{document_id}',
page_size=16,
request_id=request_context.request_id,
session_id=session_id,
user_id=str(request_context.chat.chat_id),
)
duplicates = [
scored_document.typed_document.scitech
for scored_document in search_response_duplicates.scored_documents
]
return ScitechView(
document_pb=typed_document_pb.scitech,
duplicates=duplicates,
)
async def resolve_document(
self,
@ -153,7 +146,7 @@ class BaseHandler(ABC):
position: int,
session_id: str,
request_context: RequestContext
) -> Union[ScimagView, ScitechView]:
) -> TypedDocumentPb:
if index_alias not in self.index_alias_to_resolver:
raise UnknownIndexAliasError(index_alias=index_alias)
@ -175,12 +168,12 @@ class BaseHandler(ABC):
async def _send_fail_response(self, event: events.ChatAction, request_context: RequestContext):
try:
await event.reply(
t('MAINTENANCE', language=request_context.chat.language).format(
t('MAINTENANCE', request_context.chat.language).format(
maintenance_picture_url=self.application.config['application']['maintenance_picture_url'],
),
buttons=[close_button()]
buttons=None if request_context.is_group_mode() else [close_button()]
)
except (ConnectionError, QueryIdInvalidError) as e:
except (ConnectionError, QueryIdInvalidError, ValueError) as e:
request_context.error_log(e)
async def _put_chat(self, event: events.ChatAction, request_id: str):
@ -218,13 +211,10 @@ class BaseHandler(ABC):
if is_banned(chat):
if chat.ban_message is not None:
async with safe_execution(
request_context=request_context,
error_log=request_context.error_log,
on_fail=lambda: self._send_fail_response(event, request_context),
):
await event.reply(t(
'BANNED',
language=chat.language
).format(
await event.reply(t('BANNED', chat.language).format(
datetime=str(time.ctime(chat.ban_until)),
reason=chat.ban_message,
))
@ -236,17 +226,18 @@ class BaseHandler(ABC):
and event.chat_id not in self.application.config['application']['bypass_maintenance']
):
await event.reply(
t('UPGRADE_MAINTENANCE', language='en').format(
t('UPGRADE_MAINTENANCE', 'en').format(
upgrade_maintenance_picture_url=self.application.config['application']
['upgrade_maintenance_picture_url']
),
buttons=None if (event.is_group or event.is_channel) else [close_button()]
)
raise events.StopPropagation()
async def _check_read_only(self, event: events.ChatAction):
if self.application.config['application']['is_read_only_mode']:
await event.reply(
t("READ_ONLY_MODE", language='en'),
t("READ_ONLY_MODE", 'en'),
)
raise events.StopPropagation()
@ -269,7 +260,7 @@ class BaseHandler(ABC):
and not await self.is_subscribed(chat)
):
async with safe_execution(
request_context=request_context,
error_log=request_context.error_log,
on_fail=lambda: self._send_fail_response(event, request_context),
):
await event.reply(t(
@ -292,6 +283,7 @@ class BaseHandler(ABC):
chat_id=event.chat_id,
is_system_messaging_enabled=True,
is_discovery_enabled=True,
is_connectome_enabled=False,
language='en',
username=username,
is_admin=False,
@ -326,7 +318,7 @@ class BaseHandler(ABC):
self.reset_last_widget(request_context.chat.chat_id)
async with safe_execution(
request_context=request_context,
error_log=request_context.error_log,
on_fail=lambda: self._send_fail_response(event, request_context),
):
await self.handler(
@ -343,6 +335,6 @@ class BaseHandler(ABC):
class BaseCallbackQueryHandler(BaseHandler, ABC):
async def _send_fail_response(self, event, request_context: RequestContext):
try:
await event.answer(t('MAINTENANCE_WO_PIC', language=request_context.chat.language))
await event.answer(t('MAINTENANCE_WO_PIC', request_context.chat.language))
except (ConnectionError, QueryIdInvalidError) as e:
request_context.error_log(e)

View File

@ -1,11 +1,17 @@
import asyncio
import time
from library.telegram.base import RequestContext
from nexus.translations import t
from telethon import events
from .base import BaseCallbackQueryHandler
def is_earlier_than_2_days(message):
    """
    Tell whether `message` is still recent enough to be deleted.

    NOTE: despite the name, the function returns True when the message is
    *younger* than the threshold, i.e. it was sent less than
    (48 hours - 10 seconds) ago.
    NOTE(review): the threshold presumably mirrors Telegram's 48-hour limit
    on deleting messages, with 10 seconds of safety margin -- confirm.

    :param message: object exposing a `date` attribute of type `datetime`
    :return: True if the message age is below the threshold, False otherwise
    """
    # `datetime.timestamp()` honours tzinfo. The previous
    # `time.mktime(message.date.timetuple())` interpreted the wall-clock fields
    # as local time, which shifted timezone-aware (UTC) dates by the host's
    # UTC offset. Naive datetimes are still treated as local time, as before.
    message_age = time.time() - message.date.timestamp()
    return message_age < 48 * 60 * 60 - 10
class CloseHandler(BaseCallbackQueryHandler):
filter = events.CallbackQuery(pattern='^/close(?:_([A-Za-z0-9]+))?(?:_([0-9]+))?$')
@ -15,17 +21,20 @@ class CloseHandler(BaseCallbackQueryHandler):
session_id = session_id.decode()
request_context.add_default_fields(mode='close')
target_events = [event.answer()]
target_events = []
message = await event.get_message()
if message:
if message and is_earlier_than_2_days(message):
target_events.append(event.answer())
request_context.statbox(
action='close',
message_id=message.id,
session_id=session_id,
)
reply_message = await message.get_reply_message()
if reply_message:
if reply_message and is_earlier_than_2_days(reply_message):
target_events.append(reply_message.delete())
target_events.append(message.delete())
else:
target_events.append(event.answer(t('DELETION_FORBIDDEN_DUE_TO_AGE')))
await asyncio.gather(*target_events)

View File

@ -1,33 +0,0 @@
import re
from library.telegram.base import RequestContext
from nexus.bot.configs import config
from nexus.translations import t
from telethon import events
from .base import BaseHandler
class ContactHandler(BaseHandler):
    """
    Handler of the `/contact` command.

    When the command carries text, the user is thanked for the contact;
    when it carries none, the contact information is shown.
    """
    filter = events.NewMessage(incoming=True, pattern=re.compile('^/contact\\s?(.*)', re.DOTALL))
    is_group_handler = True

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """
        Reply to a `/contact` message.

        :param event: incoming message event matched by `filter`
        :param request_context: context of the current request (chat, logging)
        """
        contact_text = event.pattern_match.group(1)

        if not contact_text:
            # Bare command: show how to reach out and how to donate.
            request_context.statbox(action='show', mode='contact')
            contact_template = t('CONTACT', language=request_context.chat.language)
            contact_message = contact_template.format(
                btc_donate_address=config['application']['btc_donate_address'],
                libera_pay_url=config['application']['libera_pay_url'],
                related_channel=config['telegram']['related_channel'],
            )
            await event.reply(contact_message, link_preview=False)
            return

        # Command with a payload: log it and acknowledge.
        request_context.statbox(action='show', mode='contact', query=contact_text)
        thanks_template = t('THANK_YOU_FOR_CONTACT', language=request_context.chat.language)
        thanks_message = thanks_template.format(
            related_channel=self.application.config['telegram']['related_channel'],
        )
        await event.reply(thanks_message)

View File

@ -11,21 +11,18 @@ class CopyrightHandler(BaseHandler):
filter = events.NewMessage(incoming=True, pattern=re.compile('^/copyright\\s?(.*)', re.DOTALL))
async def handler(self, event: events.ChatAction, request_context: RequestContext):
request_context.add_default_fields(mode='copyright')
query = event.pattern_match.group(1)
if query:
request_context.statbox(
action='show',
mode='copyright',
query=query,
)
await self.application.telegram_client.forward_messages(
self.application.config['telegram']['copyright_infringement_account'],
event.message,
)
await event.reply(t(
'COPYRIGHT_INFRINGEMENT_ACCEPTED',
language=request_context.chat.language,
))
await event.reply(t('COPYRIGHT_INFRINGEMENT_ACCEPTED', request_context.chat.language))
else:
request_context.statbox(action='show', mode='copyright')
await event.reply(t('COPYRIGHT_DESCRIPTION', language=request_context.chat.language,))
request_context.statbox(action='show')
await event.reply(t('COPYRIGHT_DESCRIPTION', request_context.chat.language,))

View File

@ -32,6 +32,7 @@ class DownloadHandler(BaseCallbackQueryHandler):
typed_document_pb = await self.get_typed_document_pb(
index_alias=index_alias,
document_id=document_id,
mode='download',
request_context=request_context,
session_id=session_id,
position=position,
@ -45,12 +46,12 @@ class DownloadHandler(BaseCallbackQueryHandler):
)
if start_delivery_response_pb.status == StartDeliveryResponsePb.Status.ALREADY_DOWNLOADING:
await event.answer(
f'{t("ALREADY_DOWNLOADING", language=request_context.chat.language)}',
f'{t("ALREADY_DOWNLOADING", request_context.chat.language)}',
)
await remove_button(event, '⬇️', and_empty_too=True)
elif start_delivery_response_pb.status == StartDeliveryResponsePb.Status.TOO_MANY_DOWNLOADS:
await event.answer(
f'{t("TOO_MANY_DOWNLOADS", language=request_context.chat.language)}',
f'{t("TOO_MANY_DOWNLOADS", request_context.chat.language)}',
)
else:
await remove_button(event, '⬇️', and_empty_too=True)

View File

@ -1,17 +0,0 @@
from library.telegram.base import RequestContext
from nexus.nlptools.regex import STICKER_REGEX
from nexus.translations import t
from telethon import events
from .base import BaseHandler
class EmojiHandler(BaseHandler):
    """
    Replies with the `TANKS_BRUH` translation to incoming messages
    that match `STICKER_REGEX`.
    """
    filter = events.NewMessage(incoming=True, pattern=STICKER_REGEX)

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """
        Log the event and send the reply in the chat's language.

        :param event: incoming message event matched by `filter`
        :param request_context: context of the current request (chat, logging)
        """
        request_context.statbox(action='show', mode='emoji')
        reply_text = t('TANKS_BRUH', language=request_context.chat.language)
        await event.reply(reply_text)

View File

@ -15,6 +15,6 @@ class HelpHandler(BaseHandler):
async def handler(self, event: events.ChatAction, request_context: RequestContext):
request_context.statbox(action='show', mode='help')
if event.is_group or event.is_channel:
await event.reply(t('HELP_FOR_GROUPS', language=request_context.chat.language), buttons=Button.clear())
await event.reply(t('HELP_FOR_GROUPS', request_context.chat.language), buttons=Button.clear())
else:
await event.reply(t('HELP', language=request_context.chat.language), buttons=Button.clear())
await event.reply(t('HELP', request_context.chat.language), buttons=Button.clear())

View File

@ -6,20 +6,21 @@ from telethon import events
from .base import BaseHandler
class DonateHandler(BaseHandler):
filter = events.NewMessage(incoming=True, pattern='^/donate(@[A-Za-z0-9_]+)?$')
class HowToHelpHandler(BaseHandler):
filter = events.NewMessage(incoming=True, pattern='^/howtohelp(@[A-Za-z0-9_]+)?$')
is_group_handler = True
async def handler(self, event: events.ChatAction, request_context: RequestContext):
request_context.statbox(action='show', mode='donate')
request_context.statbox(action='show', mode='howtohelp')
await event.reply(
t(
'DONATE',
language=request_context.chat.language
).format(
t('HOW_TO_HELP', request_context.chat.language).format(
amazon_gift_card_recipient=config['application'].get('amazon_gift_card_recipient', '🚫'),
amazon_gift_card_url=config['application'].get('amazon_gift_card_url', '🚫'),
btc_donate_address=config['application'].get('btc_donate_address', '🚫'),
libera_pay_url=config['application'].get('libera_pay_url', '🚫'),
eth_donate_address=config['application'].get('eth_donate_address', '🚫'),
related_channel=config['telegram'].get('related_channel', '🚫'),
sol_donate_address=config['application'].get('sol_donate_address', '🚫'),
xmr_donate_address=config['application'].get('xmr_donate_address', '🚫'),
xrp_donate_address=config['application'].get('xrp_donate_address', '🚫'),
xrp_donate_tag=config['application'].get('xrp_donate_tag', '🚫'),
))

View File

@ -15,7 +15,7 @@ class LegacyHandler(BaseHandler):
async def handler(self, event: events.ChatAction, request_context: RequestContext):
request_context.statbox(action='show', mode='legacy')
await event.reply(t('LEGACY', language=request_context.chat.language))
await event.reply(t('LEGACY', request_context.chat.language))
class LegacyCallbackHandler(BaseCallbackQueryHandler):
@ -25,4 +25,4 @@ class LegacyCallbackHandler(BaseCallbackQueryHandler):
async def handler(self, event: events.ChatAction, request_context: RequestContext):
request_context.statbox(action='show', mode='legacy')
return await event.answer(t('LEGACY', language=request_context.chat.language))
return await event.answer(t('LEGACY', request_context.chat.language))

View File

@ -0,0 +1,126 @@
import asyncio
import re
import time
from library.telegram.base import RequestContext
from nexus.bot.widgets.profile_widget import ProfileWidget
from nexus.views.telegram.base_holder import BaseHolder
from nexus.views.telegram.document_list_widget import DocumentListWidget
from telethon import events
from telethon.tl.types import PeerChannel
from .base import BaseHandler
class ProfileHandler(BaseHandler):
    """
    Handler of the `/profile` command.

    In personal mode it shows the profile of the chat itself. Otherwise it
    shows the profile of the author of the replied-to message or, when the
    command is not a reply, of the command's author. Senders identified by a
    `PeerChannel` have no profile; in that case the command is only deleted.
    """
    filter = events.NewMessage(incoming=True, pattern=re.compile('^/profile'))
    is_group_handler = True
    should_reset_last_widget = False
    stop_propagation = True

    async def handler(self, event, request_context: RequestContext):
        """
        Resolve whose profile is requested, render it and send it.

        :param event: incoming message event matched by `filter`
        :param request_context: context of the current request (chat, logging)
        :return: results of the gathered Telegram actions (deletes / reply)
        """
        request_context.add_default_fields(mode='profile')
        profile_user_id = None
        profile_reply_message = None

        # Telegram actions are collected and executed together at the end.
        target_events = []

        if request_context.is_personal_mode():
            profile_user_id = request_context.chat.chat_id
            target_events.append(event.delete())
        else:
            reply_message = await event.get_reply_message()
            if reply_message:
                # The command itself is removed; the profile is attached
                # to the message that was replied to.
                target_events.append(event.delete())
                if not isinstance(reply_message.from_id, PeerChannel):
                    profile_user_id = reply_message.from_id.user_id
                    profile_reply_message = reply_message
            else:
                if not isinstance(event.from_id, PeerChannel):
                    profile_user_id = event.from_id.user_id
                    profile_reply_message = event
                else:
                    target_events.append(event.delete())

        if profile_user_id is None:
            # Nobody to show a profile for: only perform the pending deletes.
            return await asyncio.gather(*target_events)

        request_context.statbox(
            action='show',
            profile_user_id=profile_user_id,
        )

        profile = await self.application.idm_client.get_profile(chat_id=profile_user_id, last_n_documents=300)
        profile_widget = ProfileWidget(
            application=self.application,
            request_context=request_context,
            profile=profile,
        )
        rendered_widget, buttons = await profile_widget.render()
        if profile_reply_message:
            target_events.append(profile_reply_message.reply(rendered_widget, buttons=buttons, link_preview=False))
        else:
            target_events.append(event.reply(rendered_widget, buttons=buttons, link_preview=False))
        # The gather must be awaited (as in the early-exit path above):
        # otherwise the handler returns before the actions finish and their
        # exceptions never reach the caller's error handling.
        return await asyncio.gather(*target_events)
class DigestHandler(BaseHandler):
    """
    Handler of the `/digest` callback query.

    Builds a search query from the user's profile (ISSNs of the most popular
    series, most popular tags, a one-week `issued_at` range, and exclusions
    for already downloaded documents), asks the meta API for the top 5
    `scimag` documents and replies with them as a document list.
    """
    filter = events.CallbackQuery(pattern=re.compile('^/digest$'))
    should_reset_last_widget = False

    async def handler(self, event, request_context: RequestContext):
        """
        Compose the digest for the requesting chat and reply with it.

        :param event: callback query event matched by `filter`
        :param request_context: context of the current request (chat, logging)
        """
        bot_name = self.application.config['telegram']['bot_name']
        session_id = self.generate_session_id()
        request_context.add_default_fields(mode='digest', session_id=session_id)

        profile = await self.application.idm_client.get_profile(
            request_context.chat.chat_id,
            last_n_documents=100,
        )

        # NOTE(review): `+`/`-` prefixes presumably mark required/excluded
        # clauses in the search query language -- confirm.
        clauses = [
            f'issn:{issn}'
            for series in profile.most_popular_series
            for issn in series.issns
        ]
        clauses.extend(f'tag:"{tag}"' for tag in profile.most_popular_tags)
        # Only documents issued during the last 7 days.
        clauses.append(f'+issued_at:[{int(time.time() - 3600 * 24 * 7)} TO {int(time.time())}]')
        clauses.extend(f'-id:{document.id}' for document in profile.downloaded_documents)
        query = ' '.join(clauses)

        request_context.statbox(
            action='query',
            query=query,
        )

        search_response = await self.application.meta_api_client.meta_search(
            index_aliases=['scimag'],
            query=query,
            collectors=[{'top_docs': {'limit': 5}}],
            user_id=str(request_context.chat.chat_id),
            query_tags=['digest'],
            session_id=session_id,
            request_id=request_context.request_id,
        )
        # The single requested collector is the first (and only) output.
        top_docs = search_response.collector_outputs[0].top_docs
        document_holders = []
        for scored_document in top_docs.scored_documents:
            document_holders.append(BaseHolder.create_from_document(scored_document))

        chat = await self.application.idm_client.get_chat(chat_id=request_context.chat.chat_id)

        document_list_widget = DocumentListWidget(
            chat=chat,
            document_holders=document_holders,
            bot_name=bot_name,
            header='✨ Nexus Discovery ✨',
        )
        view, buttons = await document_list_widget.render()

        await event.reply(view, buttons=buttons, link_preview=False)

View File

@ -0,0 +1,25 @@
import logging
import re
from library.telegram.base import RequestContext
from library.telegram.common import close_button
from library.telegram.utils import safe_execution
from telethon import events
from .base import BaseHandler
class RankHandler(BaseHandler):
    """
    Placeholder handler of the `/rank` command (optionally `/rank@<bot>`),
    currently answering with a "Coming soon!" message.
    """
    filter = events.NewMessage(incoming=True, pattern=re.compile(r'^/rank(?:@\w+)?(.*)?$', re.DOTALL))
    is_group_handler = True

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """
        Register the request and reply with the placeholder text.

        :param event: incoming message event matched by `filter`
        :param request_context: context of the current request (chat, logging)
        """
        session_id = self.generate_session_id()
        request_context.add_default_fields(mode='rank', session_id=session_id)

        # The command argument, bot name and chat language were read here but
        # never used; the dead lookups are dropped until ranking is implemented.
        async with safe_execution(error_log=request_context.error_log, level=logging.DEBUG):
            await event.reply('Coming soon!', buttons=[close_button()])

View File

@ -1,44 +1,58 @@
import asyncio
import logging
import re
import time
from library.telegram.base import RequestContext
from library.telegram.utils import safe_execution
from nexus.views.telegram.base_holder import BaseHolder
from telethon import events
from .base import BaseHandler
class RollHandler(BaseHandler):
filter = events.NewMessage(incoming=True, pattern=re.compile('^/roll(@[A-Za-z0-9_]+)?$', re.DOTALL))
filter = events.NewMessage(incoming=True, pattern=re.compile(r'^/roll(?:@\w+)?(.*)?$', re.DOTALL))
is_group_handler = True
async def handler(self, event: events.ChatAction, request_context: RequestContext):
start_time = time.time()
session_id = self.generate_session_id()
request_context.add_default_fields(mode='roll', session_id=session_id)
request_context.statbox(action='show')
query = event.pattern_match.group(1).strip()
bot_name = self.application.config['telegram']['bot_name']
language = request_context.chat.language
roll_response_pb = await self.application.meta_api_client.roll(
language=request_context.chat.language,
meta_search_response = await self.application.meta_api_client.meta_search(
index_aliases=['scimag', 'scitech'],
languages={request_context.chat.language: 1.0} if request_context.chat.language else None,
query=query,
collectors=[{'reservoir_sampling': {'limit': 1}}],
session_id=session_id,
request_id=request_context.request_id,
user_id=str(request_context.chat.chat_id),
query_tags=['roll'],
skip_cache_loading=True,
skip_cache_saving=True,
)
scitech_view = await self.resolve_scitech(
document_id=roll_response_pb.document_id,
position=0,
request_context=request_context,
session_id=session_id,
)
view, buttons = scitech_view.get_view(
language=request_context.chat.language,
session_id=session_id,
bot_name=self.application.config['telegram']['bot_name'],
)
actions = [
self.application.telegram_client.send_message(
request_context.chat.chat_id,
view,
buttons=buttons,
),
event.delete(),
]
return await asyncio.gather(*actions)
random_documents = meta_search_response.collector_outputs[0].reservoir_sampling.random_documents
if random_documents:
holder = BaseHolder.create_from_document(random_documents[0])
promo = self.application.promotioner.choose_promotion(language).format(
related_channel=self.application.config['telegram']['related_channel'],
twitter_contact_url=self.application.config['twitter']['contact_url'],
)
view = holder.view_builder(language).add_view(bot_name=bot_name).add_new_line(2).add(promo, escaped=True).build()
buttons_builder = holder.buttons_builder(language)
if request_context.is_group_mode():
buttons_builder.add_remote_download_button(bot_name=bot_name)
else:
buttons_builder.add_download_button(session_id)
buttons_builder.add_close_button(session_id)
request_context.statbox(action='show', duration=time.time() - start_time)
await event.respond(view, buttons=buttons_builder.build())
async with safe_execution(error_log=request_context.error_log, level=logging.DEBUG):
await event.delete()

View File

@ -2,22 +2,26 @@ import asyncio
import re
import time
from abc import ABC
from typing import Union
from grpc import StatusCode
from grpc.experimental.aio import AioRpcError
from library.telegram.base import RequestContext
from nexus.bot.exceptions import (
BannedUserError,
MessageHasBeenDeletedError,
from library.telegram.common import close_button
from library.telegram.utils import safe_execution
from nexus.bot.exceptions import BannedUserError
from nexus.bot.widgets.search_widget import (
InlineSearchWidget,
SearchWidget,
)
from nexus.bot.widgets.search_widget import SearchWidget
from nexus.translations import t
from nexus.views.telegram.common import close_button
from nexus.views.telegram.registry import parse_typed_document_to_view
from nexus.views.telegram.base_holder import BaseHolder
from nexus.views.telegram.common import encode_deep_query
from telethon import (
Button,
events,
functions,
)
from telethon.tl.types import InlineQueryPeerTypeSameBotPM
from .base import (
BaseCallbackQueryHandler,
@ -26,122 +30,68 @@ from .base import (
class BaseSearchHandler(BaseHandler, ABC):
def preprocess_query(self, query):
return query.replace(f'@{self.application.config["telegram"]["bot_name"]}', '').strip()
async def do_search(
async def setup_widget(
self,
event: events.ChatAction,
request_context: RequestContext,
prefetch_message,
query: str,
is_group_mode: bool = False,
is_shortpath_enabled: bool = False,
):
) -> tuple[str, list[Union[list[Button]], list[Button]]]:
session_id = self.generate_session_id()
message_id = prefetch_message.id
request_context.add_default_fields(is_group_mode=is_group_mode, mode='search', session_id=session_id)
request_context.add_default_fields(
is_group_mode=request_context.is_group_mode(),
mode='search',
session_id=session_id,
)
start_time = time.time()
language = request_context.chat.language
bot_name = self.application.config['telegram']['bot_name']
try:
search_widget = await SearchWidget.create(
application=self.application,
chat=request_context.chat,
session_id=session_id,
message_id=message_id,
request_id=request_context.request_id,
query=query,
is_group_mode=is_group_mode,
is_group_mode=request_context.is_group_mode(),
)
except AioRpcError as e:
actions = [
self.application.telegram_client.delete_messages(
request_context.chat.chat_id,
[message_id],
)
]
if e.code() == StatusCode.INVALID_ARGUMENT:
too_difficult_picture_url = self.application.config['application'].get('too_difficult_picture_url', '')
if e.details() == 'url_query_error':
actions.append(
event.reply(
t('INVALID_QUERY_ERROR', language=request_context.chat.language).format(
too_difficult_picture_url=too_difficult_picture_url,
),
buttons=[close_button()],
)
)
elif e.details() == 'invalid_query_error':
actions.append(
event.reply(
t('INVALID_SYNTAX_ERROR', language=request_context.chat.language).format(
too_difficult_picture_url=too_difficult_picture_url,
),
buttons=[close_button()],
)
)
return await asyncio.gather(*actions)
return t('INVALID_SYNTAX_ERROR', language).format(
too_difficult_picture_url=self.application.config['application'].get('too_difficult_picture_url', ''),
), [close_button()]
elif e.code() == StatusCode.CANCELLED:
maintenance_picture_url = self.application.config['application'].get('maintenance_picture_url', '')
request_context.error_log(e)
actions.append(event.reply(
t('MAINTENANCE', language=request_context.chat.language).format(
maintenance_picture_url=maintenance_picture_url,
),
buttons=[close_button()],
))
return await asyncio.gather(*actions)
await asyncio.gather(*actions)
return t('MAINTENANCE', language).format(
maintenance_picture_url=self.application.config['application'].get('maintenance_picture_url', ''),
), [close_button()],
request_context.error_log(e)
raise e
action = 'documents_found'
if len(search_widget.scored_documents) == 0:
action = 'documents_not_found'
request_context.statbox(
action=action,
action='documents_retrieved',
duration=time.time() - start_time,
query=f'page:0 query:{query}',
query=query,
page=0,
scored_documents=len(search_widget.scored_documents),
)
if len(search_widget.scored_documents) == 1 and is_shortpath_enabled:
scored_document = search_widget.scored_documents[0]
document_view = parse_typed_document_to_view(scored_document.typed_document)
# Second (re-)fetching is required to retrieve duplicates
document_view = await self.resolve_document(
index_alias=scored_document.typed_document.WhichOneof('document'),
document_id=document_view.id,
holder = BaseHolder.create(search_widget.scored_documents[0].typed_document)
view = holder.view_builder(language).add_view(bot_name=bot_name).build()
buttons = holder.buttons_builder(language).add_default_layout(
bot_name=bot_name,
session_id=session_id,
position=0,
session_id=session_id,
request_context=request_context,
)
view, buttons = document_view.get_view(
language=request_context.chat.language,
session_id=session_id,
bot_name=self.application.config['telegram']['bot_name'],
with_buttons=not is_group_mode,
)
return await asyncio.gather(
self.application.telegram_client.edit_message(
request_context.chat.chat_id,
message_id,
view,
buttons=buttons,
),
)
).build()
return view, buttons
serp, buttons = await search_widget.render()
return await self.application.telegram_client.edit_message(
request_context.chat.chat_id,
message_id,
serp,
buttons=buttons,
link_preview=False,
)
return await search_widget.render(message_id=message_id)
class SearchHandler(BaseSearchHandler):
filter = events.NewMessage(incoming=True, pattern=re.compile('^(/search\\s+)?(.*)', flags=re.DOTALL))
filter = events.NewMessage(incoming=True, pattern=re.compile(r'^(/search(?:@\w+)?\s+)?(.*)', flags=re.DOTALL))
is_group_handler = True
should_reset_last_widget = False
is_subscription_required_for_handler = True
@ -154,46 +104,46 @@ class SearchHandler(BaseSearchHandler):
def parse_pattern(self, event: events.ChatAction):
search_prefix = event.pattern_match.group(1)
query = self.preprocess_query(event.pattern_match.group(2))
is_group_mode = event.is_group or event.is_channel
query = event.pattern_match.group(2).strip()
return search_prefix, query, is_group_mode
return search_prefix, query
async def handler(self, event: events.ChatAction, request_context: RequestContext):
language = request_context.chat.language
try:
self.check_search_ban_timeout(user_id=str(request_context.chat.chat_id))
except BannedUserError as e:
request_context.error_log(e)
return await event.reply(t(
'BANNED_FOR_SECONDS',
language=request_context.chat.language
).format(
seconds=e.ban_timeout,
reason=t(
'BAN_MESSAGE_TOO_MANY_REQUESTS',
language=request_context.chat.language
),
))
search_prefix, query, is_group_mode = self.parse_pattern(event)
async with safe_execution(error_log=request_context.error_log):
return await event.reply(t('BANNED_FOR_SECONDS', language).format(
seconds=e.ban_timeout,
reason=t('BAN_MESSAGE_TOO_MANY_REQUESTS', language),
))
search_prefix, query = self.parse_pattern(event)
if is_group_mode and not search_prefix:
if request_context.is_group_mode() and not search_prefix:
return
if not is_group_mode and search_prefix:
if request_context.is_personal_mode() and search_prefix:
query = event.raw_text
prefetch_message = await event.reply(
t("SEARCHING", language=request_context.chat.language),
t("SEARCHING", language),
)
self.application.user_manager.last_widget[request_context.chat.chat_id] = prefetch_message.id
try:
await self.do_search(
event=event,
text, buttons = await self.setup_widget(
request_context=request_context,
prefetch_message=prefetch_message,
query=query,
is_group_mode=is_group_mode,
is_shortpath_enabled=True,
)
return await self.application.telegram_client.edit_message(
request_context.chat.chat_id,
prefetch_message.id,
text,
buttons=buttons,
link_preview=False,
)
except (AioRpcError, asyncio.CancelledError) as e:
await asyncio.gather(
event.delete(),
@ -202,60 +152,91 @@ class SearchHandler(BaseSearchHandler):
raise e
class InlineSearchHandler(BaseSearchHandler):
    """
    Handler of inline queries: answers them with search results rendered
    by `InlineSearchWidget`.
    """
    filter = events.InlineQuery()
    stop_propagation = False

    async def handler(self, event, request_context: RequestContext):
        """
        Answer an inline query.

        Queries coming from the private chat with the bot itself get an empty
        answer. Texts of 3 characters or fewer get an empty result list.
        On success the handler ends by raising `events.StopPropagation`.

        :param event: inline query event
        :param request_context: context of the current request (chat, logging)
        :raises events.StopPropagation: after the query has been answered
        :raises AioRpcError: re-raised after a failed search request
        """
        if event.query.peer_type == InlineQueryPeerTypeSameBotPM():
            await event.answer()
            return

        result_builder = event.builder
        session_id = self.generate_session_id()

        try:
            if len(event.text) <= 3:
                await event.answer([])
                raise events.StopPropagation()

            widget = await InlineSearchWidget.create(
                application=self.application,
                chat=request_context.chat,
                session_id=session_id,
                request_id=request_context.request_id,
                query=event.text,
                is_group_mode=request_context.is_group_mode(),
            )
            results = widget.render(builder=result_builder)

            deep_query = encode_deep_query(event.text)
            answer_options = {}
            if len(deep_query) < 32:
                # The "switch to PM" link is offered only when the encoded
                # query is shorter than 32 characters.
                answer_options = {
                    'private': True,
                    'switch_pm': self.application.config['telegram']['bot_name'],
                    'switch_pm_param': deep_query,
                }
            await event.answer(results, **answer_options)
        except AioRpcError as e:
            # For these two status codes the user gets an empty answer;
            # the error is propagated in every case.
            if e.code() in (StatusCode.INVALID_ARGUMENT, StatusCode.CANCELLED):
                await event.answer([])
            raise e
        raise events.StopPropagation()
class SearchEditHandler(BaseSearchHandler):
filter = events.MessageEdited(incoming=True, pattern=re.compile('^(/search\\s+)?(.*)', flags=re.DOTALL))
filter = events.MessageEdited(incoming=True, pattern=re.compile(r'^(/search(?:@\w+)\s+)?(.*)', flags=re.DOTALL))
is_group_handler = True
should_reset_last_widget = False
def parse_pattern(self, event: events.ChatAction):
search_prefix = event.pattern_match.group(1)
query = self.preprocess_query(event.pattern_match.group(2))
is_group_mode = event.is_group or event.is_channel
return search_prefix, query, is_group_mode
async def get_last_messages_in_chat(self, event: events.ChatAction):
return await self.application.telegram_client(functions.messages.GetMessagesRequest(
id=list(range(event.id + 1, event.id + 10)))
)
query = event.pattern_match.group(2).strip()
return search_prefix, query
async def handler(self, event: events.ChatAction, request_context: RequestContext):
search_prefix, query, is_group_mode = self.parse_pattern(event)
search_prefix, query = self.parse_pattern(event)
request_context.add_default_fields(mode='search_edit')
if is_group_mode and not search_prefix:
if request_context.is_group_mode() and not search_prefix:
return
if not is_group_mode and search_prefix:
if request_context.is_personal_mode() and search_prefix:
query = event.raw_text
last_messages = await self.get_last_messages_in_chat(event)
try:
if not last_messages:
raise MessageHasBeenDeletedError()
for next_message in last_messages.messages:
if next_message.is_reply and event.id == next_message.reply_to_msg_id:
request_context.statbox(action='resolved')
return await self.do_search(
event=event,
request_context=request_context,
prefetch_message=next_message,
query=query,
is_group_mode=is_group_mode,
)
raise MessageHasBeenDeletedError()
except MessageHasBeenDeletedError as e:
request_context.error_log(e)
return await event.reply(
t('REPLY_MESSAGE_HAS_BEEN_DELETED', language=request_context.chat.language),
)
for next_message in await self.get_last_messages_in_chat(event):
if next_message.is_reply and event.id == next_message.reply_to_msg_id:
request_context.statbox(action='resolved')
text, buttons = await self.setup_widget(
request_context=request_context,
prefetch_message=next_message,
query=query,
)
return await self.application.telegram_client.edit_message(
request_context.chat.chat_id,
next_message.id,
text,
buttons=buttons,
link_preview=False,
)
return await event.reply(
t('REPLY_MESSAGE_HAS_BEEN_DELETED', request_context.chat.language),
)
class SearchPagingHandler(BaseCallbackQueryHandler):
filter = events.CallbackQuery(pattern='^/search_([A-Za-z0-9]+)_([0-9]+)_([0-9]+)$')
should_reset_last_widget = False
def preprocess_query(self, query):
return query.replace(f'@{self.application.config["telegram"]["bot_name"]}', '').strip()
def parse_pattern(self, event: events.ChatAction):
session_id = event.pattern_match.group(1).decode()
message_id = int(event.pattern_match.group(2).decode())
@ -274,41 +255,39 @@ class SearchPagingHandler(BaseCallbackQueryHandler):
return await event.answer()
reply_message = await message.get_reply_message()
if not reply_message:
return await event.respond(
t('REPLY_MESSAGE_HAS_BEEN_DELETED', request_context.chat.language),
)
query = reply_message.raw_text.replace(f'@{self.application.config["telegram"]["bot_name"]}', '').strip()
try:
if not reply_message:
raise MessageHasBeenDeletedError()
query = self.preprocess_query(reply_message.raw_text)
search_widget = await SearchWidget.create(
application=self.application,
chat=request_context.chat,
session_id=session_id,
message_id=message_id,
request_id=request_context.request_id,
query=query,
page=page,
)
except MessageHasBeenDeletedError:
return await event.respond(
t('REPLY_MESSAGE_HAS_BEEN_DELETED', language=request_context.chat.language),
)
except AioRpcError as e:
if e.code() == StatusCode.INVALID_ARGUMENT or e.code() == StatusCode.CANCELLED:
request_context.error_log(e)
return await event.answer(
t('MAINTENANCE_WO_PIC', language=request_context.chat.language),
t('MAINTENANCE_WO_PIC', request_context.chat.language),
)
raise e
action = 'documents_found'
if len(search_widget.scored_documents) == 0:
action = 'documents_not_found'
request_context.statbox(
action=action,
action='documents_retrieved',
duration=time.time() - start_time,
query=f'page:{page} query:{query}',
query=query,
page=page,
scored_documents=len(search_widget.scored_documents),
)
serp, buttons = await search_widget.render()
serp, buttons = await search_widget.render(message_id=message_id)
return await asyncio.gather(
event.answer(),
message.edit(serp, buttons=buttons, link_preview=False)

128
nexus/bot/handlers/seed.py Normal file
View File

@ -0,0 +1,128 @@
import io
import re
from library.telegram.base import RequestContext
from library.telegram.common import close_button
from library.telegram.utils import safe_execution
from nexus.translations import t
from nlptools.izihawa_nlptools.utils import cast_string_to_single_string
from telethon import events
from telethon.tl.types import DocumentAttributeFilename
from .base import BaseHandler
class SeedHandler(BaseHandler):
    """
    Handles `/seed` and `/rseed`: exports the matched documents as a file of
    newline-separated records that the caption's one-liner pipes through `jq`
    into `ipfs pin add`.

    Accepted forms (see `filter`): `/seed[@bot] [offset] [limit]` optionally followed by
    a query on the next line. When a single number is given it is treated as the limit.
    The `r` prefix switches from ordered top documents to reservoir sampling.
    """
    filter = events.NewMessage(
        incoming=True,
        pattern=re.compile(r'^/(r)?seed(?:@\w+)?'
                           r'(?:(?:\s+(\d+))?(?:\s+(\d+))?(\n+.*)?)?$'),
    )
    is_group_handler = False

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """
        Parse the command, run the search and reply with the generated file.

        Replies with the help text when neither numbers nor a query were supplied.
        """
        session_id = self.generate_session_id()
        request_context.add_default_fields(mode='seed', session_id=session_id)

        random_seed = bool(event.pattern_match.group(1))

        if string_offset := event.pattern_match.group(2):
            offset = int(string_offset.strip() or 0)
        else:
            offset = 0

        if string_limit := event.pattern_match.group(3):
            limit = min(int(string_limit.strip()), 10000)
        else:
            # Only one number was given: it is the limit, not the offset.
            limit = offset
            offset = 0

        original_query = ''
        if string_query := event.pattern_match.group(4):
            original_query = string_query.strip()
            query = f'+({original_query}) +ipfs_multihashes:[* TO *]'
        else:
            query = '+ipfs_multihashes:[* TO *]'

        if not string_query and not string_limit and not string_offset:
            request_context.statbox(action='help')
            return await event.reply(t('SEED_HELP', language=request_context.chat.language), buttons=[close_button()])

        wait_message = await event.respond(t('SEED_GENERATION', language=request_context.chat.language))
        try:
            async with safe_execution(error_log=request_context.error_log):
                await event.delete()

            request_context.statbox(
                action='request',
                offset=offset,
                limit=limit,
                query=query,
            )

            if random_seed:
                meta_search_response = await self.application.meta_api_client.meta_search(
                    index_aliases=['scitech', ],
                    query=query,
                    collectors=[{
                        'reservoir_sampling': {
                            'limit': limit,
                            'fields': ['ipfs_multihashes', 'doi', 'md5'],
                        }
                    }, {
                        'count': {}
                    }],
                    skip_cache_loading=True,
                    skip_cache_saving=True,
                    query_tags=['seed'],
                )
                documents = meta_search_response.collector_outputs[0].reservoir_sampling.random_documents
                count = meta_search_response.collector_outputs[1].count.count
            else:
                meta_search_response = await self.application.meta_api_client.meta_search(
                    index_aliases=['scitech', ],
                    query=query,
                    collectors=[{
                        'top_docs': {
                            'limit': limit,
                            'offset': offset,
                            'scorer': {'eval_expr': '-updated_at'},
                            'fields': ['ipfs_multihashes', 'doi', 'md5'],
                        }
                    }, {
                        'count': {}
                    }],
                    query_tags=['seed'],
                )
                documents = meta_search_response.collector_outputs[0].top_docs.scored_documents
                count = meta_search_response.collector_outputs[1].count.count

            # One record per line; built as plain bytes so there is no buffer object to close.
            payload = b''.join(document.document.encode() + b'\n' for document in documents)

            casted_query = cast_string_to_single_string(original_query)
            if not casted_query:
                casted_query = 'cids'
            filename = f'{casted_query[:16]}-{offset}-{limit}-{count}.cids.txt'
            oneliner = f'cat {filename} | jq -c -r ".ipfs_multihashes[0]" | xargs -I{{}} ipfs pin add {{}}'

            query_head = f'`{original_query}`\n\n' if original_query else ''
            # Offset is meaningless for a random sample, so it is omitted from the caption.
            offset_head = f'**Offset:** {offset}\n' if not random_seed else ''
            await self.application.telegram_client.send_file(
                attributes=[DocumentAttributeFilename(filename)],
                buttons=[close_button()],
                caption=f'{query_head}'
                        f'{offset_head}'
                        f'**Limit:** {limit}\n'
                        f'**Total:** {count}\n\n'
                        f'**One-liner:** \n'
                        f'`{oneliner}`',
                entity=request_context.chat.chat_id,
                file=payload,
                reply_to=event,
            )
        finally:
            # Remove the "generation in progress" placeholder even when the search or upload failed.
            async with safe_execution(error_log=request_context.error_log):
                await self.application.telegram_client.delete_messages(request_context.chat.chat_id, [wait_message.id])

View File

@ -17,9 +17,9 @@ class ShortlinkHandler(BaseHandler):
request_context.statbox(action='start', mode='shortlink', query=query)
try:
bot_name = self.application.config["telegram"]["bot_name"]
bot_name = self.application.config['telegram']['bot_name']
text = encode_query_to_deep_link(query, bot_name)
except TooLongQueryError:
text = t('TOO_LONG_QUERY_FOR_SHORTLINK', language=request_context.chat.language),
text = t('TOO_LONG_QUERY_FOR_SHORTLINK', request_context.chat.language)
return await event.reply(f'`{text}`', link_preview=False)

View File

@ -30,14 +30,26 @@ class StartHandler(BaseSearchHandler):
request_context.statbox(action='query', mode='start', query=query)
request_message = await self.application.telegram_client.send_message(event.chat, query)
prefetch_message = await request_message.reply(
t("SEARCHING", language=request_context.chat.language),
t("SEARCHING", request_context.chat.language),
)
self.application.user_manager.last_widget[request_context.chat.chat_id] = prefetch_message.id
text, buttons = await self.setup_widget(
request_context=request_context,
prefetch_message=prefetch_message,
query=query,
is_shortpath_enabled=True,
)
edit_action = self.application.telegram_client.edit_message(
request_context.chat.chat_id,
prefetch_message.id,
text,
buttons=buttons,
link_preview=False,
)
await asyncio.gather(
event.delete(),
self.do_search(event, request_context, prefetch_message, query=query,
is_shortpath_enabled=True),
edit_action,
)
else:
request_context.statbox(action='show', mode='start')
await event.reply(t('HELP', language=request_context.chat.language))
await event.reply(t('HELP', request_context.chat.language))

View File

@ -1,44 +1,125 @@
import asyncio
import io
import re
import zipfile
from izihawa_nlptools.regex import DOI_REGEX
from library.telegram.base import RequestContext
from library.telegram.common import close_button
from nexus.bot.exceptions import UnknownFileFormatError
from nexus.hub.proto import submitter_service_pb2 as submitter_service_pb
from nexus.translations import t
from nexus.views.telegram.common import close_button
from telethon import events
from .base import BaseHandler
class SubmitHandler(BaseHandler):
filter = events.NewMessage(func=lambda e: e.document, incoming=True)
is_group_handler = False
filter = events.NewMessage(
func=lambda e: e.document and e.document.mime_type in ('application/pdf', 'application/zip'),
incoming=True
)
is_group_handler = True
writing_handler = True
async def handler(self, event: events.ChatAction, request_context: RequestContext):
def get_doi_hint(self, message, reply_message):
    """
    Extract a DOI from the submitted message or, failing that, from the message it replies to.

    :param message: the message carrying the document
    :param reply_message: the replied-to message, or a falsy value when there is none
    :return: the DOI assembled as `<group 1>/<group 2>` of `DOI_REGEX`, or `None` when nothing matched
    """
    # The message's own text has priority over the replied-to message.
    for candidate in (message, reply_message):
        # Skip absent messages and empty/None text: `re.search` raises TypeError on None.
        if not candidate or not candidate.raw_text:
            continue
        doi_regex = re.search(DOI_REGEX, candidate.raw_text)
        if doi_regex:
            return doi_regex.group(1) + '/' + doi_regex.group(2)
    return None
async def handler(self, event, request_context: RequestContext):
session_id = self.generate_session_id()
request_context.add_default_fields(session_id=session_id)
request_context.statbox(action='show', mode='submit')
request_context.statbox(action='show', mode='submit', mime_type=event.document.mime_type)
if event.document.mime_type != 'application/pdf':
request_context.statbox(action='unknown_file_format')
request_context.error_log(UnknownFileFormatError(format=event.document.mime_type))
return await asyncio.gather(
event.reply(
t('UNKNOWN_FILE_FORMAT_ERROR', language=request_context.chat.language),
buttons=[close_button()],
),
event.delete(),
)
reply_to = None
message = event
reply_message = await event.get_reply_message()
if reply_message:
reply_to = reply_message.id
return await asyncio.gather(
self.application.hub_client.submit(
telegram_document=bytes(event.document),
telegram_file_id=event.file.id,
chat=request_context.chat,
request_id=request_context.request_id,
session_id=session_id,
bot_name=request_context.bot_name,
),
event.delete(),
doi_hint = self.get_doi_hint(message=message, reply_message=reply_message)
doi_hint_priority = '' in message.raw_text
user_id = message.sender_id
request_context.statbox(
action='analyzed',
mode='submit',
doi_hint=doi_hint,
doi_hint_priority=doi_hint_priority,
reply_to=reply_to,
)
match event.document.mime_type:
case 'application/pdf':
return await self.application.hub_client.submit(
file=submitter_service_pb.TelegramFile(
document=bytes(event.document),
file_id=event.file.id,
message_id=event.id,
),
chat=request_context.chat,
bot_name=request_context.bot_name,
reply_to=reply_to,
request_id=request_context.request_id,
session_id=session_id,
doi_hint=doi_hint,
doi_hint_priority=doi_hint_priority,
uploader_id=user_id,
)
case 'application/zip':
try:
if request_context.is_personal_mode():
file_data = await self.application.telegram_client.download_document(
document=event.document,
file=bytes,
)
request_context.statbox(action='unpack', mode='submit', size=len(file_data))
with zipfile.ZipFile(io.BytesIO(file_data), 'r') as zf:
for filename in zf.namelist():
if not filename.lower().endswith('.pdf'):
continue
nested_file = zf.read(filename)
request_context.statbox(
action='unpacked_file',
mode='submit',
filename=filename,
size=len(nested_file),
)
await self.application.hub_client.submit(
file=submitter_service_pb.PlainFile(
data=nested_file,
filename=filename,
),
chat=request_context.chat,
bot_name=request_context.bot_name,
reply_to=reply_to,
request_id=request_context.request_id,
session_id=session_id,
uploader_id=user_id,
)
else:
await event.reply(t('ZIP_FILES_ARE_NOT_SUPPORTED_IN_GROUP_MODE', request_context.chat.language))
finally:
return await event.delete()
case _:
request_context.statbox(action='unknown_file_format')
request_context.error_log(UnknownFileFormatError(format=event.document.mime_type))
return await asyncio.gather(
event.reply(
t('UNKNOWN_FILE_FORMAT_ERROR', request_context.chat.language),
buttons=None if request_context.is_group_mode() else [close_button()],
),
event.delete(),
)
# Variant of SubmitHandler subscribed to edited incoming messages that carry a document.
# Only the event filter is overridden here; the handling logic is inherited unchanged.
class EditSubmitHandler(SubmitHandler):
filter = events.MessageEdited(func=lambda e: e.document, incoming=True)

View File

@ -1,54 +0,0 @@
from library.telegram.base import RequestContext
from nexus.bot.widgets.document_list_widget import DocumentListWidget
from nexus.translations import t
from telethon import events
from .base import BaseHandler
class TopMissedHandler(BaseHandler):
    """
    Serves the `/tm` command: shows a paged list built from the meta API `top_missed` call.
    """
    filter = events.NewMessage(incoming=True, pattern='^/tm$')
    is_group_handler = False
    should_reset_last_widget = False

    async def do_request(self, request_context: RequestContext, session_id: str, message_id: int, page: int):
        """
        Fetch one page of results and render it into the message identified by `message_id`.
        """
        chat = request_context.chat
        request_id = request_context.request_id

        top_missed = await self.application.meta_api_client.top_missed(
            page=page,
            page_size=10,
            session_id=session_id,
            request_id=request_id,
        )
        widget = DocumentListWidget(
            application=self.application,
            chat=chat,
            typed_documents=top_missed.typed_documents,
            cmd='tm',
            has_next=top_missed.has_next,
            session_id=session_id,
            message_id=message_id,
            request_id=request_id,
            page=page,
        )
        serp, buttons = await widget.render()
        return await self.application.telegram_client.edit_message(
            chat.chat_id,
            message_id,
            serp,
            buttons=buttons,
            link_preview=False,
        )

    async def handler(self, event, request_context: RequestContext):
        """
        Reply with a placeholder message, then fill it with the first page of results.
        """
        session_id = self.generate_session_id()
        request_context.add_default_fields(mode='top_missed', session_id=session_id)
        request_context.statbox()

        placeholder_text = t("SEARCHING", language=request_context.chat.language)
        prefetch_message = await event.reply(placeholder_text)
        return await self.do_request(
            request_context=request_context,
            session_id=session_id,
            message_id=prefetch_message.id,
            page=0,
        )

View File

@ -0,0 +1,190 @@
import datetime
import io
import re
import pandas as pd
import seaborn as sns
from dateparser import parse
from izihawa_utils.pb_to_json import MessageToDict
from library.telegram.base import RequestContext
from library.telegram.common import close_button
from matplotlib import pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from telethon import events
from ...translations import t
from .base import BaseHandler
# RGB triple (each component given as a fraction of 256) used below for the plot title and the watermark text.
COLOR = (40/256, 64/256, 145/256)
# Seaborn configuration executed at import time: default figure size first, then the 'darkgrid' theme.
sns.set(rc={'figure.figsize': (8, 7)})
sns.set_theme(style='darkgrid')
def parse_date(d):
    """
    Render a range bound as a year string.

    The wildcard `'*'` is passed through untouched; anything else is read as a
    Unix timestamp (converted with `int`) and reduced to its local-time year.
    """
    if d != '*':
        moment = datetime.datetime.fromtimestamp(int(d))
        return f'{moment.year}'
    return d
def derive_range(date_start: datetime.datetime, date_end: datetime.datetime):
    """
    Split the interval between two dates into consecutive buckets.

    The bucket width depends on the length of the interval in days:
    daily below 60 days, monthly below 4 years, quarterly below 10 years,
    yearly below 30 years and 5-year periods otherwise.

    :return: a pair `(query_ranges, labels)`; `query_ranges` is a list of
        `{"from": ..., "to": ...}` dicts holding stringified integer timestamps of
        neighbouring period starts, `labels` holds one label per range.
    """
    def month_day(period):
        return f'{period.month:02}-{period.day:02}'

    def year_month(period):
        return f'{period.year}-{period.month:02}'

    def year_only(period):
        return f'{period.year}'

    # (exclusive upper bound in days, pandas period frequency, label formatter)
    granularities = (
        (60, 'D', month_day),
        (365 * 4, 'M', year_month),
        (365 * 10, 'Q', year_month),
        (365 * 30, 'Y', year_only),
    )

    span_days = (date_end - date_start).days
    freq, make_label = '5Y', year_only
    for upper_bound, candidate_freq, candidate_label in granularities:
        if span_days < upper_bound:
            freq, make_label = candidate_freq, candidate_label
            break

    periods = pd.period_range(start=date_start, end=date_end, freq=freq)
    labels = [make_label(period) for period in periods]
    boundaries = [period.to_timestamp().timestamp() for period in periods]
    query_ranges = [
        {"from": str(int(lower)), "to": str(int(upper))}
        for lower, upper in zip(boundaries, boundaries[1:])
    ]
    # N period starts delimit N - 1 ranges, hence the last label is dropped.
    return query_ranges, labels[:-1]
class TrendsHelpHandler(BaseHandler):
    """
    Answers a bare `/trends` command (no arguments) with the usage text.
    """
    filter = events.NewMessage(
        incoming=True,
        pattern=re.compile(r'^/trends$')
    )

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        help_text = t('TRENDS_HELP', language=request_context.chat.language)
        return await event.reply(help_text, buttons=[close_button()])
class TrendsBaseHandler(BaseHandler):
    """
    Shared implementation of the trends plot: subclasses supply the event filter
    and decide how the rendered image is delivered via `send_figure`.
    """

    async def process(self, event: events.ChatAction, request_context: RequestContext):
        """
        Build and deliver a line plot of publication counts over time.

        Pattern groups 1 and 2 are the start and end dates, group 3 holds one query
        per line. Every query is aggregated over the same date buckets and drawn as
        a separate line.
        """
        date_start = event.pattern_match.group(1)
        date_end = event.pattern_match.group(2)
        queries = [query for query in event.pattern_match.group(3).split('\n') if query]

        request_context.statbox(
            action='show',
            date_range=[date_start, date_end],
            queries=queries,
        )

        # NOTE(review): dateparser's `parse` is documented to return None for input it
        # cannot understand; `derive_range` would then fail on the subtraction - confirm
        # whether a user-facing error is wanted here.
        date_start = parse(date_start, settings={'PREFER_DAY_OF_MONTH': 'first'})
        date_end = parse(date_end, settings={'PREFER_DAY_OF_MONTH': 'first'})
        query_ranges, labels = derive_range(date_start, date_end)

        request_context.statbox(
            action='ranges',
            query_ranges=query_ranges,
            labels=labels,
        )

        series = {}
        for query in queries:
            aggregation = await self.application.meta_api_client.meta_search(
                index_aliases=['scimag'],
                query=query,
                collectors=[{
                    'aggregation': {'aggregations': {
                        'topics_per_year': {
                            'bucket': {
                                'range': {
                                    'field': 'issued_at',
                                    'ranges': query_ranges,
                                },
                                'sub_aggregation': {
                                    'topics': {
                                        'metric': {
                                            'stats': {
                                                'field': 'issued_at',
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }}}
                ],
                user_id=str(request_context.chat.chat_id),
                query_tags=['trends'],
            )

            request_context.statbox(
                action='aggregation',
                aggregation=MessageToDict(aggregation),
            )

            docs = []
            for output in aggregation.collector_outputs:
                # The first and the last bucket are skipped; presumably they are the
                # open-ended "before"/"after" buckets of the range aggregation - verify.
                for bucket in output.aggregation.aggregation_results['topics_per_year'].bucket.range.buckets[1:-1]:
                    docs.append(int(bucket.doc_count))
            series[query] = pd.Series(docs)

        data = pd.DataFrame({'date': labels, **series})
        data = data.set_index('date')

        fig, ax = plt.subplots()
        try:
            sns.lineplot(data=data, ax=ax, linewidth=2)
            ax.set_title('Science Trends', fontdict={'fontsize': 32}, color=COLOR)
            ax.legend()
            ax.text(0.01, 0.01, 'https://t.me/nexus_media', transform=ax.transAxes,
                    fontsize=10, color=COLOR, alpha=0.4)
            ax.set(xlabel='', ylabel='# of publications')
            for item in ax.get_xticklabels():
                item.set_rotation(75)

            with io.BytesIO() as plot_file:
                FigureCanvas(fig).print_png(plot_file)
                plot_file.seek(0)
                return await self.send_figure(event, request_context, plot_file)
        finally:
            # pyplot keeps every figure it created alive until it is closed explicitly;
            # without this each request would leak one figure in the long-running process.
            plt.close(fig)

    async def send_figure(self, event, request_context, plot_file):
        """
        Deliver the rendered PNG (`plot_file` is positioned at its start). Must be overridden.
        """
        raise NotImplementedError()
class TrendsHandler(TrendsBaseHandler):
    """
    Handles a new `/trends <start> to <end>` message followed by one query per line
    and replies to it with the plot.
    """
    filter = events.NewMessage(
        incoming=True,
        pattern=re.compile(r'^/trends(?:@\w+)?\s+(.*)\s+to\s+(.*)\n+([\S\s]*)$')
    )
    is_group_handler = True

    async def send_figure(self, event, request_context, plot_file):
        # The close button is attached only in personal mode.
        if request_context.is_personal_mode():
            buttons = [close_button()]
        else:
            buttons = None
        return await event.reply(file=plot_file, buttons=buttons)

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        request_context.add_default_fields(mode='trends')
        return await self.process(event, request_context)
class TrendsEditHandler(TrendsBaseHandler):
    """
    Handles an edited `/trends` message: the plot is rebuilt and placed into the
    message that replied to the edited one.
    """
    filter = events.MessageEdited(
        incoming=True,
        pattern=re.compile(r'^/trends(?:@\w+)?\s+(.*)\s+to\s+(.*)\n+([\S\s]*)$')
    )
    is_group_handler = True

    async def send_figure(self, event, request_context, plot_file):
        # Look through the recent messages for the reply to the edited command;
        # nothing is sent (and None is returned) when no such reply is found.
        recent_messages = await self.get_last_messages_in_chat(event)
        for candidate in recent_messages:
            if not candidate.is_reply or event.id != candidate.reply_to_msg_id:
                continue
            request_context.statbox(action='resolved')
            return await self.application.telegram_client.edit_message(
                request_context.chat.chat_id,
                candidate.id,
                file=plot_file,
                buttons=[close_button()] if request_context.is_personal_mode() else None,
                link_preview=False,
            )

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        request_context.add_default_fields(mode='trends_edit')
        return await self.process(event, request_context)

View File

@ -1,8 +1,10 @@
import asyncio
import time
from library.telegram.base import RequestContext
from nexus.bot.exceptions import MessageHasBeenDeletedError
from library.telegram.utils import safe_execution
from nexus.translations import t
from nexus.views.telegram.base_holder import BaseHolder
from telethon import (
events,
functions,
@ -12,9 +14,12 @@ from telethon.errors import MessageIdInvalidError
from .base import BaseHandler
def is_earlier_than_2_days(message):
    """
    Tell whether `message` was sent less than two days ago, with a 10 second safety margin.

    :param message: an object exposing a `date` attribute of type `datetime.datetime`
    :return: `True` when the message age is below `2 * 24 * 60 * 60 - 10` seconds
    """
    # `datetime.timestamp()` honours tzinfo. The previous `time.mktime(date.timetuple())`
    # reinterpreted the wall-clock fields as local time, which skews timezone-aware dates
    # by the host's UTC offset (assumes Telethon yields aware UTC dates - TODO confirm).
    return time.time() - message.date.timestamp() < 2 * 24 * 60 * 60 - 10
class ViewHandler(BaseHandler):
filter = events.NewMessage(incoming=True, pattern='^/v([ab])([sr])?_([A-Za-z0-9]+)_([0-9]+)_([0-9]+)_'
'([0-9]+)')
filter = events.NewMessage(incoming=True, pattern='^/v([ab])([sr])?_([A-Za-z0-9]+)_([0-9]+)_([0-9]+)_([0-9]+)')
should_reset_last_widget = False
def parse_pattern(self, event: events.ChatAction):
@ -24,94 +29,87 @@ class ViewHandler(BaseHandler):
old_message_id = int(event.pattern_match.group(4))
document_id = int(event.pattern_match.group(5))
position = int(event.pattern_match.group(6))
page = int(position / self.application.config['application']['page_size'])
return index_alias, session_id, old_message_id, document_id, position, page
async def process_widgeting(self, has_found_old_widget, old_message_id, request_context: RequestContext):
if has_found_old_widget:
message_id = old_message_id
link_preview = None
async def get_message(self, message_id):
get_message_request = functions.messages.GetMessagesRequest(id=[message_id])
messages = await self.application.telegram_client(get_message_request)
return messages.messages[0]
async def process_widgeting(self, has_found_old_widget, old_message, request_context: RequestContext):
if has_found_old_widget and is_earlier_than_2_days(old_message):
message_id = old_message.id
else:
old_message = (await self.application.telegram_client(
functions.messages.GetMessagesRequest(id=[old_message_id])
)).messages[0]
prefetch_message = await self.application.telegram_client.send_message(
request_context.chat.chat_id,
t("SEARCHING", language=request_context.chat.language),
t("SEARCHING", request_context.chat.language),
reply_to=old_message.reply_to_msg_id,
)
self.application.user_manager.last_widget[request_context.chat.chat_id] = prefetch_message.id
message_id = prefetch_message.id
link_preview = True
return message_id, link_preview
return message_id
async def compose_back_command(
self,
session_id,
message_id,
page,
):
async def compose_back_command(self, session_id, message_id, page):
return f'/search_{session_id}_{message_id}_{page}'
async def handler(self, event: events.ChatAction, request_context: RequestContext):
index_alias, session_id, old_message_id, document_id, position, page = self.parse_pattern(event)
request_context.add_default_fields(mode='view', session_id=session_id)
request_context.statbox(action='view', document_id=document_id, position=position, index_alias=index_alias)
request_context.statbox(
action='view',
document_id=document_id,
position=position,
index_alias=index_alias
)
old_message = await self.get_message(old_message_id)
has_found_old_widget = old_message_id == self.application.user_manager.last_widget.get(request_context.chat.chat_id)
language = request_context.chat.language
try:
message_id, link_preview = await self.process_widgeting(
message_id = await self.process_widgeting(
has_found_old_widget=has_found_old_widget,
old_message_id=old_message_id,
request_context=request_context
old_message=old_message,
request_context=request_context,
)
document_view = await self.resolve_document(
typed_document_pb = await self.resolve_document(
index_alias,
document_id,
position,
session_id,
request_context,
)
try:
back_command = await self.compose_back_command(
session_id=session_id,
message_id=message_id,
page=page,
)
except MessageHasBeenDeletedError:
return await event.respond(
t('REPLY_MESSAGE_HAS_BEEN_DELETED', language=request_context.chat.language),
)
holder = BaseHolder.create(typed_document_pb=typed_document_pb)
back_command = await self.compose_back_command(session_id=session_id, message_id=message_id, page=page)
view, buttons = document_view.get_view(
language=request_context.chat.language,
session_id=session_id,
bot_name=self.application.config['telegram']['bot_name'],
position=position,
back_command=back_command,
promo = self.application.promotioner.choose_promotion(language).format(
related_channel=self.application.config['telegram']['related_channel'],
twitter_contact_url=self.application.config['twitter']['contact_url'],
)
view_builder = holder.view_builder(language).add_view(
bot_name=self.application.config['telegram']['bot_name']
).add_new_line(2).add(promo, escaped=True)
buttons = holder.buttons_builder(language).add_back_button(back_command).add_default_layout(
bot_name=self.application.config['telegram']['bot_name'],
session_id=session_id,
position=position,
).build()
actions = [
self.application.telegram_client.edit_message(
request_context.chat.chat_id,
message_id,
view,
view_builder.build(),
buttons=buttons,
link_preview=link_preview,
link_preview=view_builder.has_cover,
),
event.delete(),
]
if not has_found_old_widget:
actions.append(
self.application.telegram_client.delete_messages(
request_context.chat.chat_id,
[old_message_id],
)
)
async with safe_execution(error_log=request_context.error_log):
await self.application.telegram_client.delete_messages(request_context.chat.chat_id, [old_message_id])
return await asyncio.gather(*actions)
except MessageIdInvalidError:
await event.reply(t("VIEWS_CANNOT_BE_SHARED", language=request_context.chat.language))
await event.reply(t("VIEWS_CANNOT_BE_SHARED", language))

View File

@ -8,11 +8,9 @@ from nexus.bot.configs import get_config
def main(config):
configure_logging(config)
if config['metrics']['enabled']:
from library.metrics_server import MetricsServer
MetricsServer(config['metrics']).fork_process()
asyncio.set_event_loop(uvloop.new_event_loop())
asyncio.get_event_loop().run_until_complete(TelegramApplication(config=config).start_and_wait())
loop = uvloop.new_event_loop()
asyncio.set_event_loop(loop)
loop.run_until_complete(TelegramApplication(config=config).start_and_wait())
if __name__ == '__main__':

View File

@ -5,8 +5,9 @@ class Promotioner:
"""
Promotioner is used to select promotion randomly based on weights of every promotion.
"""
def __init__(self, promotions: list[dict]):
def __init__(self, promotions: list[dict], default_promotion_index: int = 0):
self.promotions = promotions
self.default_promotion_index = default_promotion_index
self.partial_sums: list = [self.promotions[0]['weight']]
for promotion in self.promotions[1:]:
self.partial_sums.append(promotion['weight'] + self.partial_sums[-1])
@ -18,4 +19,10 @@ class Promotioner:
continue
if language in promotion['texts']:
return promotion['texts'][language]
return promotion['texts']['en']
elif promotion.get('local', False):
default_promotion = self.promotions[self.default_promotion_index]
if language in default_promotion['texts']:
return default_promotion['texts'][language]
return default_promotion['texts']['en']
else:
return promotion['texts']['en']

View File

@ -0,0 +1,131 @@
from typing import Optional
from idm.api.proto import (
chat_manager_service_pb2,
profile_service_pb2,
subscription_manager_service_pb2,
)
from izihawa_nlptools.utils import escape_format
from library.telegram.common import close_button
from nexus.bot.application import TelegramApplication
from nexus.views.telegram.common import (
TooLongQueryError,
encode_query_to_deep_link,
)
from telethon import Button
def limits(text, limit, with_dots: bool = False):
    """
    Cut `text` down to at most `limit` items.

    Text that already fits is returned as is. Otherwise it is truncated and,
    when `with_dots` is set, `'...'` is appended after the truncated part.
    """
    if len(text) <= limit:
        return text
    clipped = text[:limit]
    if with_dots:
        clipped += '...'
    return clipped
class ProfileWidget:
    """
    Renders a user profile (rating, interests, reading history, subscriptions)
    as message text plus an optional row of buttons.
    """

    def __init__(
        self,
        application: TelegramApplication,
        request_context,
        profile: profile_service_pb2.GetProfileResponse,
    ):
        self.application = application
        self.profile = profile
        self.request_context = request_context

    # ToDo: deduplicate functions
    def encode_link(self, bot_name, text, query):
        """Wrap `text` into a markdown deep link for `query`; plain `text` when the query is too long."""
        try:
            deep_link = encode_query_to_deep_link(query, bot_name)
        except TooLongQueryError:
            return text
        return f'[{text}]({deep_link})'

    def get_deep_tag_link(self, bot_name, tag):
        """Deep link that searches for documents carrying `tag`."""
        return self.encode_link(bot_name, tag, f'tags:"{tag}"')

    def get_deep_issn_link(self, bot_name, text, issns):
        """Deep link listing a series by date; only the first two ISSNs are put into the query."""
        terms = ['order_by:date'] + [f'issn:{issn}' for issn in issns[:2]]
        return self.encode_link(bot_name, text=escape_format(text), query=' '.join(terms))

    def encode_rating(self):
        """Map the uploads counter to a badge; thresholds are exclusive lower bounds."""
        badges = (
            (1000, '🏆'),
            (100, '🥇'),
            (10, '🥈'),
            (0, '🥉'),
        )
        for threshold, badge in badges:
            if self.profile.uploads_count > threshold:
                return badge
        return '💩'

    def encode_subscription(self, subscription: subscription_manager_service_pb2.Subscription):
        """One-line textual form of a subscription, depending on its type."""
        kind = subscription.subscription_type
        if kind == subscription_manager_service_pb2.Subscription.Type.CUSTOM:
            return f'`{subscription.subscription_query}`'
        if kind == subscription_manager_service_pb2.Subscription.Type.DIGEST:
            return '🥘 Daily digest'
        if kind == subscription_manager_service_pb2.Subscription.Type.DOI:
            return f'🔬 `{subscription.subscription_query}`'
        return f'{subscription.subscription_query}'

    async def render(self) -> tuple[str, Optional[list]]:
        """
        Compose the profile text and its buttons.

        Journals and reading history are included in personal mode or when the
        profile has connectome enabled; subscriptions and the settings hint appear
        in personal mode only. Buttons are returned in personal mode, `None` otherwise.
        """
        sections = [f'Nexus Rating: {self.encode_rating()}']

        if self.profile.most_popular_tags:
            tag_links = []
            for tag in self.profile.most_popular_tags:
                tag_links.append(self.get_deep_tag_link(
                    bot_name=self.application.config['telegram']['bot_name'],
                    tag=escape_format(tag),
                ))
            sections.append('\n\nInterested in: ' + " - ".join(tag_links))

        if self.request_context.is_personal_mode() or self.profile.is_connectome_enabled:
            if self.profile.most_popular_series:
                series_links = []
                for series in self.profile.most_popular_series:
                    series_links.append('- ' + self.get_deep_issn_link(
                        bot_name=self.application.config['telegram']['bot_name'],
                        text=series.name,
                        issns=series.issns,
                    ))
                sections.append('\n\nMost read journals:\n' + "\n".join(series_links))

            if self.profile.downloaded_documents[:5]:
                display_documents = []
                for downloaded_document in self.profile.downloaded_documents[:5]:
                    title = limits(escape_format(downloaded_document.title), limit=100, with_dots=True)
                    link = self.encode_link(
                        bot_name=self.application.config['telegram']['bot_name'],
                        text=title,
                        query=f"id:{downloaded_document.id}"
                    )
                    display_documents.append(f'- {link}')
                sections.append('\n\nLast read:\n' + "\n".join(display_documents))

        if self.request_context.is_personal_mode() and self.profile.subscriptions:
            display_subscriptions = [
                '- ' + self.encode_subscription(subscription)
                for subscription in self.profile.subscriptions[:5]
            ]
            sections.append('\n\nSubscriptions:\n' + "\n".join(display_subscriptions))
            if len(self.profile.subscriptions) > 5:
                sections.append(f'\n`and {len(self.profile.subscriptions) - 5} more...`')

        if self.request_context.is_personal_mode():
            if self.profile.is_connectome_enabled:
                sections.append('\n\nYou can hide your profile from others in /settings')
            else:
                sections.append('\n\nYou can make your profile visible in /settings')

        profile_view = ''.join(sections)

        digest_button = Button.inline(
            '✨ Digest',
            data='/digest',
        )
        if self.request_context.is_personal_mode():
            buttons = [digest_button, close_button()]
        else:
            buttons = None
        return profile_view, buttons

View File

@ -1,29 +1,38 @@
import logging
import mimetypes
import sys
from typing import Optional
from idm.api.proto.chat_manager_service_pb2 import Chat as ChatPb
from library.telegram.common import close_button
from nexus.bot.application import TelegramApplication
from nexus.meta_api.proto.search_service_pb2 import \
ScoredDocument as ScoredDocumentPb
from nexus.translations import t
from nexus.views.telegram.base_holder import BaseHolder
from nexus.views.telegram.common import (
TooLongQueryError,
close_button,
encode_query_to_deep_link,
)
from nexus.views.telegram.registry import parse_typed_document_to_view
from telethon import Button
from telethon.tl.types import (
DocumentAttributeImageSize,
InputWebDocument,
)
class SearchWidget:
class BaseSearchWidget:
"""
Presents markup for the SERP.
"""
query_tags = ['search']
def __init__(
self,
application: TelegramApplication,
chat: ChatPb,
session_id: str,
message_id: int,
request_id: str,
query: str,
page: int = 0,
@ -32,28 +41,26 @@ class SearchWidget:
self.application = application
self.chat = chat
self.session_id = session_id
self.message_id = message_id
self.request_id = request_id
self.query = query
self.page = page
self.is_group_mode = is_group_mode
@staticmethod
@classmethod
async def create(
cls,
application: TelegramApplication,
chat: ChatPb,
session_id: str,
message_id: int,
request_id: str,
query: str,
page: int = 0,
is_group_mode: bool = False,
) -> 'SearchWidget':
search_widget_view = SearchWidget(
):
search_widget_view = cls(
application=application,
chat=chat,
session_id=session_id,
message_id=message_id,
request_id=request_id,
query=query,
page=page,
@ -72,8 +79,17 @@ class SearchWidget:
session_id=self.session_id,
user_id=str(self.chat.chat_id),
language=self.chat.language,
query_tags=self.query_tags,
)
@property
def query_language(self) -> str:
return self._search_response.query_language
@property
def count(self) -> int:
return self._search_response.count
@property
def has_next(self) -> bool:
return self._search_response.has_next
@ -82,30 +98,41 @@ class SearchWidget:
def scored_documents(self) -> list[ScoredDocumentPb]:
return self._search_response.scored_documents
async def render(self) -> tuple[str, Optional[list]]:
if not len(self.scored_documents):
return t('COULD_NOT_FIND_ANYTHING', language=self.chat.language), [close_button(self.session_id)]
class SearchWidget(BaseSearchWidget):
query_tags = ['search']
async def render(self, message_id) -> tuple[str, Optional[list]]:
if len(self.scored_documents) == 0:
return t('COULD_NOT_FIND_ANYTHING', self.chat.language), [close_button(self.session_id)]
serp_elements = []
bot_name = self.application.config['telegram']['bot_name']
for scored_document in self.scored_documents:
view = parse_typed_document_to_view(scored_document.typed_document)
if not self.is_group_mode:
view_command = view.get_view_command(
holder = BaseHolder.create(scored_document.typed_document, scored_document.snippets)
if self.is_group_mode:
view_command = holder.get_deep_id_link(bot_name, text='⬇️')
else:
view_command = holder.get_view_command(
session_id=self.session_id,
message_id=self.message_id,
message_id=message_id,
position=scored_document.position,
)
else:
view_command = view.get_deep_link(bot_name, text='⬇️')
serp_elements.append(
view.get_snippet(
language=self.chat.language,
view_command=view_command,
limit=512 + 128,
)
holder
.view_builder(self.chat.language)
.add_short_description()
.add_snippet()
.add_new_line()
.add(view_command, escaped=True)
.add_doi_link(with_leading_pipe=True, text='doi.org')
.add_references_counter(bot_name=bot_name, with_leading_pipe=True)
.add_filedata(with_leading_pipe=True)
.build()
)
serp_elements.append(f"__{t('FOUND_N_ITEMS', self.chat.language).format(count=self.count)}__")
serp = '\n\n'.join(serp_elements)
if self.is_group_mode:
@ -115,22 +142,23 @@ class SearchWidget:
bot_name,
)
serp = (
f"{serp}\n\n**{t('DOWNLOAD_AND_SEARCH_MORE', language=self.chat.language)}: **"
f"{serp}\n\n**{t('DOWNLOAD_AND_SEARCH_MORE', self.chat.language)}: **"
f'[@{bot_name}]'
f'({encoded_query})'
)
except TooLongQueryError:
serp = (
f"{serp}\n\n**{t('DOWNLOAD_AND_SEARCH_MORE', language=self.chat.language)}: **"
f"{serp}\n\n**{t('DOWNLOAD_AND_SEARCH_MORE', self.chat.language)}: **"
f'[@{bot_name}]'
f'(https://t.me/{bot_name})'
)
if not self.is_group_mode:
promo = self.application.promotioner.choose_promotion(language=self.chat.language).format(
related_channel=self.application.config['telegram']['related_channel'],
)
serp = f'{serp}\n\n{promo}\n'
promotion_language = self.query_language or self.chat.language
promo = self.application.promotioner.choose_promotion(promotion_language).format(
related_channel=self.application.config['telegram']['related_channel'],
twitter_contact_url=self.application.config['twitter']['contact_url'],
)
serp = f'{serp}\n\n{promo}\n'
buttons = None
if not self.is_group_mode:
@ -139,19 +167,58 @@ class SearchWidget:
buttons = [
Button.inline(
text='<<1' if self.page > 1 else ' ',
data=f'/search_{self.session_id}_{self.message_id}_0' if self.page > 1 else '/noop',
data=f'/search_{self.session_id}_{message_id}_0' if self.page > 1 else '/noop',
),
Button.inline(
text=f'<{self.page}' if self.page > 0 else ' ',
data=f'/search_{self.session_id}_{self.message_id}_{self.page - 1}'
data=f'/search_{self.session_id}_{message_id}_{self.page - 1}'
if self.page > 0 else '/noop',
),
Button.inline(
text=f'{self.page + 2}>' if self.has_next else ' ',
data=f'/search_{self.session_id}_{self.message_id}_{self.page + 1}'
data=f'/search_{self.session_id}_{message_id}_{self.page + 1}'
if self.has_next else '/noop',
)
]
buttons.append(close_button(self.session_id))
return serp, buttons
class InlineSearchWidget(BaseSearchWidget):
    """Search widget that renders the scored documents as inline-query articles."""

    query_tags = ['inline_search']

    def render(self, builder) -> list:
        """Build one article per scored document.

        Args:
            builder: object exposing ``article(...)``; every document is turned
                into one call to it.

        Returns:
            The list of objects returned by ``builder.article``, in the order of
            ``self.scored_documents``.
        """
        articles = []
        bot_name = self.application.config['telegram']['bot_name']
        for scored_document in self.scored_documents:
            holder = BaseHolder.create(scored_document.typed_document)

            # Textual parts of the article.
            title_view = holder.view_builder(self.chat.language).add_icon().add_title(bold=False)
            title = title_view.limits(140).build()
            description_view = holder.view_builder(self.chat.language).add_filedata().add_new_line()
            description = description_view.add_locator(markup=False).limits(160).build()
            response_text = holder.view_builder(self.chat.language).add_short_description().build()
            buttons = (
                holder.buttons_builder(self.chat.language)
                .add_remote_download_button(bot_name=bot_name)
                .build()
            )

            # A thumbnail is attached only when the cover URL exists and its
            # MIME type can be guessed from the URL.
            cover_url = holder.get_thumb_url()
            mimetype = mimetypes.guess_type(cover_url)[0] if cover_url else None
            thumb = None
            if mimetype:
                thumb = InputWebDocument(
                    url=cover_url,
                    size=-1,
                    mime_type=mimetype,
                    attributes=[DocumentAttributeImageSize(24, 24)]
                )

            article = builder.article(
                title,
                id=str(holder.id),
                text=response_text,
                description=description,
                thumb=thumb,
                buttons=buttons,
            )
            articles.append(article)
        return articles

View File

@ -50,6 +50,7 @@ class SettingsWidget:
'sl': self._switch_language,
'ssm': self._switch_system_messaging,
'sd': self._switch_discovery,
'sc': self._switch_connectome,
}
async def _switch_language(self, target_language: str):
@ -60,6 +61,14 @@ class SettingsWidget:
)
return self.chat
async def _switch_connectome(self, is_connectome_enabled: str):
self.chat = await self.application.idm_client.update_chat(
chat_id=self.chat.chat_id,
is_connectome_enabled=bool(int(is_connectome_enabled)),
request_id=self.request_id,
)
return self.chat
async def _switch_system_messaging(self, is_system_messaging_enabled: str):
self.chat = await self.application.idm_client.update_chat(
chat_id=self.chat.chat_id,
@ -82,13 +91,15 @@ class SettingsWidget:
return old_chat != self.chat
async def render(self):
text = t('SETTINGS_TEMPLATE', language=self.chat.language).format(
text = t('SETTINGS_TEMPLATE', self.chat.language).format(
bot_version=self.application.config['application']['bot_version'],
nexus_version=self.application.config['application']['nexus_version'],
language=top_languages.get(self.chat.language, self.chat.language),
)
if not self.is_group_mode and self.application.config['application']['views']['settings']['has_discovery_button']:
text = f"{text}\n\n{t('NEXUS_DISCOVERY_DESCRIPTION', language=self.chat.language)}"
text = f"{text}\n\n{t('NEXUS_DISCOVERY_DESCRIPTION', self.chat.language)}"
if not self.is_group_mode and self.application.config['application']['views']['settings']['has_connectome_button']:
text = f"{text}\n\n{t('NEXUS_CONNECTOME_DESCRIPTION', self.chat.language)}"
buttons = []
if self.has_language_buttons:
buttons.append([])
@ -105,24 +116,23 @@ class SettingsWidget:
if self.is_group_mode:
return text, buttons
if self.application.config['application']['views']['settings']['has_system_messaging_button']:
buttons.append([
Button.inline(
text=(
f'{t("SYSTEM_MESSAGING_OPTION", language=self.chat.language)}: '
f'{boolean_emoji[self.chat.is_system_messaging_enabled]}'
),
data=f'/settings_ssm_{1 - int(self.chat.is_system_messaging_enabled)}'
)
])
last_line = []
if self.application.config['application']['views']['settings']['has_discovery_button']:
buttons.append([
Button.inline(
text=(
f'{t("DISCOVERY_OPTION", language=self.chat.language)}: '
f'{boolean_emoji[self.chat.is_discovery_enabled]}'
),
data=f'/settings_sd_{1 - int(self.chat.is_discovery_enabled)}'
)
])
last_line.append(Button.inline(
text=(
f'{t("DISCOVERY_OPTION", self.chat.language)}: '
f'{boolean_emoji[self.chat.is_discovery_enabled]}'
),
data=f'/settings_sd_{1 - int(self.chat.is_discovery_enabled)}'
))
if self.application.config['application']['views']['settings']['has_connectome_button']:
last_line.append(Button.inline(
text=(
f'{t("CONNECTOME_OPTION", self.chat.language)}: '
f'{boolean_emoji[self.chat.is_connectome_enabled]}'
),
data=f'/settings_sc_{1 - int(self.chat.is_connectome_enabled)}'
))
if last_line:
buttons.append(last_line)
return text, buttons

View File

@ -1,8 +1,6 @@
---
# yamllint disable rule:key-ordering
default_fields: ["abstract", "authors", "language", "title", "tags"]
key_field: "id"
multi_fields: ["authors", "ipfs_multihashes", "issns", "references", "tags"]
default_fields: ["abstract", "authors", "container_title", "content", "tags", "title"]
schema:
- name: id
type: i64
@ -49,10 +47,28 @@ schema:
record: position
tokenizer: summa
stored: true
- name: created_at
type: i64
options:
fast: single
fieldnorms: false
indexed: true
stored: true
- name: ipfs_multihashes
type: text
options:
indexing:
fieldnorms: false
record: basic
tokenizer: raw
stored: true
- name: issns
type: text
options:
indexing: null
indexing:
fieldnorms: true
record: basic
tokenizer: raw
stored: true
- name: issue
type: text
@ -62,6 +78,7 @@ schema:
- name: issued_at
type: i64
options:
fast: single
fieldnorms: false
indexed: true
stored: true
@ -79,11 +96,12 @@ schema:
fieldnorms: false
indexed: false
stored: true
- name: ref_by_count
type: i64
- name: referenced_by_count
type: u64
options:
fast: single
fieldnorms: false
indexed: false
indexed: true
stored: true
- name: references
type: text
@ -93,6 +111,14 @@ schema:
record: basic
tokenizer: raw
stored: false
# NOTE(review): a field named "references" is already declared directly above this
# entry with the same type — confirm that this second declaration is intended.
- name: references
type: text
options:
indexing:
fieldnorms: true
record: basic
tokenizer: raw
stored: false
- name: scimag_bulk_id
type: i64
options:
@ -103,6 +129,7 @@ schema:
type: text
options:
indexing:
fast: multi
fieldnorms: true
record: position
tokenizer: summa
@ -115,9 +142,16 @@ schema:
record: position
tokenizer: summa
stored: true
- name: type
type: text
options:
fieldnorms: false
indexed: false
stored: true
- name: updated_at
type: i64
options:
fast: single
fieldnorms: false
indexed: true
stored: true
@ -132,3 +166,54 @@ schema:
fieldnorms: true
indexed: true
stored: true
- name: content
type: text
options:
indexing:
fieldnorms: true
record: position
tokenizer: summa
stored: false
- name: page_rank
type: f64
options:
fast: single
fieldnorms: false
indexed: true
stored: true
- name: isbns
type: text
options:
indexing:
fieldnorms: true
record: basic
tokenizer: raw
stored: true
multi_fields: ["authors", "ipfs_multihashes", "isbns", "issns", "references", "tags"]
primary_key: "id"
stop_words: ['a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from', 'if', 'in', 'is', 'it', 'of', 'on', 'or',
's', 'that', 'the', 'these', 'this', 'to', 'was', 'were', 'which', 'with', 'aber', 'alle', 'allem',
'allen', 'aller', 'alles', 'als', 'also', 'am', 'an',
'ander', 'andere', 'anderem', 'anderen', 'anderer', 'anderes', 'anderm', 'andern', 'anderr', 'anders',
'auch', 'auf', 'aus', 'bei', 'bin', 'bis', 'bist', 'da', 'dann', 'der', 'den', 'des', 'dem', 'das', 'dass',
'daß', 'derselbe', 'derselben', 'denselben', 'desselben', 'demselben', 'dieselbe', 'dieselben', 'dasselbe',
'dazu', 'dein', 'deine', 'deinem', 'deinen', 'deiner', 'deines', 'denn', 'derer', 'dessen', 'dich', 'dir',
'du', 'dies', 'diese', 'diesem', 'diesen', 'dieser', 'dieses', 'doch', 'dort', 'durch', 'ein', 'eine',
'einem', 'einen', 'einer', 'eines', 'einig', 'einige', 'einigem', 'einigen', 'einiger', 'einiges',
'einmal', 'er', 'ihn', 'ihm', 'es', 'etwas', 'euer', 'eure', 'eurem', 'euren', 'eurer', 'eures', 'für',
'gegen', 'gewesen', 'hab', 'habe', 'haben', 'hat', 'hatte', 'hatten', 'hier', 'hin', 'hinter', 'ich',
'mich', 'mir', 'ihr', 'ihre', 'ihrem', 'ihren', 'ihrer', 'ihres', 'euch', 'im', 'in', 'indem', 'ins',
'ist', 'jede', 'jedem', 'jeden', 'jeder', 'jedes', 'jene', 'jenem', 'jenen', 'jener', 'jenes', 'jetzt',
'kann', 'kein', 'keine', 'keinem', 'keinen', 'keiner', 'keines', 'können', 'könnte', 'machen', 'man',
'manche', 'manchem', 'manchen', 'mancher', 'manches', 'mein', 'meine', 'meinem', 'meinen', 'meiner',
'meines', 'mit', 'muss', 'musste', 'nach', 'nicht', 'nichts', 'noch', 'nun', 'nur', 'ob', 'oder', 'ohne',
'sehr', 'sein', 'seine', 'seinem', 'seinen', 'seiner', 'seines', 'selbst', 'sich', 'sie', 'ihnen', 'sind',
'so', 'solche', 'solchem', 'solchen', 'solcher', 'solches', 'soll', 'sollte', 'sondern', 'sonst', 'um',
'und', 'uns', 'unsere', 'unserem', 'unseren', 'unser', 'unseres', 'unter', 'viel', 'vom', 'von', 'vor',
'während', 'waren', 'warst', 'weg', 'weil', 'weiter', 'welche', 'welchem', 'welchen', 'welcher', 'welches',
'wenn', 'werde', 'werden', 'wie', 'wieder', 'wir', 'wird', 'wirst', 'wo', 'wollen', 'wollte', 'würde',
'würden', 'zu', 'zum', 'zur', 'zwar', 'zwischen', 'и', 'в', 'во', 'не', 'что', 'он', 'на', 'я', 'с', 'со',
'как', 'а', 'то', 'все', 'она', 'так', 'его', 'но', 'да', 'ты', 'к', 'у', 'же', 'вы', 'за', 'бы', 'по',
'ее', 'мне', 'было', 'вот', 'от', 'о', 'из', 'ему', 'ей', 'им', 'de', 'la', 'que', 'el', 'en', 'y', 'a',
'los', 'del', 'se', 'las', 'por', 'un', 'para', 'con', 'una', 'su', 'al', 'lo', 'como', 'más', 'pero',
'sus', 'le', 'ya', 'o', 'este', 'sí']

View File

@ -1,8 +1,6 @@
---
# yamllint disable rule:key-ordering
default_fields: ["authors", "description", "tags", "title"]
key_field: "id"
multi_fields: ["authors", "ipfs_multihashes", "isbns", "tags"]
schema:
- name: id
type: i64
@ -68,8 +66,10 @@ schema:
- name: ipfs_multihashes
type: text
options:
fieldnorms: false
indexed: false
indexing:
fieldnorms: false
record: basic
tokenizer: raw
stored: true
- name: isbns
type: text
@ -115,12 +115,16 @@ schema:
- name: pages
type: i64
options:
indexed: false
fieldnorms: false
indexed: true
stored: true
- name: extension
type: text
options:
indexing: null
indexing:
fieldnorms: false
record: basic
tokenizer: raw
stored: true
- name: md5
type: text
@ -169,3 +173,36 @@ schema:
fieldnorms: true
indexed: true
stored: true
- name: periodical
type: text
options:
indexing: null
stored: true
multi_fields: ["authors", "ipfs_multihashes", "isbns", "tags"]
primary_key: "id"
stop_words: ['a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from', 'if', 'in', 'is', 'it', 'of', 'on', 'or',
's', 'that', 'the', 'their', 'these', 'this', 'to', 'was', 'were', 'with', 'aber', 'alle', 'allem',
'allen', 'aller', 'alles', 'als', 'also', 'am', 'an',
'ander', 'andere', 'anderem', 'anderen', 'anderer', 'anderes', 'anderm', 'andern', 'anderr', 'anders',
'auch', 'auf', 'aus', 'bei', 'bin', 'bis', 'bist', 'da', 'dann', 'der', 'den', 'des', 'dem', 'das', 'dass',
'daß', 'derselbe', 'derselben', 'denselben', 'desselben', 'demselben', 'dieselbe', 'dieselben', 'dasselbe',
'dazu', 'dein', 'deine', 'deinem', 'deinen', 'deiner', 'deines', 'denn', 'derer', 'dessen', 'dich', 'dir',
'du', 'dies', 'diese', 'diesem', 'diesen', 'dieser', 'dieses', 'doch', 'dort', 'durch', 'ein', 'eine',
'einem', 'einen', 'einer', 'eines', 'einig', 'einige', 'einigem', 'einigen', 'einiger', 'einiges',
'einmal', 'er', 'ihn', 'ihm', 'es', 'etwas', 'euer', 'eure', 'eurem', 'euren', 'eurer', 'eures', 'für',
'gegen', 'gewesen', 'hab', 'habe', 'haben', 'hat', 'hatte', 'hatten', 'hier', 'hin', 'hinter', 'ich',
'mich', 'mir', 'ihr', 'ihre', 'ihrem', 'ihren', 'ihrer', 'ihres', 'euch', 'im', 'in', 'indem', 'ins',
'ist', 'jede', 'jedem', 'jeden', 'jeder', 'jedes', 'jene', 'jenem', 'jenen', 'jener', 'jenes', 'jetzt',
'kann', 'kein', 'keine', 'keinem', 'keinen', 'keiner', 'keines', 'können', 'könnte', 'machen', 'man',
'manche', 'manchem', 'manchen', 'mancher', 'manches', 'mein', 'meine', 'meinem', 'meinen', 'meiner',
'meines', 'mit', 'muss', 'musste', 'nach', 'nicht', 'nichts', 'noch', 'nun', 'nur', 'ob', 'oder', 'ohne',
'sehr', 'sein', 'seine', 'seinem', 'seinen', 'seiner', 'seines', 'selbst', 'sich', 'sie', 'ihnen', 'sind',
'so', 'solche', 'solchem', 'solchen', 'solcher', 'solches', 'soll', 'sollte', 'sondern', 'sonst', 'um',
'und', 'uns', 'unsere', 'unserem', 'unseren', 'unser', 'unseres', 'unter', 'viel', 'vom', 'von', 'vor',
'während', 'waren', 'warst', 'weg', 'weil', 'weiter', 'welche', 'welchem', 'welchen', 'welcher', 'welches',
'wenn', 'werde', 'werden', 'wie', 'wieder', 'wir', 'wird', 'wirst', 'wo', 'wollen', 'wollte', 'würde',
'würden', 'zu', 'zum', 'zur', 'zwar', 'zwischen', 'и', 'в', 'во', 'не', 'что', 'он', 'на', 'я', 'с', 'со',
'как', 'а', 'то', 'все', 'она', 'так', 'его', 'но', 'да', 'ты', 'к', 'у', 'же', 'вы', 'за', 'бы', 'по',
'ее', 'мне', 'было', 'вот', 'от', 'о', 'из', 'ему', 'ей', 'им', 'de', 'la', 'que', 'el', 'en', 'y', 'a',
'los', 'del', 'se', 'las', 'por', 'un', 'para', 'con', 'una', 'su', 'al', 'lo', 'como', 'más', 'pero',
'sus', 'le', 'ya', 'o', 'este', 'sí']

View File

@ -1,72 +0,0 @@
load("@io_bazel_rules_docker//container:container.bzl", "container_push")
load("@io_bazel_rules_docker//nodejs:image.bzl", "nodejs_image")
load("@npm//nuxt3:index.bzl", "nuxi")
# Source files of the Nuxt application; passed as `data` to both nuxi targets below.
files = [
"app.vue",
"nuxt.config.ts",
] + glob([
"components/**/*.vue",
"layouts/**/*.vue",
"plugins/**/*.js",
])
# npm packages and in-repo JS libraries passed to the nuxi targets together with `files`.
deps = [
"@npm//axios",
"@npm//bootstrap-vue-3",
"@npm//pug",
"@npm//pug-plain-loader",
"@npm//sass",
"@npm//sass-loader",
"@npm//vue",
"@npm//@types/node",
"//nexus/meta_api/js/client",
"//nexus/views/js",
]
# Runs `nuxi dev` against the nexus/cognitron/web directory.
nuxi(
name = "web-dev",
args = [
"dev",
"nexus/cognitron/web",
],
data = files + deps,
)
# Runs `nuxi build`; the --buildDir flag is read back from argv in nuxt.config.ts.
nuxi(
name = ".output",
args = [
"build",
"nexus/cognitron/web",
"--buildDir=$(@D)",
],
data = files + deps,
output_dir = True,
)
# Node.js image whose entry point is the server bundle inside the ":.output" target.
nodejs_image(
name = "image",
base = "//images/production:base-nodejs-image",
data = [":.output"],
entry_point = ".output/server/index.mjs",
)
# Pushes ":image" to the public repository under the "latest" tag.
container_push(
name = "push-public-latest",
format = "Docker",
image = ":image",
registry = "registry.hub.docker.com",
repository = "thesuperpirate/nexus-cognitron-web",
tag = "latest",
)
# Same image and repository as above, pushed under the "testing" tag.
container_push(
name = "push-public-testing",
format = "Docker",
image = ":image",
registry = "registry.hub.docker.com",
repository = "thesuperpirate/nexus-cognitron-web",
tag = "testing",
)

View File

@ -1,29 +0,0 @@
# Nuxt 3 Minimal Starter
We recommend looking at the [documentation](https://v3.nuxtjs.org).
## Setup
Make sure to install the dependencies:
```bash
yarn install
```
## Development
Start the development server on http://localhost:3000
```bash
yarn dev
```
## Production
Build the application for production:
```bash
yarn build
```
Check out the [deployment documentation](https://v3.nuxtjs.org/docs/deployment).

View File

@ -1,4 +0,0 @@
<template lang="pug">
div
NuxtWelcome
</template>

View File

@ -1,46 +0,0 @@
<template lang="pug">
div.document
v-scimag(v-if="document.schema === 'scimag'" :document="document")
v-scitech(v-if="document.schema === 'scitech'" :document="document")
</template>
<script>
export default {
name: 'Document',
props: {
document: {
type: Object,
required: true
}
}
}
</script>
<style lang="scss">
.document {
.top {
display: flex;
justify-content: space-between;
h6 {
margin-right: 10px;
margin-bottom: 0;
}
img {
max-height: 200px;
max-width: 200px;
object-fit: contain;
width: auto;
}
}
padding: 30px 0;
table {
font-size: 12px;
tr {
word-break: break-all;
}
th {
white-space: nowrap;
}
}
}
</style>

View File

@ -1,32 +0,0 @@
<template lang="pug">
ul
li(v-for='document in documents')
v-scimag-search-item(v-if="document.index_alias == 'scimag'", :document='document', :key='document.id')
v-scitech-search-item(v-if="document.index_alias == 'scitech'", :document='document', :key='document.id')
</template>
<script>
import VScimagSearchItem from '@/components/v-scimag-search-item'
import VScitechSearchItem from '@/components/v-scitech-search-item'
export default {
name: 'SearchList',
components: { VScimagSearchItem, VScitechSearchItem },
props: {
documents: {
type: Array,
required: true
}
}
}
</script>
<style scoped lang="scss">
li {
padding-bottom: 15px;
padding-left: 0;
&:after {
content: none;
}
}
</style>

View File

@ -1,19 +0,0 @@
<template lang="pug">
nav.navbar.fixed-bottom.ml-auto
ul.navbar-nav.ml-auto
li.nav-item
| Powered by&nbsp;
a(href="https://github.com/nexus-stc/hyperboria") Nexus STC
| , 2025
</template>
<script>
export default {
name: 'VFooter',
data () {
return {
query: ''
}
}
}
</script>

View File

@ -1,25 +0,0 @@
<template lang="pug">
nav.navbar.navbar-light.bg-light
b-container
nuxt-link(to="/" title="Go to search!").logo
| > Nexus Cognitron
a.nav-link(href="https://t.me/nexus_search" title="News")
| News
</template>
<script>
export default {
name: 'VHeader',
data () {
return {
query: ''
}
}
}
</script>
<style scoped lang="scss">
a {
padding: 5px 0;
}
</style>

View File

@ -1,70 +0,0 @@
<template lang="pug">
div.d-flex
div
nuxt-link(:to="{ name: 'documents-index-name-id', params: { schema: document.schema, id: document.id }}") {{ document.icon }} {{ document.title }}
.detail
div
i.mr-1 DOI:
span {{ document.doi }}
div(v-if='document.getFirstAuthors(false, 1)')
span {{ document.getFirstAuthors(false, 1) }} {{ issuedAt }}
.gp
span.el.text-uppercase {{ document.getFormattedFiledata() }}
</template>
<script>
import { getIssuedDate } from '@/plugins/helpers'
export default {
name: 'SearchItem',
props: {
document: {
type: Object,
required: true
}
},
computed: {
issuedAt: function () {
const date = getIssuedDate(this.document.issuedAt)
if (date != null) return '(' + date + ')'
return null
}
}
}
</script>
<style scoped lang="scss">
.el {
display: block;
line-height: 1em;
margin-right: 10px;
padding-right: 10px;
border-right: 1px solid;
&:last-child {
border-right: 0;
}
}
img {
margin-left: 15px;
max-width: 48px;
max-height: 48px;
object-fit: contain;
width: auto;
}
.key {
font-weight: bold;
}
.gp {
margin-top: 2px;
display: flex;
}
.detail {
font-size: 12px;
}
i {
text-transform: uppercase;
}
</style>

View File

@ -1,86 +0,0 @@
<template lang="pug">
//- Scimag document card: title, formatted locator and a table of metadata rows.
div
.top
h6 {{ document.title }}
.top
i
h6 {{ document.getFormattedLocator() }}
table
tbody
v-tr(label="DOI", :value="document.doi")
//- maxLength is a prop of v-tr, so it is bound with ":"; "@" would register an event listener instead.
v-tr(label="Description", :value="document.abstract", :max-length="300")
v-tr(label="Tags", :value="tags")
v-tr(label="ISSNS", :value="issns")
v-tr(label="ISBNS", :value="isbns")
v-tr(label="File", :value="document.getFormattedFiledata()")
v-tr-multi-link(label="Links", :links="links")
</template>
<script>
import { getIssuedDate } from '@/plugins/helpers'
import VTr from './v-tr'
import VTrMultiLink from './v-tr-multi-link'
export default {
name: 'VScimag',
components: { VTr, VTrMultiLink },
props: {
document: {
type: Object,
required: true
}
},
computed: {
pages () {
if (this.document.firstPage && this.document.lastPage && this.document.firstPage !== this.document.lastPage) {
return `${this.document.firstPage}-${this.document.lastPage}`
}
return null
},
page () {
if (this.document.firstPage) {
if (this.document.lastPage) {
if (this.document.firstPage === this.document.lastPage) {
return this.document.firstPage
}
} else {
return this.document.firstPage
}
} else if (this.document.lastPage) {
return this.document.lastPage
}
return null
},
issns () {
return (this.document.issnsList || []).join('; ')
},
isbns () {
return (this.document.isbnsList || []).join('; ')
},
issuedAt () {
return getIssuedDate(this.document.issuedAt)
},
ipfsUrl () {
if (!this.document.getIpfsMultihash()) return null
return `${this.$config.ipfs.gateway.url}/ipfs/${this.document.getIpfsMultihash()}?filename=${this.document.getFilename()}&download=true`
},
links () {
const links = []
if (this.ipfsUrl) {
links.push({
url: this.ipfsUrl,
value: 'IPFS.io'
})
} else {
links.push({
url: this.document.getTelegramLink(),
value: 'Nexus Bot'
})
}
return links
},
tags () {
return (this.document.tagsList || []).join('; ')
}
}
}
</script>

View File

@ -1,71 +0,0 @@
<template lang="pug">
div.d-flex
div
nuxt-link(:to="{ name: 'documents-index-name-id', params: { index_alias: document.index_alias, id: document.id }}") {{ document.icon }} {{ document.title }}
.detail
div
i.mr-1(v-if='document.doi') DOI:
span {{ document.doi }}
div(v-if='document.getFirstAuthors(false, 1)')
span {{ document.getFirstAuthors(false, 1) }} {{ issuedAt }}
.gp
span.el.text-uppercase {{ document.getFormattedFiledata() }}
</template>
<script>
import { getIssuedDate } from '@/plugins/helpers'
export default {
name: 'SearchItem',
props: {
document: {
type: Object,
required: true
}
},
computed: {
issuedAt: function () {
const date = getIssuedDate(this.document.issuedAt)
if (date != null) return '(' + date + ')'
return null
},
}
}
</script>
<style scoped lang="scss">
.el {
display: block;
line-height: 1em;
margin-right: 10px;
padding-right: 10px;
border-right: 1px solid;
&:last-child {
border-right: 0;
}
}
img {
margin-left: 15px;
max-width: 48px;
max-height: 48px;
object-fit: contain;
width: auto;
}
.key {
font-weight: bold;
}
.gp {
margin-top: 2px;
display: flex;
}
.detail {
font-size: 12px;
}
i {
text-transform: uppercase;
}
</style>

View File

@ -1,66 +0,0 @@
<template lang="pug">
//- Scitech document card: title, formatted locator and a table of metadata rows.
div
.top
h6 {{ document.title }}
.top
i
h6 {{ document.getFormattedLocator() }}
table
tbody
v-tr(label="DOI", :value="document.doi")
//- maxLength is a prop of v-tr, so it is bound with ":"; "@" would register an event listener instead.
v-tr(label="Description", :value="document.description", :max-length="300")
v-tr(label="Tags", :value="tags")
v-tr(label="ISBNS", :value="isbns")
v-tr(label="ISSNS", :value="issns")
v-tr(label="File", :value="document.getFormattedFiledata()")
v-tr-multi-link(label="Links", :links="links")
</template>
<script>
import { getIssuedDate } from '@/plugins/helpers'
import VTr from './v-tr'
import VTrMultiLink from './v-tr-multi-link'
export default {
name: 'VScitech',
components: { VTr, VTrMultiLink },
props: {
document: {
type: Object,
required: true
}
},
computed: {
isbns () {
return (this.document.isbnsList || []).join('; ')
},
issns () {
return (this.document.issnsList || []).join('; ')
},
issuedAt () {
return getIssuedDate(this.document.issuedAt)
},
ipfsUrl () {
if (!this.document.getIpfsMultihash()) return null
return `${this.$config.ipfs.gateway.url}/ipfs/${this.document.getIpfsMultihash()}?filename=${this.document.getFilename()}&download=true`
},
links () {
const links = []
if (this.ipfsUrl) {
links.push({
url: this.ipfsUrl,
value: 'IPFS.io'
})
} else {
links.push({
url: this.document.getTelegramLink(),
value: 'Nexus Bot'
})
}
return links
},
tags () {
return (this.document.tagsList || []).join('; ')
}
}
}
</script>

View File

@ -1,40 +0,0 @@
<template lang="pug">
//- Table row that shows a label and one download link per entry of `links`.
tr
th {{ label }}
td
//- Each link is keyed by its URL so that Vue can track the rendered anchors across updates.
a(v-for="link in links" :key="link.url" :href="link.url" download) {{ link.value }}
</template>
<script>
export default {
name: 'VTrMultiLink',
props: {
links: {
required: true,
type: Array
},
label: {
required: true,
type: String
}
},
data () {
return {
showAll: false
}
}
}
</script>
<style scoped lang="scss">
tr {
word-break: break-all;
}
th {
white-space: nowrap;
}
td > a {
margin-right: 10px;
}
</style>

View File

@ -1,55 +0,0 @@
<template lang="pug">
tr(v-show="value")
th {{ label }}
td(:class="valueClasses")
| {{ formattedValue }}
cite
a(href="javascript:void(null);" @click="showMore" v-if="shouldCollapseText") show more...
</template>
<script>
export default {
name: 'VTr',
props: {
label: {
type: String,
required: true,
default: ''
},
valueClasses: {
type: String,
required: false,
default: ''
},
value: {
type: [String, Number]
},
maxLength: {
type: Number,
default: 300
}
},
data () {
return {
showAll: false
}
},
computed: {
shouldCollapseText () {
return this.value && this.value.length > this.maxLength && !this.showAll
},
formattedValue () {
if (this.shouldCollapseText) {
return this.value.substr(0, this.maxLength)
} else {
return this.value
}
}
},
methods: {
showMore () {
this.showAll = true
}
}
}
</script>

View File

@ -1,33 +0,0 @@
import { defineNuxtConfig } from 'nuxt3'
// Command-line flag that carries the Nitro output directory.
const BUILD_DIR_FLAG = '--buildDir='
const buildDirArg = process.argv.find((arg) => arg.startsWith(BUILD_DIR_FLAG))
// Stays undefined when the flag was not passed on the command line.
const buildDir = buildDirArg ? buildDirArg.slice(BUILD_DIR_FLAG.length) : undefined

const headMeta = [
  { charset: 'utf-8' },
  { name: 'viewport', content: 'width=device-width, initial-scale=1' },
  { hid: 'description', name: 'description', content: 'Biggest Library on both Earth and Mars' }
]

const headLinks = [
  { rel: 'icon', type: 'image/x-icon', href: '/favicon.ico' },
  { rel: 'apple-touch-icon', sizes: '180x180', href: '/apple-touch-icon.png' },
  { rel: 'icon', type: 'image/png', sizes: '32x32', href: '/favicon-32x32.png' },
  { rel: 'icon', type: 'image/png', sizes: '16x16', href: '/favicon-16x16.png' },
  { rel: 'manifest', href: '/site.webmanifest' },
  { rel: 'mask-icon', href: '/safari-pinned-tab.svg', color: '#5bbad5' },
  { name: 'msapplication-TileColor', content: '#603cba' },
  { name: 'theme-color', content: '#ffffff' }
]

export default defineNuxtConfig({
  head: {
    title: 'Nexus Cognitron',
    meta: headMeta,
    link: headLinks
  },
  nitro: {
    preset: 'server',
    output: {
      dir: buildDir
    }
  }
})

View File

@ -1,10 +0,0 @@
import dateFormat from 'dateformat'
/**
 * Format a Unix timestamp with the 'yyyy' mask.
 *
 * @param unixtime timestamp; it is multiplied by 1000 before being handed to `Date`.
 * @returns the formatted string, or null when `unixtime` is falsy or formatting throws.
 */
export function getIssuedDate (unixtime) {
  if (unixtime) {
    try {
      const issued = new Date(unixtime * 1000)
      return dateFormat(issued, 'yyyy')
    } catch (e) {
      // Formatting failures are reported as "no date" rather than propagated.
      return null
    }
  }
  return null
}

View File

@ -1,41 +0,0 @@
import { ScimagView, ScitechView } from 'nexus-views-js'
import MetaApi from 'nexus-meta-api-js-client'
// Returns the first key of `typedDocument` whose value is not undefined
// (undefined when there is no such key).
function getSchema (typedDocument) {
  const keys = Object.keys(typedDocument)
  return keys.find((key) => typedDocument[key] !== undefined)
}
// Wraps `pb` into the view class that matches `indexName`;
// any other index name yields undefined.
function indexNameToView (indexName, pb) {
  switch (indexName) {
    case 'scimag':
      return new ScimagView(pb)
    case 'scitech':
      return new ScitechView(pb)
    default:
      return undefined
  }
}
// Thin wrapper around the MetaApi client that converts responses into view objects.
class MetaApiWrapper {
  constructor (metaApiConfig) {
    // Without a configured URL the host of the current page is used over plain http.
    const baseUrl = metaApiConfig.url || ('http://' + window.location.host)
    this.metaApi = new MetaApi(baseUrl, metaApiConfig.hostname)
  }

  // Fetches one document and returns it as a view for `indexName`.
  async get (indexName, id) {
    const response = await this.metaApi.get(indexName, id)
    const pb = response[indexName]
    return indexNameToView(indexName, pb)
  }

  // Runs a search and returns `{ hasNext, documents }`, where every document is a view.
  async search (names, query, page, pageSize) {
    const response = await this.metaApi.search(names, query, page, pageSize)
    const toView = ({ typedDocument }) => {
      const indexName = getSchema(typedDocument)
      return indexNameToView(indexName, typedDocument[indexName])
    }
    const documents = response.scoredDocumentsList.map((scoredDocument) => toView(scoredDocument))
    return { hasNext: response.hasNext, documents }
  }
}
// Plugin entry point: builds a MetaApiWrapper from `$config.meta_api`
// and injects it under the name 'meta_api'.
export default ({ $config }, inject) => {
const metaApiWrapper = new MetaApiWrapper($config.meta_api)
inject('meta_api', metaApiWrapper)
}

View File

@ -1,6 +0,0 @@
// Matches every character that is not a Unicode letter. Despite the constant's
// name, digits and whitespace are matched as well.
const ALNUMWHITESPACE_REGEX = /\P{L}/gu
// Matches runs of one or more whitespace characters.
const MULTIWHITESPACE_REGEX = /\s+/g

/**
 * Replace every non-letter with a space, then collapse each whitespace run
 * into a single '-'.
 */
export function castStringToSingleString (s) {
  const lettersAndSpaces = s.replace(ALNUMWHITESPACE_REGEX, ' ')
  const dashed = lettersAndSpaces.replace(MULTIWHITESPACE_REGEX, '-')
  return dashed
}

View File

@ -1,4 +0,0 @@
{
// https://v3.nuxtjs.org/concepts/typescript
"extends": "./.nuxt/tsconfig.json"
}

View File

@ -18,6 +18,7 @@ py3_image(
data = [
"configs/base.yaml",
"configs/logging.yaml",
"configs/pylon.yaml",
],
main = "main.py",
srcs_version = "PY3ONLY",
@ -29,9 +30,13 @@ py3_image(
requirement("cchardet"),
requirement("orjson"),
requirement("prometheus-client"),
requirement("pycryptodome"),
requirement("pypika"),
requirement("python-socks"),
requirement("pytz"),
requirement("tenacity"),
requirement("uvloop"),
"//idm/api/aioclient",
"//idm/api/proto:proto_py",
requirement("aiogrobid"),
"//library/aiogrpctools",
@ -45,6 +50,7 @@ py3_image(
"//nexus/meta_api/aioclient",
"//nexus/models/proto:proto_py",
"//nexus/pylon",
"//nexus/translations",
"//nexus/views/telegram",
],
)

View File

@ -1,27 +1,39 @@
from typing import Optional
from typing import (
Optional,
Union,
)
from aiogrpcclient import BaseGrpcClient
from idm.api.proto.chat_manager_service_pb2 import Chat as ChatPb
from nexus.hub.proto.delivery_service_pb2 import \
StartDeliveryRequest as StartDeliveryRequestPb
from nexus.hub.proto.delivery_service_pb2 import \
StartDeliveryResponse as StartDeliveryResponsePb
from nexus.hub.proto.delivery_service_pb2_grpc import DeliveryStub
from nexus.hub.proto.submitter_service_pb2 import \
SubmitRequest as SubmitRequestPb
from nexus.hub.proto.submitter_service_pb2 import \
SubmitResponse as SubmitResponsePb
from nexus.hub.proto.submitter_service_pb2_grpc import SubmitterStub
from nexus.hub.proto import (
delivery_service_pb2,
delivery_service_pb2_grpc,
submitter_service_pb2,
submitter_service_pb2_grpc,
)
from nexus.models.proto.typed_document_pb2 import \
TypedDocument as TypedDocumentPb
class HubGrpcClient(BaseGrpcClient):
stub_clses = {
'delivery': DeliveryStub,
'submitter': SubmitterStub,
'delivery': delivery_service_pb2_grpc.DeliveryStub,
'submitter': submitter_service_pb2_grpc.SubmitterStub,
}
async def get_availability_data(
    self,
    document_id: int,
    request_id: Optional[str] = None,
    session_id: Optional[str] = None,
) -> delivery_service_pb2.GetAvailabilityDataResponse:
    """Call ``get_availability_data`` on the delivery stub for one document.

    Args:
        document_id: value placed into ``GetAvailabilityDataRequest.document_id``.
        request_id: sent as the ``request-id`` metadata entry.
        session_id: sent as the ``session-id`` metadata entry.

    Returns:
        The awaited result of the stub call.
    """
    rpc = self.stubs['delivery'].get_availability_data
    request = delivery_service_pb2.GetAvailabilityDataRequest(document_id=document_id)
    call_metadata = (
        ('request-id', request_id),
        ('session-id', session_id),
    )
    return await rpc(request, metadata=call_metadata)
async def start_delivery(
self,
typed_document_pb: TypedDocumentPb,
@ -29,9 +41,9 @@ class HubGrpcClient(BaseGrpcClient):
bot_name: str,
request_id: Optional[str] = None,
session_id: Optional[str] = None,
) -> StartDeliveryResponsePb:
) -> delivery_service_pb2.StartDeliveryResponse:
return await self.stubs['delivery'].start_delivery(
StartDeliveryRequestPb(
delivery_service_pb2.StartDeliveryRequest(
typed_document=typed_document_pb,
chat=chat,
bot_name=bot_name,
@ -41,19 +53,26 @@ class HubGrpcClient(BaseGrpcClient):
async def submit(
self,
telegram_document: bytes,
telegram_file_id: str,
file: Union[submitter_service_pb2.PlainFile, submitter_service_pb2.TelegramFile],
chat: ChatPb,
bot_name: str,
reply_to: Optional[int] = None,
doi_hint: Optional[str] = None,
doi_hint_priority: bool = False,
request_id: Optional[str] = None,
session_id: Optional[str] = None,
) -> SubmitResponsePb:
return await self.stubs['submitter'].submit(
SubmitRequestPb(
telegram_document=telegram_document,
telegram_file_id=telegram_file_id,
chat=chat,
bot_name=bot_name,
),
metadata=(('request-id', request_id), ('session-id', session_id))
uploader_id: Optional[int] = None
) -> submitter_service_pb2.SubmitResponse:
request = submitter_service_pb2.SubmitRequest(
chat=chat,
bot_name=bot_name,
reply_to=reply_to,
doi_hint=doi_hint,
doi_hint_priority=doi_hint_priority,
uploader_id=uploader_id,
)
if isinstance(file, submitter_service_pb2.PlainFile):
request.plain.CopyFrom(file)
if isinstance(file, submitter_service_pb2.TelegramFile):
request.telegram.CopyFrom(file)
return await self.stubs['submitter'].submit(request, metadata=(('request-id', request_id), ('session-id', session_id)))

View File

@ -6,6 +6,7 @@ def get_config():
return Configurator([
'nexus/hub/configs/base.yaml',
'nexus/hub/configs/%s.yaml?' % env.type,
'nexus/hub/configs/pylon.yaml',
'nexus/hub/configs/logging.yaml',
], env_prefix='NEXUS_HUB')

Some files were not shown because too many files have changed in this diff Show More