hyperboria/nexus/meta_api/word_transformers.py
the-superpirate 43be16e4bc - [nexus] Update schema
- [nexus] Remove outdated protos
  - [nexus] Development
  - [nexus] Development
  - [nexus] Development
  - [nexus] Development
  - [nexus] Development
  - [nexus] Refactor views
  - [nexus] Update aiosumma
  - [nexus] Add tags
  - [nexus] Development
  - [nexus] Update repository
  - [nexus] Update repository
  - [nexus] Update dependencies
  - [nexus] Update dependencies
  - [nexus] Fixes for MetaAPI
  - [nexus] Support for new queries
  - [nexus] Adopt new versions of search
  - [nexus] Improving Nexus
  - [nexus] Various fixes
  - [nexus] Add profile
  - [nexus] Fixes for ingestion
  - [nexus] Refactorings and bugfixes
  - [idm] Add profile methods
  - [nexus] Fix stalled nexus-meta bugs
  - [nexus] Various bugfixes
  - [nexus] Restore IDM API functionality

GitOrigin-RevId: a0842345a6dde5b321279ab5510a50c0def0e71a
2022-09-02 19:15:47 +03:00

105 lines
3.0 KiB
Python

import re
from aiosumma.parser.elements import (
Group,
Minus,
Plus,
Range,
SearchField,
Word,
)
from aiosumma.tree_transformers import (
ContextWordTreeTransformer,
ValuePredicateWordTreeTransformer,
ValueWordTreeTransformer,
)
from aiosumma.tree_transformers.base import TreeTransformer
from izihawa_nlptools.regex import ISBN_REGEX
def _select_scimag_index(context):
    """Append the ``'scimag'`` alias to ``context.index_aliases``."""
    context.index_aliases.append('scimag')


# Transformer configured with the emoji words below as its node values and
# the helper above as its context transform.
# NOTE(review): presumably the transform runs when a query word equals one of
# these values — confirm against aiosumma's ContextWordTreeTransformer.
scimag_word_transformer = ContextWordTreeTransformer(
    context_transform=_select_scimag_index,
    node_value={'🔬', '⚗️'},
)
def _select_scitech_index(context):
    """Append the ``'scitech'`` alias to ``context.index_aliases``."""
    context.index_aliases.append('scitech')


# Transformer configured with the book emoji below as its node values and
# the helper above as its context transform.
# NOTE(review): presumably the transform runs when a query word equals one of
# these values — confirm against aiosumma's ContextWordTreeTransformer.
scitech_word_transformer = ContextWordTreeTransformer(
    context_transform=_select_scitech_index,
    node_value={'📚', '📕', '📖'},
)
class ExplainWordTransformer(ValueWordTreeTransformer):
    """Value transformer bound to the '🔑' word that enables ``context.explain``."""

    def __init__(self):
        """Register '🔑' as the node value handled by this transformer."""
        super().__init__(node_value='🔑')

    def transform(self, node, context, parents, predicate_result):
        """Set ``context.explain`` to ``True``.

        Always returns ``None``.
        NOTE(review): presumably a ``None`` result drops the marker word from
        the query tree — confirm against aiosumma.
        """
        context.explain = True
        # Falls through on purpose: the method's result is None.


# Shared module-level instance of the transformer defined above.
explain_word_transformer = ExplainWordTransformer()
class YearWordTransformer(ValuePredicateWordTreeTransformer):
    """Predicate transformer for words that match a four-digit pattern.

    The word itself is always kept; for some of them the year is additionally
    passed to ``context.set_query_point_of_time``.
    """

    def node_predicate(self, node):
        """Return the regex match of the four-digit pattern on ``node.value``.

        The result is a match object, or ``None`` when the value does not match.
        """
        four_digit_match = re.match(r'^\d{4}$', node.value)
        return four_digit_match

    def is_single_member_of_group(self, parents):
        """Tell whether the closest parent is a ``Group`` of length one.

        When ``parents`` is empty (or otherwise falsy) it is returned as-is,
        which callers treat as a negative answer.
        """
        if not parents:
            return parents
        closest_parent = parents[-1]
        return isinstance(closest_parent, Group) and len(closest_parent) == 1

    def transform(self, node, context, parents, predicate_result):
        """Record the year on ``context`` when applicable; return ``node``.

        ``context.set_query_point_of_time(year=...)`` is called only when the
        word has parents, is not the sole member of its enclosing group, and
        the year lies strictly between 1800 and 2100. The node is returned
        unchanged in every case.
        """
        year = int(node.value)
        stands_alone = not parents or self.is_single_member_of_group(parents)
        if not stands_alone and 1800 < year < 2100:
            context.set_query_point_of_time(year=year)
        return node
class EditionWordTransformer(ValuePredicateWordTreeTransformer):
    """Predicate transformer turning ordinal words into an ``edition`` field.

    Matches values made of digits followed by ``st``, ``nd``, ``rd`` or ``th``.
    """

    def node_predicate(self, node):
        """Return the regex match of the ordinal pattern on ``node.value``.

        Group 1 of the match holds the digits; the result is ``None`` when the
        value does not match.
        """
        ordinal_match = re.match(r'^(\d+)(st|nd|rd|th)$', node.value)
        return ordinal_match

    def transform(self, node, context, parents, predicate_result):
        """Return ``SearchField('edition', ...)`` for editions in 1..49.

        The digits are taken from group 1 of ``predicate_result`` and passed
        on as text, exactly as captured. Numbers outside the accepted range
        leave the node unchanged.
        """
        number_text = predicate_result.group(1)
        if not 1 <= int(number_text) < 50:
            return node
        return SearchField('edition', Word(number_text))
class IsbnWordTransformer(ValuePredicateWordTreeTransformer):
    """Predicate transformer turning words matching ``ISBN_REGEX`` into an ``isbns`` field."""

    def node_predicate(self, node):
        """Return the match of ``ISBN_REGEX`` on ``node.value`` (``None`` if absent)."""
        isbn_match = re.match(ISBN_REGEX, node.value)
        return isbn_match

    def transform(self, node, context, parents, predicate_result):
        """Return ``SearchField('isbns', ...)`` holding the hyphen-free match.

        Also sets ``context.is_exploration`` to ``False``.
        """
        matched_text = predicate_result[0]
        # Splitting on '-' and re-joining removes every hyphen from the match.
        compact_isbn = ''.join(matched_text.split('-'))
        context.is_exploration = False
        return SearchField('isbns', Word(compact_isbn))
class LanguageWordTransformer(ValuePredicateWordTreeTransformer):
    """Predicate transformer turning flag-emoji words into a ``language`` field."""

    # Flag emoji -> language code placed into the ``language`` search field.
    languages = {
        '🇪🇹': 'am',
        '🇦🇪': 'ar',
        '🇩🇪': 'de',
        '🇬🇧': 'en',
        '🏴󠁧󠁢󠁥󠁮󠁧󠁿': 'en',
        '🇪🇸': 'es',
        '🇮🇷': 'fa',
        '🇮🇳': 'hi',
        '🇮🇩': 'id',
        '🇮🇹': 'it',
        '🇯🇵': 'ja',
        '🇲🇾': 'ms',
        '🇧🇷': 'pb',
        '🇷🇺': 'ru',
        '🇹🇯': 'tg',
        '🇺🇦': 'uk',
        '🇺🇿': 'uz',
    }

    def node_predicate(self, node):
        """Return ``True`` when ``node.value`` is a key of ``languages``."""
        flag = node.value
        return flag in self.languages

    def transform(self, node, context, parents, predicate_result):
        """Return ``SearchField('language', ...)`` with the code mapped from the flag."""
        language_code = self.languages[node.value]
        return SearchField('language', Word(language_code))