mirror of
https://github.com/nexus-stc/hyperboria
synced 2024-12-02 16:02:53 +01:00
43be16e4bc
- [nexus] Remove outdated protos - [nexus] Development - [nexus] Development - [nexus] Development - [nexus] Development - [nexus] Development - [nexus] Refactor views - [nexus] Update aiosumma - [nexus] Add tags - [nexus] Development - [nexus] Update repository - [nexus] Update repository - [nexus] Update dependencies - [nexus] Update dependencies - [nexus] Fixes for MetaAPI - [nexus] Support for new queries - [nexus] Adopt new versions of search - [nexus] Improving Nexus - [nexus] Various fixes - [nexus] Add profile - [nexus] Fixes for ingestion - [nexus] Refactorings and bugfixes - [idm] Add profile methods - [nexus] Fix stalled nexus-meta bugs - [nexus] Various bugfixes - [nexus] Restore IDM API functionality GitOrigin-RevId: a0842345a6dde5b321279ab5510a50c0def0e71a
105 lines
3.0 KiB
Python
105 lines
3.0 KiB
Python
import re
|
|
|
|
from aiosumma.parser.elements import (
|
|
Group,
|
|
Minus,
|
|
Plus,
|
|
Range,
|
|
SearchField,
|
|
Word,
|
|
)
|
|
from aiosumma.tree_transformers import (
|
|
ContextWordTreeTransformer,
|
|
ValuePredicateWordTreeTransformer,
|
|
ValueWordTreeTransformer,
|
|
)
|
|
from aiosumma.tree_transformers.base import TreeTransformer
|
|
from izihawa_nlptools.regex import ISBN_REGEX
|
|
|
|
def _add_scimag_alias(context):
    """Append the 'scimag' alias to ``context.index_aliases``."""
    context.index_aliases.append('scimag')


# Configured with the node values '🔬' and '⚗️'; presumably the transformer
# applies `context_transform` when it meets such a word (see aiosumma).
scimag_word_transformer = ContextWordTreeTransformer(
    node_value={'🔬', '⚗️'},
    context_transform=_add_scimag_alias,
)
|
|
|
|
def _add_scitech_alias(context):
    """Append the 'scitech' alias to ``context.index_aliases``."""
    context.index_aliases.append('scitech')


# Configured with the node values '📚', '📕' and '📖'; presumably the
# transformer applies `context_transform` when it meets such a word
# (see aiosumma).
scitech_word_transformer = ContextWordTreeTransformer(
    node_value={'📚', '📕', '📖'},
    context_transform=_add_scitech_alias,
)
|
|
|
|
|
|
class ExplainWordTransformer(ValueWordTreeTransformer):
    """Word transformer registered for the '🔑' node value.

    Its ``transform`` switches on ``context.explain`` and returns ``None``
    in place of the node.
    """

    def __init__(self):
        super().__init__(node_value='🔑')

    def transform(self, node, context, parents, predicate_result):
        """Set ``context.explain`` to ``True`` and return ``None``."""
        context.explain = True
        return None
|
|
|
|
|
|
# Module-level instance of ExplainWordTransformer, created once at import time.
explain_word_transformer = ExplainWordTransformer()
|
|
|
|
|
|
class YearWordTransformer(ValuePredicateWordTreeTransformer):
    """Word transformer for four-digit words, treated as candidate years.

    The node itself is never rewritten; the only effect is an optional call
    to ``context.set_query_point_of_time``.
    """

    def node_predicate(self, node):
        """Return the regex match when ``node.value`` is four digits, else ``None``."""
        return re.match(r'^\d{4}$', node.value)

    def is_single_member_of_group(self, parents):
        """Tell whether the nearest parent is a ``Group`` of length one.

        An empty (falsy) ``parents`` is returned as-is.
        """
        if not parents:
            return parents
        nearest_parent = parents[-1]
        return isinstance(nearest_parent, Group) and len(nearest_parent) == 1

    def transform(self, node, context, parents, predicate_result):
        """Record the year on ``context`` when the word is not standalone.

        A word with no parents, or one that is the sole member of its
        enclosing group, is left alone. Otherwise a value strictly between
        1800 and 2100 is passed to ``context.set_query_point_of_time``.
        The node is returned unchanged in every case.
        """
        year = int(node.value)
        standalone = not parents or self.is_single_member_of_group(parents)
        if not standalone and 1800 < year < 2100:
            context.set_query_point_of_time(year=year)
        return node
|
|
|
|
|
|
class EditionWordTransformer(ValuePredicateWordTreeTransformer):
    """Rewrite ordinal words such as ``2nd`` into an ``edition`` search field.

    Only ordinals from 1 to 49 are rewritten; any other matching word is
    returned unchanged.
    """

    def node_predicate(self, node):
        """Return the regex match for ``<digits><st|nd|rd|th>``, else ``None``."""
        return re.match(r'^(\d+)(st|nd|rd|th)$', node.value)

    def transform(self, node, context, parents, predicate_result):
        """Return ``SearchField('edition', ...)`` for ordinals 1-49, else ``node``.

        ``predicate_result`` is the match produced by ``node_predicate``;
        its first group holds the digits of the ordinal.
        """
        edition = predicate_result.group(1)
        try:
            edition_number = int(edition)
        except ValueError:
            # The predicate accepts any number of digits, and CPython rejects
            # str -> int conversion beyond its digit limit (4300 by default).
            # Such a word cannot be a real edition, so leave it untouched
            # instead of failing the whole query.
            return node
        if 1 <= edition_number < 50:
            return SearchField('edition', Word(edition))
        return node
|
|
|
|
|
|
class IsbnWordTransformer(ValuePredicateWordTreeTransformer):
    """Rewrite words matching ``ISBN_REGEX`` into an ``isbns`` search field."""

    def node_predicate(self, node):
        """Return the match of ``ISBN_REGEX`` against ``node.value``, else ``None``."""
        return re.match(ISBN_REGEX, node.value)

    def transform(self, node, context, parents, predicate_result):
        """Build the ``isbns`` field from the matched text with hyphens removed.

        Also sets ``context.is_exploration`` to ``False``.
        """
        matched_text = predicate_result.group(0)
        isbn = matched_text.replace('-', '')
        context.is_exploration = False
        return SearchField('isbns', Word(isbn))
|
|
|
|
|
|
class LanguageWordTransformer(ValuePredicateWordTreeTransformer):
    """Rewrite flag emoji words into a ``language`` search field."""

    # Flag emoji -> language code used as the value of the `language` field.
    languages = {
        '🇪🇹': 'am',
        '🇦🇪': 'ar',
        '🇩🇪': 'de',
        '🇬🇧': 'en',
        # NOTE(review): shows up as a bare black flag here; verify that no
        # flag tag sequence (e.g. the England flag) was lost in transit.
        '🏴': 'en',
        '🇪🇸': 'es',
        '🇮🇷': 'fa',
        '🇮🇳': 'hi',
        '🇮🇩': 'id',
        '🇮🇹': 'it',
        '🇯🇵': 'ja',
        '🇲🇾': 'ms',
        # NOTE(review): 'pb' is not the ISO 639-1 code for Portuguese ('pt');
        # presumably it matches how the index labels documents — confirm.
        '🇧🇷': 'pb',
        '🇷🇺': 'ru',
        '🇹🇯': 'tg',
        '🇺🇦': 'uk',
        '🇺🇿': 'uz',
    }

    def node_predicate(self, node):
        """Tell whether ``node.value`` is one of the known flag keys."""
        return node.value in self.languages

    def transform(self, node, context, parents, predicate_result):
        """Return ``SearchField('language', ...)`` for the flag's language code."""
        language_code = self.languages[node.value]
        return SearchField('language', Word(language_code))
|