mirror of
https://github.com/nexus-stc/hyperboria
synced 2025-02-04 14:47:48 +01:00
105 lines
3.0 KiB
Python
105 lines
3.0 KiB
Python
|
import re
|
||
|
|
||
|
from aiosumma.parser.elements import (
|
||
|
Group,
|
||
|
Minus,
|
||
|
Plus,
|
||
|
Range,
|
||
|
SearchField,
|
||
|
Word,
|
||
|
)
|
||
|
from aiosumma.tree_transformers import (
|
||
|
ContextWordTreeTransformer,
|
||
|
ValuePredicateWordTreeTransformer,
|
||
|
ValueWordTreeTransformer,
|
||
|
)
|
||
|
from aiosumma.tree_transformers.base import TreeTransformer
|
||
|
from izihawa_nlptools.regex import ISBN_REGEX
|
||
|
|
||
|
def _add_scimag_alias(context):
    """Append the 'scimag' alias to ``context.index_aliases``."""
    context.index_aliases.append('scimag')


# Transformer configured with the emoji words below and the context hook above.
# NOTE(review): presumably the hook fires when a query word equals one of these
# emoji -- confirm against ContextWordTreeTransformer in aiosumma.
scimag_word_transformer = ContextWordTreeTransformer(
    context_transform=_add_scimag_alias,
    node_value={'🔬', '⚗️'},
)
|
||
|
|
||
|
def _add_scitech_alias(context):
    """Append the 'scitech' alias to ``context.index_aliases``."""
    context.index_aliases.append('scitech')


# Transformer configured with the book emoji below and the context hook above.
# NOTE(review): presumably the hook fires when a query word equals one of these
# emoji -- confirm against ContextWordTreeTransformer in aiosumma.
scitech_word_transformer = ContextWordTreeTransformer(
    context_transform=_add_scitech_alias,
    node_value={'📚', '📕', '📖'},
)
|
||
|
|
||
|
|
||
|
class ExplainWordTransformer(ValueWordTreeTransformer):
    """Value transformer registered for the '🔑' word that enables ``context.explain``."""

    def __init__(self):
        # The base class receives the single word value this transformer is keyed on.
        super().__init__(node_value='🔑')

    def transform(self, node, context, parents, predicate_result):
        """Set ``context.explain`` to ``True`` and return ``None`` instead of the node.

        ``node``, ``parents`` and ``predicate_result`` are accepted for interface
        compatibility and are not used here.
        """
        context.explain = True
        return
|
||
|
|
||
|
|
||
|
# Module-level instance of ExplainWordTransformer, created once at import time.
explain_word_transformer = ExplainWordTransformer()
|
||
|
|
||
|
|
||
|
class YearWordTransformer(ValuePredicateWordTreeTransformer):
    """Detect four-digit words and report plausible years to the query context.

    The node itself is always returned unchanged; the only effect is an
    optional call to ``context.set_query_point_of_time``.
    """

    # A word consisting of exactly four digits.
    _four_digits = re.compile(r'^\d{4}$')

    def node_predicate(self, node):
        """Return a match object when ``node.value`` is four digits, else ``None``."""
        return self._four_digits.match(node.value)

    def is_single_member_of_group(self, parents):
        """Return a truthy value when the nearest parent is a ``Group`` of length 1.

        An empty or missing ``parents`` is returned as-is (falsy).
        """
        if not parents:
            return parents
        nearest = parents[-1]
        if not isinstance(nearest, Group):
            return False
        return len(nearest) == 1

    def transform(self, node, context, parents, predicate_result):
        """Record the year on ``context`` when the word is not standing alone.

        A word with no parents, or one that is the only member of its
        enclosing group, is left without side effects. Otherwise a value
        strictly between 1800 and 2100 is passed to
        ``context.set_query_point_of_time``.
        """
        year = int(node.value)
        stands_alone = not parents or self.is_single_member_of_group(parents)
        if not stands_alone and 1800 < year < 2100:
            context.set_query_point_of_time(year=year)
        return node
|
||
|
|
||
|
|
||
|
class EditionWordTransformer(ValuePredicateWordTreeTransformer):
    """Rewrite ordinal words such as ``2nd`` or ``10th`` into an ``edition`` field."""

    def node_predicate(self, node):
        """Return a match whose group 1 is the digit run before st/nd/rd/th, else ``None``."""
        return re.match(r'^(\d+)(st|nd|rd|th)$', node.value)

    def transform(self, node, context, parents, predicate_result):
        """Return ``SearchField('edition', Word(<digits>))`` for editions 1..49.

        Any other number leaves ``node`` unchanged. ``predicate_result`` is the
        match object produced by ``node_predicate``.
        """
        edition = predicate_result.group(1)
        try:
            edition_number = int(edition)
        except ValueError:
            # The pattern allows an unbounded digit run, and int() rejects
            # strings longer than the interpreter's integer string conversion
            # limit. Such a token cannot be an edition, so keep the word as is
            # instead of failing the whole query.
            return node
        if 1 <= edition_number < 50:
            return SearchField('edition', Word(edition))
        return node
|
||
|
|
||
|
|
||
|
class IsbnWordTransformer(ValuePredicateWordTreeTransformer):
    """Turn words matching ``ISBN_REGEX`` into an ``isbns`` search field."""

    def node_predicate(self, node):
        """Return the ``ISBN_REGEX`` match at the start of ``node.value``, or ``None``."""
        return re.match(ISBN_REGEX, node.value)

    def transform(self, node, context, parents, predicate_result):
        """Build the ``isbns`` field from the matched text with hyphens removed.

        Also sets ``context.is_exploration`` to ``False``.
        """
        matched_text = predicate_result[0]
        bare_isbn = ''.join(matched_text.split('-'))
        context.is_exploration = False
        return SearchField('isbns', Word(bare_isbn))
|
||
|
|
||
|
|
||
|
class LanguageWordTransformer(ValuePredicateWordTreeTransformer):
    """Replace a flag-emoji word with a ``language`` search field."""

    # Flag emoji -> language code emitted into the ``language`` field.
    # NOTE(review): the '🏴' key may originally carry invisible tag characters
    # (a subdivision flag) -- verify it against the upstream file.
    languages = {
        '🇪🇹': 'am',
        '🇦🇪': 'ar',
        '🇩🇪': 'de',
        '🇬🇧': 'en',
        '🏴': 'en',
        '🇪🇸': 'es',
        '🇮🇷': 'fa',
        '🇮🇳': 'hi',
        '🇮🇩': 'id',
        '🇮🇹': 'it',
        '🇯🇵': 'ja',
        '🇲🇾': 'ms',
        '🇧🇷': 'pb',
        '🇷🇺': 'ru',
        '🇹🇯': 'tg',
        '🇺🇦': 'uk',
        '🇺🇿': 'uz',
    }

    def node_predicate(self, node):
        """Return ``True`` when ``node.value`` is one of the known flag keys."""
        return node.value in self.languages

    def transform(self, node, context, parents, predicate_result):
        """Return ``SearchField('language', Word(<code>))`` for the node's flag."""
        language_code = self.languages[node.value]
        return SearchField('language', Word(language_code))
|