hyperboria/nexus/meta_api/word_transformers.py

import re

from aiosumma.parser.elements import (
    Group,
    Minus,
    Plus,
    Range,
    SearchField,
    Word,
)
from aiosumma.tree_transformers import (
    ContextWordTreeTransformer,
    ValuePredicateWordTreeTransformer,
    ValueWordTreeTransformer,
)
from aiosumma.tree_transformers.base import TreeTransformer
from izihawa_nlptools.regex import ISBN_REGEX

def _add_scimag_alias(context):
    """Append the 'scimag' alias to ``context.index_aliases``."""
    # The result of ``append`` is returned so the callback yields exactly
    # what the equivalent inline lambda would.
    return context.index_aliases.append('scimag')


# Context transformer keyed on the science emoji markers.
# NOTE(review): presumably fires when a query word equals one of the
# ``node_value`` entries -- confirm against aiosumma.
scimag_word_transformer = ContextWordTreeTransformer(
    node_value={'🔬', '⚗️'},
    context_transform=_add_scimag_alias,
)

def _add_scitech_alias(context):
    """Append the 'scitech' alias to ``context.index_aliases``."""
    # The result of ``append`` is returned so the callback yields exactly
    # what the equivalent inline lambda would.
    return context.index_aliases.append('scitech')


# Context transformer keyed on the book emoji markers.
# NOTE(review): presumably fires when a query word equals one of the
# ``node_value`` entries -- confirm against aiosumma.
scitech_word_transformer = ContextWordTreeTransformer(
    node_value={'📚', '📕', '📖'},
    context_transform=_add_scitech_alias,
)


class ExplainWordTransformer(ValueWordTreeTransformer):
    """Word transformer registered for the '🔑' marker.

    Its ``transform`` turns on ``context.explain``.
    """

    def __init__(self):
        # The marker is fixed, so the constructor takes no arguments.
        super().__init__(node_value='🔑')

    def transform(self, node, context, parents, predicate_result):
        """Set ``context.explain`` to ``True`` and return ``None``.

        ``node``, ``parents`` and ``predicate_result`` are not used.

        NOTE(review): returning ``None`` presumably drops the marker word
        from the query tree -- confirm against aiosumma.
        """
        context.explain = True
        return None


# Shared module-level instance of the transformer above.
explain_word_transformer = ExplainWordTransformer()


class YearWordTransformer(ValuePredicateWordTreeTransformer):
    """Word transformer for four-digit words that may denote a year.

    The node itself is always returned unchanged; the only effect is an
    optional call to ``context.set_query_point_of_time``.
    """

    def node_predicate(self, node):
        """Return the regex match when ``node.value`` is four digits, else ``None``."""
        four_digits = r'^\d{4}$'
        return re.match(four_digits, node.value)

    def is_single_member_of_group(self, parents):
        """Tell whether the innermost parent is a ``Group`` of length one.

        When ``parents`` is falsy it is returned as-is (so the result is
        falsy but not necessarily ``False``).
        """
        if not parents:
            return parents
        innermost = parents[-1]
        return isinstance(innermost, Group) and len(innermost) == 1

    def transform(self, node, context, parents, predicate_result):
        """Record the year on the context when the word is not standalone.

        The year is recorded only if the node has parents, is not the sole
        member of its enclosing group, and lies strictly between 1800 and
        2100. ``predicate_result`` is not used.

        Returns:
            ``node``, unchanged, in every case.
        """
        year = int(node.value)
        standalone = not parents or self.is_single_member_of_group(parents)
        if not standalone and 1800 < year < 2100:
            context.set_query_point_of_time(year=year)
        return node


class EditionWordTransformer(ValuePredicateWordTreeTransformer):
    """Word transformer for ordinal words such as ``2nd`` or ``10th``."""

    def node_predicate(self, node):
        """Match digits followed by one of the suffixes st/nd/rd/th.

        Returns the match object (group 1 holds the digits) or ``None``.
        """
        ordinal_pattern = r'^(\d+)(st|nd|rd|th)$'
        return re.match(ordinal_pattern, node.value)

    def transform(self, node, context, parents, predicate_result):
        """Rewrite the word as an ``edition`` search field when plausible.

        ``predicate_result`` is used as a match object whose first group is
        the digit string. Editions outside ``1 <= n < 50`` leave the node
        untouched. The digit string is passed on verbatim (it is not
        re-formatted from the parsed integer).
        """
        digits = predicate_result.group(1)
        if not 1 <= int(digits) < 50:
            return node
        return SearchField('edition', Word(digits))


class IsbnWordTransformer(ValuePredicateWordTreeTransformer):
    """Word transformer for words that match ``ISBN_REGEX``."""

    def node_predicate(self, node):
        """Return the ``ISBN_REGEX`` match at the start of ``node.value``, or ``None``."""
        value = node.value
        return re.match(ISBN_REGEX, value)

    def transform(self, node, context, parents, predicate_result):
        """Replace the word with an ``isbns`` search field.

        Takes the whole matched text (``predicate_result[0]``), strips
        hyphens from it, and sets ``context.is_exploration`` to ``False``
        before returning the new field.
        """
        matched_text = predicate_result[0]
        hyphenless = matched_text.replace('-', '')
        # The context is touched only after the match access has succeeded.
        context.is_exploration = False
        return SearchField('isbns', Word(hyphenless))


class LanguageWordTransformer(ValuePredicateWordTreeTransformer):
    """Word transformer that turns flag emoji into ``language`` search fields."""

    # Flag emoji -> language code placed into the ``language`` field.
    # Two different flags map to 'en'.
    languages = {
        '🇪🇹': 'am',
        '🇦🇪': 'ar',
        '🇩🇪': 'de',
        '🇬🇧': 'en',
        '🏴󠁧󠁢󠁥󠁮󠁧󠁿': 'en',
        '🇪🇸': 'es',
        '🇮🇷': 'fa',
        '🇮🇳': 'hi',
        '🇮🇩': 'id',
        '🇮🇹': 'it',
        '🇯🇵': 'ja',
        '🇲🇾': 'ms',
        '🇧🇷': 'pb',
        '🇷🇺': 'ru',
        '🇹🇯': 'tg',
        '🇺🇦': 'uk',
        '🇺🇿': 'uz',
    }

    def node_predicate(self, node):
        """Return ``True`` when ``node.value`` is one of the known flags."""
        flag = node.value
        return flag in self.languages

    def transform(self, node, context, parents, predicate_result):
        """Return a ``language`` search field for the flag in ``node.value``.

        ``context``, ``parents`` and ``predicate_result`` are not used.
        """
        language_code = self.languages[node.value]
        return SearchField('language', Word(language_code))
- [nexus] Update schema - [nexus] Remove outdated protos - [nexus] Development - [nexus] Development - [nexus] Development - [nexus] Development - [nexus] Development - [nexus] Refactor views - [nexus] Update aiosumma - [nexus] Add tags - [nexus] Development - [nexus] Update repository - [nexus] Update repository - [nexus] Update dependencies - [nexus] Update dependencies - [nexus] Fixes for MetaAPI - [nexus] Support for new queries - [nexus] Adopt new versions of search - [nexus] Improving Nexus - [nexus] Various fixes - [nexus] Add profile - [nexus] Fixes for ingestion - [nexus] Refactorings and bugfixes - [idm] Add profile methods - [nexus] Fix stalled nexus-meta bugs - [nexus] Various bugfixes - [nexus] Restore IDM API functionality GitOrigin-RevId: a0842345a6dde5b321279ab5510a50c0def0e71a 2022-09-02 18:44:56 +03:00			`import re`

			`from aiosumma.parser.elements import (`
			`Group,`
			`Minus,`
			`Plus,`
			`Range,`
			`SearchField,`
			`Word,`
			`)`
			`from aiosumma.tree_transformers import (`
			`ContextWordTreeTransformer,`
			`ValuePredicateWordTreeTransformer,`
			`ValueWordTreeTransformer,`
			`)`
			`from aiosumma.tree_transformers.base import TreeTransformer`
			`from izihawa_nlptools.regex import ISBN_REGEX`

			`scimag_word_transformer = ContextWordTreeTransformer(`
			`node_value={'🔬', '⚗️'},`
			`context_transform=lambda context: context.index_aliases.append('scimag'),`
			`)`

			`scitech_word_transformer = ContextWordTreeTransformer(`
			`node_value={'📚', '📕', '📖'},`
			`context_transform=lambda context: context.index_aliases.append('scitech'),`
			`)`


			`class ExplainWordTransformer(ValueWordTreeTransformer):`
			`def __init__(self):`
			`super().__init__(node_value='🔑')`

			`def transform(self, node, context, parents, predicate_result):`
			`context.explain = True`
			`return None`


			`explain_word_transformer = ExplainWordTransformer()`


			`class YearWordTransformer(ValuePredicateWordTreeTransformer):`
			`def node_predicate(self, node):`
			`return re.match(r'^\d{4}$', node.value)`

			`def is_single_member_of_group(self, parents):`
			`return parents and isinstance(parents[-1], Group) and len(parents[-1]) == 1`

			`def transform(self, node, context, parents, predicate_result):`
			`year = int(node.value)`
			`if not parents or self.is_single_member_of_group(parents):`
			`return node`
			`if 1800 < year < 2100:`
			`context.set_query_point_of_time(year=year)`
			`return node`


			`class EditionWordTransformer(ValuePredicateWordTreeTransformer):`
			`def node_predicate(self, node):`
			`return re.match(r'^(\d+)(st\|nd\|rd\|th)$', node.value)`

			`def transform(self, node, context, parents, predicate_result):`
			`edition = predicate_result.group(1)`
			`if 1 <= int(edition) < 50:`
			`return SearchField('edition', Word(edition))`
			`return node`


			`class IsbnWordTransformer(ValuePredicateWordTreeTransformer):`
			`def node_predicate(self, node):`
			`return re.match(ISBN_REGEX, node.value)`

			`def transform(self, node, context, parents, predicate_result):`
			`isbn = predicate_result[0].replace('-', '')`
			`context.is_exploration = False`
			`return SearchField('isbns', Word(isbn))`


			`class LanguageWordTransformer(ValuePredicateWordTreeTransformer):`
			`languages = {`
			`'🇪🇹': 'am',`
			`'🇦🇪': 'ar',`
			`'🇩🇪': 'de',`
			`'🇬🇧': 'en',`
			`'🏴󠁧󠁢󠁥󠁮󠁧󠁿': 'en',`
			`'🇪🇸': 'es',`
			`'🇮🇷': 'fa',`
			`'🇮🇳': 'hi',`
			`'🇮🇩': 'id',`
			`'🇮🇹': 'it',`
			`'🇯🇵': 'ja',`
			`'🇲🇾': 'ms',`
			`'🇧🇷': 'pb',`
			`'🇷🇺': 'ru',`
			`'🇹🇯': 'tg',`
			`'🇺🇦': 'uk',`
			`'🇺🇿': 'uz',`
			`}`

			`def node_predicate(self, node):`
			`return node.value in self.languages`

			`def transform(self, node, context, parents, predicate_result):`
			`return SearchField('language', Word(self.languages[node.value]))`