mirror of
https://github.com/nexus-stc/hyperboria
synced 2024-11-23 03:26:50 +01:00
- fix(nexus): Fix DOI detection in messages
GitOrigin-RevId: e4d4519221319c26134d8cbbd011eca2def2b6b3
This commit is contained in:
parent
00c6c6ffff
commit
6b69d092fa
@@ -1,5 +1,5 @@
|
||||
# Nexus Search: Meta API
|
||||
|
||||
```
|
||||
NEXUS_META_API_summa.url=http://summa bazel run -c opt binary
|
||||
NEXUS_META_API_summa.url=http://summa bazel run binary
|
||||
```
|
@@ -5,6 +5,7 @@ from nexus.nlptools.regex import (
|
||||
DOI_REGEX,
|
||||
ISBN_REGEX,
|
||||
NID_REGEX,
|
||||
ONLY_DOI_REGEX,
|
||||
URL_REGEX,
|
||||
)
|
||||
|
||||
@@ -20,10 +21,10 @@ class QueryClass(Enum):
|
||||
|
||||
|
||||
def check_doi(query) -> (QueryClass, str):
|
||||
# ToDo: rewrite normally, just hotfixed
|
||||
if query.startswith('references:'):
|
||||
return
|
||||
if r := re.search(DOI_REGEX, query):
|
||||
if (
|
||||
((r := re.search(DOI_REGEX, query)) and re.search(URL_REGEX, query))
|
||||
or re.search(ONLY_DOI_REGEX, query)
|
||||
):
|
||||
doi = (r[1] + '/' + r[2]).lower()
|
||||
return {
|
||||
'doi': doi,
|
||||
|
@@ -29,4 +29,5 @@ DOI_REGEX = re.compile(r'(10.\d{4,9})\s?/\s?([-._;()<>/:A-Za-z0-9]+[^.?\s])')
|
||||
ISBN_REGEX = re.compile(r'^(?:[iI][sS][bB][nN]\:?\s*)?((97(8|9))?\-?\d{9}(\d|X))$')
|
||||
MD5_REGEX = re.compile(r'([A-Fa-f0-9]{32})')
|
||||
NID_REGEX = re.compile(r'(?:[Nn][Ii][Dd]\s?:?\s*)([0-9]+)')
|
||||
ONLY_DOI_REGEX = re.compile(r'^(10.\d{4,9})\s?/\s?([-._;()<>/:A-Za-z0-9]+[^.?\s])$')
|
||||
PUBMED_ID_REGEX = re.compile(r'(?:(?:https?://)?(?:www.)?ncbi.nlm.nih.gov/pubmed/|[Pp][Mm][Ii][Dd]\s?:?\s*)([0-9]+)')
|
||||
|
Loading…
Reference in New Issue
Block a user