hyperboria/nexus/pylon/resolvers/doi_org_request.py
the-superpirate 43be16e4bc - [nexus] Update schema
- [nexus] Remove outdated protos
  - [nexus] Development
  - [nexus] Development
  - [nexus] Development
  - [nexus] Development
  - [nexus] Development
  - [nexus] Refactor views
  - [nexus] Update aiosumma
  - [nexus] Add tags
  - [nexus] Development
  - [nexus] Update repository
  - [nexus] Update repository
  - [nexus] Update dependencies
  - [nexus] Update dependencies
  - [nexus] Fixes for MetaAPI
  - [nexus] Support for new queries
  - [nexus] Adopt new versions of search
  - [nexus] Improving Nexus
  - [nexus] Various fixes
  - [nexus] Add profile
  - [nexus] Fixes for ingestion
  - [nexus] Refactorings and bugfixes
  - [idm] Add profile methods
  - [nexus] Fix stalled nexus-meta bugs
  - [nexus] Various bugfixes
  - [nexus] Restore IDM API functionality

GitOrigin-RevId: a0842345a6dde5b321279ab5510a50c0def0e71a
2022-09-02 19:15:47 +03:00

63 lines
2.0 KiB
Python

import json
import logging
from typing import (
AsyncIterable,
Dict,
List,
Optional,
)
import jq
from nexus.pylon.prepared_request import PreparedRequest
from nexus.pylon.proxy_manager import ProxyManager
from nexus.pylon.resolvers.base import BaseResolver
class DoiOrgRequestResolver(BaseResolver):
    """Resolve a DOI into a follow-up request using the JSON served by doi.org.

    The resolver fetches ``https://doi.org/<doi>`` with an
    ``Accept: application/json`` header, applies a compiled jq program to the
    decoded body, and substitutes the selected value into ``format_string``
    to build the URL of the request it yields.
    """

    def __init__(
        self,
        selector='.link[0].URL',
        format_string='{selected}',
        timeout: float = 10.0,
        resolve_timeout: float = 10.0,
        proxy_list: Optional[List] = None,
        proxy_manager: Optional[ProxyManager] = None,
    ):
        """Store the resolver configuration.

        Args:
            selector: jq program source; compiled once here and applied to the
                doi.org response body in ``resolve``.
            format_string: ``str.format`` template; receives the selected
                value as the ``selected`` keyword.
            timeout: timeout passed to the request yielded by ``resolve``.
            resolve_timeout: timeout passed to the doi.org lookup request.
            proxy_list: forwarded unchanged to the base class constructor.
            proxy_manager: forwarded unchanged to the base class constructor.
        """
        super().__init__(proxy_list=proxy_list, proxy_manager=proxy_manager)
        # Compile eagerly so the same program object is reused for every call.
        self.selector = jq.compile(selector)
        self.format_string = format_string
        self.timeout = timeout
        self.resolve_timeout = resolve_timeout

    def __str__(self):
        """Return the class name together with the selector and format string."""
        cls_name = self.__class__.__name__
        return f'{cls_name}(selector = {self.selector}, format_string = {self.format_string})'

    async def resolve_through_doi_org(self, params):
        """Fetch doi.org for ``params["doi"]`` and return the decoded JSON body.

        Args:
            params: mapping that must contain a ``"doi"`` key.

        Returns:
            Whatever ``resp.json()`` produces for the doi.org response.
        """
        # get_session() is not defined in this class; presumably it is
        # provided by BaseResolver -- confirm against the base class.
        async with self.get_session() as session:
            lookup_request = PreparedRequest(
                method='get',
                url=f'https://doi.org/{params["doi"]}',
                timeout=self.resolve_timeout,
                headers={'Accept': 'application/json'}
            )
            async with lookup_request.execute_with(session=session) as response:
                payload = await response.json()
                return payload

    async def resolve(self, params: Dict) -> AsyncIterable[PreparedRequest]:
        """Yield at most one request built from the doi.org response.

        A ``ValueError`` raised while applying the selector or parsing its
        text output as JSON is logged to the ``'error'`` logger and ends the
        generator without yielding. A falsy selected value also yields
        nothing.

        Args:
            params: mapping passed through to ``resolve_through_doi_org``.
        """
        metadata = await self.resolve_through_doi_org(params)
        try:
            selector_output = self.selector.input(metadata).text()
            selected = json.loads(selector_output)
        except ValueError as e:
            error_record = {
                'action': 'error',
                'mode': 'pylon',
                'error': str(e)
            }
            logging.getLogger('error').error(error_record)
            return

        # Nothing usable was selected: finish without producing a request.
        if not selected:
            return

        target_url = self.format_string.format(selected=selected)
        yield PreparedRequest(
            method='get',
            url=target_url,
            timeout=self.timeout,
        )