mirror of
https://github.com/nexus-stc/hyperboria
synced 2024-12-30 13:35:49 +01:00
7389f85ef2
- Move fancy naming to Hub - Adopt translations - Fix long authors list in bot - Support CJK in scitech - Fixed pylon bug with decoding improper unicode ch... GitOrigin-RevId: 74f73c44f749a71cb65dd5ddd3416f32a83d329f
31 lines
928 B
Python
31 lines
928 B
Python
import re
|
|
from typing import AsyncIterable
|
|
|
|
from nexus.pylon.exceptions import RegexNotFoundError
|
|
|
|
from nexus.pylon.sources.base import (
|
|
DoiSource,
|
|
PreparedRequest,
|
|
)
|
|
|
|
|
|
class ResearchSquareSource(DoiSource):
    """DOI source that locates the PDF link on a Research Square article page.

    The DOI is dereferenced through ``base_url`` and the returned page text
    is scanned for a quoted ``https://www.researchsquare.com/article/...pdf``
    URL.
    """

    # Resolver prefix; the DOI is appended to it to form the landing URL.
    base_url = 'https://dx.doi.org'

    async def resolve(self) -> AsyncIterable[PreparedRequest]:
        """Yield a GET request for the PDF linked from the DOI landing page.

        Yields:
            A single ``PreparedRequest`` whose URL is the first PDF link
            matched in the page text.

        Raises:
            RegexNotFoundError: if the page text contains no matching PDF
                link; the landing URL is attached as ``url``.
        """
        landing_url = f'{self.base_url}/{self.doi}'
        # Double-quoted Research Square article URL ending in ".pdf";
        # group 1 is the URL without the surrounding quotes.
        pdf_link_pattern = r'\"(https://www\.researchsquare\.com/article/[^\"]+\.pdf)\"'

        async with self.get_resolve_session() as session, session.get(
            landing_url,
            timeout=self.resolve_timeout,
        ) as response:
            page_text = await response.text()
            found = re.search(pdf_link_pattern, page_text, re.IGNORECASE)
            if found is None:
                raise RegexNotFoundError(url=landing_url)
            # Yielded while the session and response are still open.
            yield PreparedRequest(method='get', url=found.group(1))
|