2021-01-08 21:08:47 +01:00
|
|
|
import re
|
2021-04-12 19:47:21 +02:00
|
|
|
from typing import (
|
|
|
|
AsyncIterable,
|
|
|
|
Callable,
|
|
|
|
)
|
2021-01-08 21:08:47 +01:00
|
|
|
|
|
|
|
from library.logging import error_log
|
|
|
|
from nexus.pylon.exceptions import RegexNotFoundError
|
|
|
|
|
|
|
|
from .base import (
|
|
|
|
Md5Source,
|
|
|
|
PreparedRequest,
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
class LibgenNewSource(Md5Source):
    """Resolve a file by MD5 via a ``<base_url>/main/<MD5>`` landing page.

    The landing page is downloaded once and scanned for direct links to
    public IPFS gateways; every link found is offered as a download request.

    Attributes read from ``self`` (supplied by the base class or a subclass):
    ``base_url``, ``md5``, ``resolve_timeout``, ``ssl`` and ``timeout``.
    """

    # Gateway link patterns, listed in the order their requests are yielded.
    # Dots are escaped so that only the exact gateway hosts match: an
    # unescaped ``.`` would also accept look-alike hosts such as
    # ``ipfs-infura.io`` appearing in the downloaded page.
    ipfs_link_patterns = (
        r'https://ipfs\.io/ipfs/[A-Za-z0-9]+',
        r'https://cloudflare-ipfs\.com/ipfs/[A-Za-z0-9]+',
        r'https://ipfs\.infura\.io/ipfs/[A-Za-z0-9]+',
    )

    async def resolve(self, error_log_func: Callable = error_log) -> AsyncIterable[PreparedRequest]:
        """Yield one download request per IPFS gateway link on the landing page.

        Args:
            error_log_func: Callable invoked with a ``RegexNotFoundError``
                when at least one of the gateway patterns has no match in
                the page. Defaults to ``error_log``.

        Yields:
            ``PreparedRequest`` objects (``GET``), in the order of
            ``ipfs_link_patterns``, built with ``self.ssl`` and
            ``self.timeout``.
        """
        url = f'{self.base_url}/main/{self.md5.upper()}'

        # Only the page text is needed afterwards, so the response and the
        # session are released before any request is handed to the consumer.
        async with self.get_resolve_session() as session:
            async with PreparedRequest(
                method='get',
                url=url,
                timeout=self.resolve_timeout,
            ).execute_with(session) as resp:
                downloaded_page = await resp.text()

        some_link_missing = False
        for pattern in self.ipfs_link_patterns:
            match = re.search(pattern, downloaded_page, re.IGNORECASE)
            if match is None:
                some_link_missing = True
                continue
            yield PreparedRequest(method='get', url=match.group(), ssl=self.ssl, timeout=self.timeout)

        # NOTE(review): the error is reported when ANY gateway link is absent,
        # even if other links were found and yielded. That is the existing
        # behaviour and is kept as-is; confirm whether "no link at all" was
        # the intended condition.
        if some_link_missing:
            error_log_func(RegexNotFoundError(url=url))
|
2021-01-08 21:08:47 +01:00
|
|
|
|
|
|
|
|
|
|
|
class LibraryLolSource(LibgenNewSource):
    """``LibgenNewSource`` configured for the library.lol site."""

    # Root that the inherited ``resolve`` prefixes to ``/main/<MD5>``.
    base_url = 'http://library.lol'
    # Forwarded as the ``ssl`` argument of every yielded download request.
    ssl = False

    # Timeout for fetching the landing page (units not visible here;
    # presumably seconds -- confirm against PreparedRequest).
    resolve_timeout = 20
    # Timeout attached to the yielded download requests (same caveat).
    timeout = 120
|