2021-01-08 21:08:47 +01:00
|
|
|
from typing import (
|
|
|
|
AsyncIterable,
|
2021-04-12 19:47:21 +02:00
|
|
|
Callable,
|
2021-01-08 21:08:47 +01:00
|
|
|
Iterable,
|
|
|
|
Optional,
|
|
|
|
)
|
|
|
|
|
|
|
|
from aiokit import AioThing
|
|
|
|
from library.logging import error_log
|
|
|
|
from nexus.pylon.exceptions import (
|
|
|
|
DownloadError,
|
|
|
|
NotFoundError,
|
|
|
|
)
|
|
|
|
from nexus.pylon.proto.file_pb2 import FileResponse as FileResponsePb
|
|
|
|
from nexus.pylon.sources import (
|
|
|
|
BaseSource,
|
|
|
|
LibgenDoiSource,
|
|
|
|
LibgenMd5Source,
|
|
|
|
LibraryLolSource,
|
2021-02-21 11:48:18 +01:00
|
|
|
SciHubDoSource,
|
2021-01-08 21:08:47 +01:00
|
|
|
SciHubSeSource,
|
|
|
|
)
|
|
|
|
from nexus.pylon.sources.specific import get_specific_sources_for_doi
|
|
|
|
|
|
|
|
|
|
|
|
class PylonClient(AioThing):
    """Download a file by DOI or MD5 by trying a list of sources in order.

    The client builds an ordered list of candidate sources, keeps the ones
    whose ``is_enabled`` attribute is truthy, and relays the responses of the
    first source that completes without raising ``DownloadError``.
    """

    def __init__(self, proxy: Optional[str] = None, resolve_proxy: Optional[str] = None):
        """Store the proxy settings that are forwarded to every source.

        Args:
            proxy: passed as ``proxy`` to each source constructed by this client.
            resolve_proxy: passed as ``resolve_proxy`` to each source
                constructed by this client.
        """
        super().__init__()
        self.proxy = proxy
        self.resolve_proxy = resolve_proxy

    async def by_doi(
        self,
        doi: str,
        md5: Optional[str] = None,
        error_log_func: Callable = error_log,
    ) -> AsyncIterable[FileResponsePb]:
        """Yield download responses for the document identified by ``doi``.

        Candidate order: the sources returned by
        ``get_specific_sources_for_doi`` first, then ``SciHubDoSource``,
        ``SciHubSeSource`` and ``LibgenDoiSource``.

        Args:
            doi: DOI handed to every candidate source.
            md5: optional MD5 handed to the generic (non DOI-specific) sources.
            error_log_func: callable that receives each ``DownloadError``
                swallowed while falling back to the next candidate.

        Raises:
            NotFoundError: propagated from ``download`` when no candidate
                source succeeds.
        """
        proxy_kwargs = dict(proxy=self.proxy, resolve_proxy=self.resolve_proxy)
        candidates = [
            *get_specific_sources_for_doi(doi, **proxy_kwargs),
            SciHubDoSource(doi=doi, md5=md5, **proxy_kwargs),
            SciHubSeSource(doi=doi, md5=md5, **proxy_kwargs),
            LibgenDoiSource(doi=doi, md5=md5, **proxy_kwargs),
        ]
        # Lazy on purpose: ``is_enabled`` is only read when ``download``
        # actually reaches the candidate.
        enabled = (candidate for candidate in candidates if candidate.is_enabled)
        async for response in self.download(sources=enabled, error_log_func=error_log_func):
            yield response

    async def by_md5(
        self,
        md5: str,
        error_log_func: Callable = error_log,
    ) -> AsyncIterable[FileResponsePb]:
        """Yield download responses for the file identified by ``md5``.

        Candidate order: ``LibraryLolSource``, then ``LibgenMd5Source``.

        Args:
            md5: MD5 handed to every candidate source.
            error_log_func: callable that receives each ``DownloadError``
                swallowed while falling back to the next candidate.

        Raises:
            NotFoundError: propagated from ``download`` when no candidate
                source succeeds.
        """
        proxy_kwargs = dict(proxy=self.proxy, resolve_proxy=self.resolve_proxy)
        candidates = [
            LibraryLolSource(md5=md5, **proxy_kwargs),
            LibgenMd5Source(md5=md5, **proxy_kwargs),
        ]
        enabled = (candidate for candidate in candidates if candidate.is_enabled)
        async for response in self.download(sources=enabled, error_log_func=error_log_func):
            yield response

    async def download_source(self, source, error_log_func: Callable = error_log) -> AsyncIterable[FileResponsePb]:
        """Yield responses from a single source, trying each request it resolves.

        A ``RESOLVING`` status response carrying ``source.base_url`` is yielded
        first. Each prepared request produced by ``source.resolve`` is then
        executed in turn; the generator ends after the first request that
        finishes without ``DownloadError``. Responses yielded before a failure
        have already been delivered to the consumer.

        Args:
            source: object providing ``base_url``, ``resolve`` and
                ``execute_prepared_file_request``.
            error_log_func: forwarded to ``source.resolve`` and called with
                each ``DownloadError`` raised while executing a request.

        Raises:
            DownloadError: with ``error='not_found'`` when every resolved
                request failed or none was resolved at all.
        """
        yield FileResponsePb(status=FileResponsePb.Status.RESOLVING, source=source.base_url)

        async for request in source.resolve(error_log_func=error_log_func):
            try:
                async for response in source.execute_prepared_file_request(prepared_file_request=request):
                    yield response
            except DownloadError as exc:
                # This request failed; report it and move on to the next one.
                error_log_func(exc)
            else:
                return

        raise DownloadError(error='not_found', source=str(source))

    async def download(self, sources: Iterable[BaseSource], error_log_func: Callable = error_log) -> AsyncIterable[FileResponsePb]:
        """Yield responses from the first source in ``sources`` that succeeds.

        Each source is started, drained through ``download_source`` and then
        stopped. ``start()`` sits inside the ``try`` block, so ``stop()`` is
        awaited even when ``start()`` itself raises.

        Args:
            sources: candidate sources, tried in iteration order.
            error_log_func: called with each ``DownloadError`` that makes the
                client fall back to the next source; also forwarded to
                ``download_source``.

        Raises:
            NotFoundError: when ``sources`` is exhausted without a success.
        """
        for candidate in sources:
            try:
                await candidate.start()
                async for response in self.download_source(candidate, error_log_func=error_log_func):
                    yield response
            except DownloadError as exc:
                error_log_func(exc)
            else:
                # ``finally`` below still runs before the generator finishes.
                return
            finally:
                await candidate.stop()

        raise NotFoundError()
|