[BilibiliChannel] Fix pagination

Closes #222

ccca21d7f5
Coauthored by: nao20010128nao, pukkandan
This commit is contained in:
pukkandan 2021-04-22 04:13:04 +05:30
parent f4536226c1
commit 6efb071135
No known key found for this signature in database
GPG Key ID: 0F00D95A001F4698

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import hashlib import hashlib
import itertools
import json import json
import re import re
@ -498,28 +499,40 @@ def _real_extract(self, url):
class BilibiliChannelIE(InfoExtractor): class BilibiliChannelIE(InfoExtractor):
_VALID_URL = r'https?://space.bilibili\.com/(?P<id>\d+)' _VALID_URL = r'https?://space.bilibili\.com/(?P<id>\d+)'
# May need to add support for pagination? Need to find a user with many video uploads to test _API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%d&jsonp=jsonp"
_API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=1&ps=25&jsonp=jsonp" _TESTS = [{
_TEST = {} # TODO: Add tests 'url': 'https://space.bilibili.com/3985676/video',
'info_dict': {},
'playlist_mincount': 112,
}]
def _entries(self, list_id):
    """Lazily yield one url_result per video listed by the paginated API.

    Pages of ``_API_URL`` are requested one at a time, starting at page 1.
    Iteration stops when a page carries no ``list.vlist`` entries, or when
    the number of entries seen so far reaches the ``page.count`` total (if
    the API reported a truthy one).

    :param list_id: the id interpolated into ``_API_URL`` as ``mid``; it is
        also passed to the download helper as the id for this request.
    """
    count, max_count = 0, None

    for page_num in itertools.count(1):
        # Fetch and decode the page in a single helper call instead of
        # chaining a raw page download with a separate JSON parse.
        data = self._download_json(
            self._API_URL % (list_id, page_num), list_id,
            note='Downloading page %d' % page_num)['data']

        # Keep the first truthy total; later pages do not overwrite it.
        # NOTE(review): try_get is defined elsewhere -- presumably it
        # returns None when the lookup fails; confirm against its definition.
        max_count = max_count or try_get(data, lambda x: x['page']['count'])

        entries = try_get(data, lambda x: x['list']['vlist'])
        if not entries:
            # An empty or missing page ends the listing.
            return
        for entry in entries:
            yield self.url_result(
                'https://www.bilibili.com/video/%s' % entry['bvid'],
                BiliBiliIE.ie_key(), entry['bvid'])

        count += len(entries)
        # Stop without requesting a further page once the reported total
        # has been reached.
        if max_count and count >= max_count:
            return
def _real_extract(self, url): def _real_extract(self, url):
list_id = self._match_id(url) list_id = self._match_id(url)
json_str = self._download_webpage(self._API_URL % list_id, "None") return self.playlist_result(self._entries(list_id), list_id)
json_parsed = json.loads(json_str)
entries = [{
'_type': 'url',
'ie_key': BiliBiliIE.ie_key(),
'url': ('https://www.bilibili.com/video/%s' %
entry['bvid']),
'id': entry['bvid'],
} for entry in json_parsed['data']['list']['vlist']]
return {
'_type': 'playlist',
'id': list_id,
'entries': entries
}
class BiliBiliSearchIE(SearchInfoExtractor): class BiliBiliSearchIE(SearchInfoExtractor):