[extractor/vk] Fix extractor (#4128)

Closes #4437
Authored by: Mehavoid
This commit is contained in:
Mehavoid 2022-07-27 23:31:03 +03:00 committed by GitHub
parent bfbb5a1bb1
commit 59f63c8f0f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,11 +1,17 @@
import collections
import hashlib
import re
from .common import InfoExtractor
from .dailymotion import DailymotionIE
from .odnoklassniki import OdnoklassnikiIE
from .pladform import PladformIE
from .vimeo import VimeoIE
from .youtube import YoutubeIE
from ..compat import compat_urlparse
from ..utils import (
clean_html,
ExtractorError,
clean_html,
get_element_by_class,
int_or_none,
orderedSet,
@ -13,19 +19,29 @@
str_to_int,
unescapeHTML,
unified_timestamp,
update_url_query,
url_or_none,
urlencode_postdata,
)
from .dailymotion import DailymotionIE
from .odnoklassniki import OdnoklassnikiIE
from .pladform import PladformIE
from .vimeo import VimeoIE
from .youtube import YoutubeIE
class VKBaseIE(InfoExtractor):
_NETRC_MACHINE = 'vk'
def _download_webpage_handle(self, url_or_request, video_id, *args, fatal=True, **kwargs):
    """Download a page, transparently answering vk.com's 429 challenge redirect.

    The request is first delegated to the parent implementation. If the
    final URL of the response starts with ``https://vk.com/429.html?`` and a
    ``hash429`` cookie is set for that URL, the challenge URL is requested
    again with a ``key`` query parameter holding the MD5 hex digest of the
    cookie value, and the original download is then repeated.

    @param url_or_request  Forwarded unchanged to the parent implementation.
    @param video_id        Forwarded unchanged; also used for the challenge request.
    @param fatal           Forwarded to the parent download, to the challenge
                           request and to the retried download.
    @returns               Whatever the parent implementation returns, either
                           from the first attempt or from the retry.
    """
    response = super()._download_webpage_handle(url_or_request, video_id, *args, fatal=fatal, **kwargs)

    # A falsy response (presumably a failed non-fatal download) has no final
    # URL to inspect, so it is returned to the caller as-is.
    challenge_url = response[1].geturl() if response else ''
    if not challenge_url.startswith('https://vk.com/429.html?'):
        return response

    cookie = self._get_cookies(challenge_url).get('hash429')
    if not cookie:
        return response

    hash429 = hashlib.md5(cookie.value.encode('ascii')).hexdigest()
    self._request_webpage(
        update_url_query(challenge_url, {'key': hash429}), video_id, fatal=fatal,
        note='Resolving WAF challenge', errnote='Failed to bypass WAF challenge')

    # Retry with the caller's own `fatal` so a non-fatal request stays
    # non-fatal even when the retry fails.
    return super()._download_webpage_handle(url_or_request, video_id, *args, fatal=fatal, **kwargs)
def _perform_login(self, username, password):
login_page, url_handle = self._download_webpage_handle(
'https://vk.com', None, 'Downloading login page')
@ -51,11 +67,14 @@ def _perform_login(self, username, password):
'Unable to login, incorrect username and/or password', expected=True)
def _download_payload(self, path, video_id, data, fatal=True):
endpoint = f'https://vk.com/{path}.php'
data['al'] = 1
code, payload = self._download_json(
'https://vk.com/%s.php' % path, video_id,
data=urlencode_postdata(data), fatal=fatal,
headers={'X-Requested-With': 'XMLHttpRequest'})['payload']
endpoint, video_id, data=urlencode_postdata(data), fatal=fatal,
headers={
'Referer': endpoint,
'X-Requested-With': 'XMLHttpRequest',
})['payload']
if code == '3':
self.raise_login_required()
elif code == '8':
@ -84,17 +103,20 @@ class VKIE(VKBaseIE):
_TESTS = [
{
'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
'md5': '7babad3b85ea2e91948005b1b8b0cb84',
'info_dict': {
'id': '-77521_162222515',
'ext': 'mp4',
'title': 'ProtivoGunz - Хуёвая песня',
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
'uploader_id': '-77521',
'uploader_id': '39545378',
'duration': 195,
'timestamp': 1329049880,
'upload_date': '20120212',
'comment_count': int,
'like_count': int,
'thumbnail': r're:https?://.+\.jpg$',
},
'params': {'skip_download': 'm3u8'},
},
{
'url': 'http://vk.com/video205387401_165548505',
@ -107,12 +129,14 @@ class VKIE(VKBaseIE):
'duration': 9,
'timestamp': 1374364108,
'upload_date': '20130720',
'comment_count': int,
'like_count': int,
'thumbnail': r're:https?://.+\.jpg$',
}
},
{
'note': 'Embedded video',
'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
'md5': '7babad3b85ea2e91948005b1b8b0cb84',
'info_dict': {
'id': '-77521_162222515',
'ext': 'mp4',
@ -121,8 +145,10 @@ class VKIE(VKBaseIE):
'duration': 195,
'upload_date': '20120212',
'timestamp': 1329049880,
'uploader_id': '-77521',
'uploader_id': '39545378',
'thumbnail': r're:https?://.+\.jpg$',
},
'params': {'skip_download': 'm3u8'},
},
{
# VIDEO NOW REMOVED
@ -176,8 +202,13 @@ class VKIE(VKBaseIE):
'ext': 'mp4',
'title': '8 серия (озвучка)',
'duration': 8383,
'comment_count': int,
'uploader': 'Dizi2021',
'like_count': int,
'timestamp': 1640162189,
'upload_date': '20211222',
'view_count': int,
'uploader_id': '-93049196',
'thumbnail': r're:https?://.+\.jpg$',
},
},
{
@ -204,10 +235,23 @@ class VKIE(VKBaseIE):
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
'duration': 178,
'upload_date': '20130116',
'upload_date': '20130117',
'uploader': "Children's Joy Foundation Inc.",
'uploader_id': 'thecjf',
'view_count': int,
'channel_id': 'UCgzCNQ11TmR9V97ECnhi3gw',
'availability': 'public',
'like_count': int,
'live_status': 'not_live',
'playable_in_embed': True,
'channel': 'Children\'s Joy Foundation Inc.',
'uploader_url': 'http://www.youtube.com/user/thecjf',
'thumbnail': r're:https?://.+\.jpg$',
'tags': 'count:27',
'start_time': 0.0,
'categories': ['Nonprofits & Activism'],
'channel_url': 'https://www.youtube.com/channel/UCgzCNQ11TmR9V97ECnhi3gw',
'age_limit': 0,
},
},
{
@ -223,9 +267,7 @@ class VKIE(VKBaseIE):
'uploader_id': 'x1p5vl5',
'timestamp': 1473877246,
},
'params': {
'skip_download': True,
},
'skip': 'Removed'
},
{
# video key is extra_data not url\d+
@ -240,9 +282,7 @@ class VKIE(VKBaseIE):
'timestamp': 1454859345,
'upload_date': '20160207',
},
'params': {
'skip_download': True,
},
'skip': 'Removed',
},
{
# finished live stream, postlive_mp4
@ -253,11 +293,12 @@ class VKIE(VKBaseIE):
'title': 'ИгроМир 2016 День 1 — Игромания Утром',
'uploader': 'Игромания',
'duration': 5239,
# TODO: use act=show to extract view_count
# 'view_count': int,
'upload_date': '20160929',
'uploader_id': '-387766',
'timestamp': 1475137527,
'thumbnail': r're:https?://.+\.jpg$',
'comment_count': int,
'like_count': int,
},
'params': {
'skip_download': True,
@ -317,7 +358,7 @@ def _real_extract(self, url):
mv_data = {}
if video_id:
data = {
'act': 'show_inline',
'act': 'show',
'video': video_id,
}
# Some videos (removed?) can only be downloaded with list id specified