[nova:embed] Fix extraction (closes #24700)

This commit is contained in:
Sergey M․ 2020-04-09 03:52:29 +07:00
parent dcc8522fdb
commit 5caf88ccb4
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@ -6,6 +6,7 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
determine_ext,
int_or_none, int_or_none,
js_to_json, js_to_json,
qualities, qualities,
@ -33,42 +34,76 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
bitrates = self._parse_json( duration = None
self._search_regex(
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
video_id, transform_source=js_to_json)
QUALITIES = ('lq', 'mq', 'hq', 'hd')
quality_key = qualities(QUALITIES)
formats = [] formats = []
for format_id, format_list in bitrates.items():
if not isinstance(format_list, list): player = self._parse_json(
format_list = [format_list] self._search_regex(
for format_url in format_list: r'Player\.init\s*\([^,]+,\s*({.+?})\s*,\s*{.+?}\s*\)\s*;',
format_url = url_or_none(format_url) webpage, 'player', default='{}'), video_id, fatal=False)
if not format_url: if player:
continue for format_id, format_list in player['tracks'].items():
if format_id == 'hls': if not isinstance(format_list, list):
formats.extend(self._extract_m3u8_formats( format_list = [format_list]
format_url, video_id, ext='mp4', for format_dict in format_list:
entry_protocol='m3u8_native', m3u8_id='hls', if not isinstance(format_dict, dict):
fatal=False)) continue
continue format_url = url_or_none(format_dict.get('src'))
f = { format_type = format_dict.get('type')
'url': format_url, ext = determine_ext(format_url)
} if (format_type == 'application/x-mpegURL'
f_id = format_id or format_id == 'HLS' or ext == 'm3u8'):
for quality in QUALITIES: formats.extend(self._extract_m3u8_formats(
if '%s.mp4' % quality in format_url: format_url, video_id, 'mp4',
f_id += '-%s' % quality entry_protocol='m3u8_native', m3u8_id='hls',
f.update({ fatal=False))
'quality': quality_key(quality), elif (format_type == 'application/dash+xml'
'format_note': quality.upper(), or format_id == 'DASH' or ext == 'mpd'):
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
else:
formats.append({
'url': format_url,
}) })
break duration = int_or_none(player.get('duration'))
f['format_id'] = f_id else:
formats.append(f) # Old path, not actual as of 08.04.2020
bitrates = self._parse_json(
self._search_regex(
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
video_id, transform_source=js_to_json)
QUALITIES = ('lq', 'mq', 'hq', 'hd')
quality_key = qualities(QUALITIES)
for format_id, format_list in bitrates.items():
if not isinstance(format_list, list):
format_list = [format_list]
for format_url in format_list:
format_url = url_or_none(format_url)
if not format_url:
continue
if format_id == 'hls':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, ext='mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False))
continue
f = {
'url': format_url,
}
f_id = format_id
for quality in QUALITIES:
if '%s.mp4' % quality in format_url:
f_id += '-%s' % quality
f.update({
'quality': quality_key(quality),
'format_note': quality.upper(),
})
break
f['format_id'] = f_id
formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
title = self._og_search_title( title = self._og_search_title(
@ -81,7 +116,8 @@ def _real_extract(self, url):
r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'thumbnail', fatal=False, group='value') 'thumbnail', fatal=False, group='value')
duration = int_or_none(self._search_regex( duration = int_or_none(self._search_regex(
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False)) r'videoDuration\s*:\s*(\d+)', webpage, 'duration',
default=duration))
return { return {
'id': video_id, 'id': video_id,