[ie/niconico] Directly download live timeshift videos; WebSocket fixes

Major changes:

- Add a downloader for live timeshift videos, with a time-based download
  rate limit and RetryManager-based error recovery.
- Fix the incorrect URL used for WebSocket reconnection.
- Correctly close the WebSocket connection.
- [!] Apply "FFmpegFixupM3u8PP" for both non-timeshift and timeshift
  MPEG-TS files by adding "m3u8_*" prefixes and inheriting from "HlsFD".
- [!] Change the protocol from "hls+fmp4" to "hls" in "startWatching"
  WebSocket requests because I did not observe "hls+fmp4" in my tests.

Minor changes:

- Support metadata extraction when no formats are available.
- Set "live_status" instead of "is_live".
- Clean up "info_dict": Change WebSocket configs to private to hide them
  from users; extract common fields and remove unused ones.
- Update a download test.
This commit is contained in:
Mozi 2024-03-10 02:17:57 +00:00
parent 263a4b55ac
commit 16974726a4
3 changed files with 229 additions and 79 deletions

View File

@ -30,7 +30,7 @@ from .hls import HlsFD
from .http import HttpFD from .http import HttpFD
from .ism import IsmFD from .ism import IsmFD
from .mhtml import MhtmlFD from .mhtml import MhtmlFD
from .niconico import NiconicoDmcFD, NiconicoLiveFD from .niconico import NiconicoDmcFD, NiconicoLiveFD, NiconicoLiveTimeshiftFD
from .rtmp import RtmpFD from .rtmp import RtmpFD
from .rtsp import RtspFD from .rtsp import RtspFD
from .websocket import WebSocketFragmentFD from .websocket import WebSocketFragmentFD
@ -50,7 +50,8 @@ PROTOCOL_MAP = {
'ism': IsmFD, 'ism': IsmFD,
'mhtml': MhtmlFD, 'mhtml': MhtmlFD,
'niconico_dmc': NiconicoDmcFD, 'niconico_dmc': NiconicoDmcFD,
'niconico_live': NiconicoLiveFD, 'm3u8_niconico_live': NiconicoLiveFD,
'm3u8_niconico_live_timeshift': NiconicoLiveTimeshiftFD,
'fc2_live': FC2LiveFD, 'fc2_live': FC2LiveFD,
'websocket_frag': WebSocketFragmentFD, 'websocket_frag': WebSocketFragmentFD,
'youtube_live_chat': YoutubeLiveChatFD, 'youtube_live_chat': YoutubeLiveChatFD,

View File

@ -1,12 +1,23 @@
import contextlib
import json import json
import math
import threading import threading
import time import time
from . import get_suitable_downloader from . import get_suitable_downloader
from .common import FileDownloader from .common import FileDownloader
from .external import FFmpegFD from .external import FFmpegFD
from ..downloader.hls import HlsFD
from ..networking import Request from ..networking import Request
from ..utils import DownloadError, str_or_none, try_get from ..networking.exceptions import RequestError
from ..utils import (
DownloadError,
RetryManager,
str_or_none,
traverse_obj,
try_get,
urljoin,
)
class NiconicoDmcFD(FileDownloader): class NiconicoDmcFD(FileDownloader):
@ -56,34 +67,33 @@ class NiconicoDmcFD(FileDownloader):
return success return success
class NiconicoLiveFD(FileDownloader): class NiconicoLiveBaseFD(FileDownloader):
""" Downloads niconico live without being stopped """ _WEBSOCKET_RECONNECT_DELAY = 10
def real_download(self, filename, info_dict): @contextlib.contextmanager
video_id = info_dict['video_id'] def _ws_context(self, info_dict):
ws_url = info_dict['url'] """ Hold a WebSocket object and release it when leaving """
ws_extractor = info_dict['ws']
ws_origin_host = info_dict['origin']
live_quality = info_dict.get('live_quality', 'high')
live_latency = info_dict.get('live_latency', 'high')
dl = FFmpegFD(self.ydl, self.params or {})
new_info_dict = info_dict.copy() video_id = info_dict['id']
new_info_dict.update({ live_latency = info_dict['live_latency']
'protocol': 'm3u8', self.ws = info_dict['__ws']
})
self.m3u8_lock = threading.Event()
self.m3u8_url = info_dict['manifest_url']
self.m3u8_lock.set()
def communicate_ws(reconnect): def communicate_ws(reconnect):
if reconnect: if reconnect:
ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'})) self.ws = self.ydl.urlopen(Request(
self.ws.url, headers={'Origin': self.ws.wsw.request.headers['Origin']}))
if self.ydl.params.get('verbose', False): if self.ydl.params.get('verbose', False):
self.to_screen('[debug] Sending startWatching request') self.to_screen('[debug] Sending startWatching request')
ws.send(json.dumps({ self.ws.send(json.dumps({
'type': 'startWatching', 'type': 'startWatching',
'data': { 'data': {
'stream': { 'stream': {
'quality': live_quality, 'quality': 'abr',
'protocol': 'hls+fmp4', 'protocol': 'hls',
'latency': live_latency, 'latency': live_latency,
'chasePlay': False 'chasePlay': False
}, },
@ -94,11 +104,9 @@ class NiconicoLiveFD(FileDownloader):
'reconnect': True, 'reconnect': True,
} }
})) }))
else: with self.ws:
ws = ws_extractor
with ws:
while True: while True:
recv = ws.recv() recv = self.ws.recv()
if not recv: if not recv:
continue continue
data = json.loads(recv) data = json.loads(recv)
@ -106,35 +114,155 @@ class NiconicoLiveFD(FileDownloader):
continue continue
if data.get('type') == 'ping': if data.get('type') == 'ping':
# pong back # pong back
ws.send(r'{"type":"pong"}') self.ws.send(r'{"type":"pong"}')
ws.send(r'{"type":"keepSeat"}') self.ws.send(r'{"type":"keepSeat"}')
elif data.get('type') == 'stream':
self.m3u8_url = data['data']['uri']
self.m3u8_lock.set()
elif data.get('type') == 'disconnect': elif data.get('type') == 'disconnect':
self.write_debug(data) self.write_debug(data)
return True return
elif data.get('type') == 'error': elif data.get('type') == 'error':
self.write_debug(data) self.write_debug(data)
message = try_get(data, lambda x: x['body']['code'], str) or recv message = try_get(data, lambda x: x['body']['code'], str) or recv
return DownloadError(message) raise DownloadError(message)
elif self.ydl.params.get('verbose', False): elif self.ydl.params.get('verbose', False):
if len(recv) > 100: if len(recv) > 100:
recv = recv[:100] + '...' recv = recv[:100] + '...'
self.to_screen('[debug] Server said: %s' % recv) self.to_screen('[debug] Server said: %s' % recv)
stopped = threading.Event()
def ws_main(): def ws_main():
reconnect = False reconnect = False
while True: while not stopped.is_set():
try: try:
ret = communicate_ws(reconnect) communicate_ws(reconnect)
if ret is True: break # Disconnected
return except BaseException as e: # Including TransportError
except BaseException as e: if stopped.is_set():
self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e))) break
time.sleep(10)
continue self.m3u8_lock.clear() # m3u8 url may be changed
finally:
self.to_screen('[%s] %s: Connection error occured, reconnecting after %d seconds: %s' % ('niconico:live', video_id, self._WEBSOCKET_RECONNECT_DELAY, str_or_none(e)))
time.sleep(self._WEBSOCKET_RECONNECT_DELAY)
reconnect = True reconnect = True
self.m3u8_lock.set() # Release possible locks
thread = threading.Thread(target=ws_main, daemon=True) thread = threading.Thread(target=ws_main, daemon=True)
thread.start() thread.start()
return dl.download(filename, new_info_dict) try:
yield self
finally:
stopped.set()
self.ws.close()
thread.join()
def _master_m3u8_url(self):
""" Get the refreshed manifest url after WebSocket reconnection to prevent HTTP 403

Blocks on the "m3u8_lock" event, then returns the current value of "m3u8_url".
"""
# The WebSocket thread clears this event when the connection fails and sets it
# again when a "stream" message delivers a new url (it also appears to be set when
# that thread finishes), so waiting here rides out a reconnection
self.m3u8_lock.wait()
return self.m3u8_url
class NiconicoLiveFD(NiconicoLiveBaseFD):
    """ Downloads niconico live without being stopped """

    def real_download(self, filename, info_dict):
        """
        Download an ongoing live stream with FFmpeg

        The WebSocket context is held for the whole duration of the FFmpeg
        download and released when the download returns or raises.
        """
        with self._ws_context(info_dict):
            # FFmpeg receives a copy of the info dict whose protocol is plain "m3u8"
            ffmpeg_info_dict = {**info_dict, 'protocol': 'm3u8'}
            ffmpeg_fd = FFmpegFD(self.ydl, self.params or {})
            return ffmpeg_fd.download(filename, ffmpeg_info_dict)
class NiconicoLiveTimeshiftFD(NiconicoLiveBaseFD, HlsFD):
    """ Downloads niconico live timeshift VOD """

    # Minimum time spent on a batch of fragments, as a ratio of the batch's playback duration
    _PER_FRAGMENT_DOWNLOAD_RATIO = 0.1

    def real_download(self, filename, info_dict):
        """
        Download a timeshift recording fragment by fragment

        The master playlist is re-requested with a "start" offset for every batch of
        fragments, each batch is rate limited by DurationLimiter, and failures of a
        batch are retried through RetryManager ("fragment_retries").

        Returns the result of "_finish_frag_download" on success and False when a
        fragment could not be downloaded. Raises DownloadError when the chosen format
        or the required playlist info cannot be determined.
        """
        with self._ws_context(info_dict) as ws_context:
            from ..extractor.niconico import NiconicoIE
            ie = NiconicoIE(self.ydl)

            video_id = info_dict['id']

            # Get format index. Fail loudly instead of silently falling back to the
            # last format (or hitting a NameError when there are no formats at all)
            format_index = next((
                index for index, fmt in enumerate(info_dict.get('formats') or [])
                if fmt.get('format_id') == info_dict['format_id']), None)
            if format_index is None:
                raise DownloadError('Unable to find the requested format')

            # Get video info
            total_duration = 0
            fragment_duration = 0
            for line in ie._download_webpage(info_dict['url'], video_id, note='Downloading m3u8').splitlines():
                if '#STREAM-DURATION' in line:
                    total_duration = int(float(line.split(':')[1]))
                if '#EXT-X-TARGETDURATION' in line:
                    fragment_duration = int(line.split(':')[1])
            if not (total_duration and fragment_duration):
                raise DownloadError('Unable to get required video info')

            ctx = {
                'filename': filename,
                'total_frags': math.ceil(total_duration / fragment_duration),
            }

            self._prepare_and_start_frag_download(ctx, info_dict)

            # "fragment_index" is non-zero when resuming a partial download
            downloaded_duration = ctx['fragment_index'] * fragment_duration
            finished = False
            while not finished and downloaded_duration <= total_duration:
                retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry)
                for retry in retry_manager:
                    try:
                        # Refresh master m3u8 (if possible) and get the url of the previously chosen format
                        master_m3u8_url = ws_context._master_m3u8_url()
                        formats = ie._extract_m3u8_formats(
                            master_m3u8_url, video_id, query={'start': downloaded_duration},
                            live=False, note=False, fatal=False)
                        media_m3u8_url = traverse_obj(formats, (format_index, {dict}, 'url'), get_all=False)
                        if not media_m3u8_url:
                            raise DownloadError('Unable to get playlist')

                        # Get all fragments
                        media_m3u8 = ie._download_webpage(media_m3u8_url, video_id, note=False)
                        fragment_urls = traverse_obj(media_m3u8.splitlines(), (
                            lambda _, v: not v.startswith('#'), {lambda url: urljoin(media_m3u8_url, url)}))

                        if not fragment_urls:
                            # An empty playlist makes no progress; without this check the
                            # outer loop would spin forever with a zero-length rate limit
                            if downloaded_duration < total_duration:
                                raise DownloadError('Playlist contains no fragments')
                            finished = True
                            continue

                        with self.DurationLimiter(len(fragment_urls) * fragment_duration * self._PER_FRAGMENT_DOWNLOAD_RATIO):
                            for fragment_url in fragment_urls:
                                success = self._download_fragment(ctx, fragment_url, info_dict)
                                if not success:
                                    return False
                                self._append_fragment(ctx, self._read_fragment(ctx))
                                downloaded_duration += fragment_duration

                    except (DownloadError, RequestError) as err:  # Including HTTPError and TransportError
                        retry.error = err
                        continue

                if retry_manager.error:
                    return False

            return self._finish_frag_download(ctx, info_dict)

    class DurationLimiter:
        """ Context manager that makes its body last at least "target" seconds """

        def __init__(self, target):
            self.target = target

        def __enter__(self):
            # Monotonic clock: immune to system clock adjustments during the download
            self.start = time.monotonic()
            return self

        def __exit__(self, *exc):
            # Sleep for whatever is left of the target; exceptions are not suppressed
            remaining = self.target - (time.monotonic() - self.start)
            if remaining > 0:
                time.sleep(remaining)

View File

@ -919,17 +919,30 @@ class NiconicoLiveIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'lv339533123', 'id': 'lv339533123',
'title': '激辛ペヤング食べます‪( ;ᯅ; )‬(歌枠オーディション参加中)', 'title': '激辛ペヤング食べます‪( ;ᯅ; )‬(歌枠オーディション参加中)',
'view_count': 1526, 'view_count': int,
'comment_count': 1772, 'comment_count': int,
'description': '初めましてもかって言います❕\nのんびり自由に適当に暮らしてます', 'description': '初めましてもかって言います❕\nのんびり自由に適当に暮らしてます',
'uploader': 'もか', 'uploader': 'もか',
'channel': 'ゲストさんのコミュニティ', 'channel': 'ゲストさんのコミュニティ',
'channel_id': 'co5776900', 'channel_id': 'co5776900',
'channel_url': 'https://com.nicovideo.jp/community/co5776900', 'channel_url': 'https://com.nicovideo.jp/community/co5776900',
'timestamp': 1670677328, 'timestamp': 1670677328,
'is_live': True, 'ext': None,
'live_latency': 'high',
'live_status': 'was_live',
'thumbnail': r're:^https://[\w.-]+/\w+/\w+',
'thumbnails': list,
'upload_date': '20221210',
}, },
'skip': 'livestream', 'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
'expected_warnings': [
'The live hasn\'t started yet or already ended.',
'No video formats found!',
'Requested format is not available',
],
}, { }, {
'url': 'https://live2.nicovideo.jp/watch/lv339533123', 'url': 'https://live2.nicovideo.jp/watch/lv339533123',
'only_matching': True, 'only_matching': True,
@ -943,36 +956,14 @@ class NiconicoLiveIE(InfoExtractor):
_KNOWN_LATENCY = ('high', 'low') _KNOWN_LATENCY = ('high', 'low')
def _real_extract(self, url): def _yield_formats(self, ws, video_id, latency, is_live):
video_id = self._match_id(url)
webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id)
embedded_data = self._parse_json(unescapeHTML(self._search_regex(
r'<script\s+id="embedded-data"\s*data-props="(.+?)"', webpage, 'embedded data')), video_id)
ws_url = traverse_obj(embedded_data, ('site', 'relive', 'webSocketUrl'))
if not ws_url:
raise ExtractorError('The live hasn\'t started yet or already ended.', expected=True)
ws_url = update_url_query(ws_url, {
'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9',
})
hostname = remove_start(urlparse(urlh.url).hostname, 'sp.')
latency = try_get(self._configuration_arg('latency'), lambda x: x[0])
if latency not in self._KNOWN_LATENCY:
latency = 'high'
ws = self._request_webpage(
Request(ws_url, headers={'Origin': f'https://{hostname}'}),
video_id=video_id, note='Connecting to WebSocket server')
self.write_debug('[debug] Sending HLS server request') self.write_debug('[debug] Sending HLS server request')
ws.send(json.dumps({ ws.send(json.dumps({
'type': 'startWatching', 'type': 'startWatching',
'data': { 'data': {
'stream': { 'stream': {
'quality': 'abr', 'quality': 'abr',
'protocol': 'hls+fmp4', 'protocol': 'hls',
'latency': latency, 'latency': latency,
'chasePlay': False 'chasePlay': False
}, },
@ -1007,6 +998,35 @@ class NiconicoLiveIE(InfoExtractor):
recv = recv[:100] + '...' recv = recv[:100] + '...'
self.write_debug('Server said: %s' % recv) self.write_debug('Server said: %s' % recv)
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=is_live)
for fmt, q in zip(formats, reversed(qualities[1:])):
fmt.update({
'format_id': q,
'protocol': 'm3u8_niconico_live' if is_live else 'm3u8_niconico_live_timeshift',
})
yield fmt
def _real_extract(self, url):
video_id = self._match_id(url)
webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id)
embedded_data = self._parse_json(unescapeHTML(self._search_regex(
r'<script\s+id="embedded-data"\s*data-props="(.+?)"', webpage, 'embedded data')), video_id)
ws = None
ws_url = traverse_obj(embedded_data, ('site', 'relive', 'webSocketUrl'))
if ws_url:
ws_url = update_url_query(ws_url, {
'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9',
})
hostname = remove_start(urlparse(urlh.url).hostname, 'sp.')
ws = self._request_webpage(
Request(ws_url, headers={'Origin': f'https://{hostname}'}),
video_id=video_id, note='Connecting to WebSocket server')
else:
self.raise_no_formats('The live hasn\'t started yet or already ended.', expected=True)
title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta( title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta(
('og:title', 'twitter:title'), webpage, 'live title', fatal=False) ('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
@ -1031,16 +1051,15 @@ class NiconicoLiveIE(InfoExtractor):
**res, **res,
}) })
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True) live_status = {
for fmt, q in zip(formats, reversed(qualities[1:])): 'Before': 'is_live',
fmt.update({ 'Open': 'was_live',
'format_id': q, 'End': 'was_live',
'protocol': 'niconico_live', }.get(traverse_obj(embedded_data, ('programTimeshift', 'publication', 'status', {str})), 'is_live')
'ws': ws,
'video_id': video_id, latency = try_get(self._configuration_arg('latency'), lambda x: x[0])
'live_latency': latency, if latency not in self._KNOWN_LATENCY:
'origin': hostname, latency = 'high'
})
return { return {
'id': video_id, 'id': video_id,
@ -1055,7 +1074,9 @@ class NiconicoLiveIE(InfoExtractor):
}), }),
'description': clean_html(traverse_obj(embedded_data, ('program', 'description'))), 'description': clean_html(traverse_obj(embedded_data, ('program', 'description'))),
'timestamp': int_or_none(traverse_obj(embedded_data, ('program', 'openTime'))), 'timestamp': int_or_none(traverse_obj(embedded_data, ('program', 'openTime'))),
'is_live': True, 'live_status': live_status,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'formats': formats, 'formats': [*self._yield_formats(ws, video_id, latency, live_status == 'is_live')] if ws else None,
'live_latency': latency,
'__ws': ws,
} }