From 15dfb3929c3eaca897c85dc1ad792df3fbf5ebc7 Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Fri, 25 Feb 2022 11:16:23 +0900 Subject: [PATCH] [fc2:live] Add extractor (#2418) Authored by: Lesmiscore --- yt_dlp/downloader/__init__.py | 2 + yt_dlp/downloader/fc2.py | 41 +++++++++ yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/fc2.py | 150 +++++++++++++++++++++++++++++++++ yt_dlp/utils.py | 4 +- 5 files changed, 197 insertions(+), 1 deletion(-) create mode 100644 yt_dlp/downloader/fc2.py diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py index 76841993b..96d484dee 100644 --- a/yt_dlp/downloader/__init__.py +++ b/yt_dlp/downloader/__init__.py @@ -30,6 +30,7 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N from .common import FileDownloader from .dash import DashSegmentsFD from .f4m import F4mFD +from .fc2 import FC2LiveFD from .hls import HlsFD from .http import HttpFD from .rtmp import RtmpFD @@ -58,6 +59,7 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N 'ism': IsmFD, 'mhtml': MhtmlFD, 'niconico_dmc': NiconicoDmcFD, + 'fc2_live': FC2LiveFD, 'websocket_frag': WebSocketFragmentFD, 'youtube_live_chat': YoutubeLiveChatFD, 'youtube_live_chat_replay': YoutubeLiveChatFD, diff --git a/yt_dlp/downloader/fc2.py b/yt_dlp/downloader/fc2.py new file mode 100644 index 000000000..157bcf23e --- /dev/null +++ b/yt_dlp/downloader/fc2.py @@ -0,0 +1,41 @@ +from __future__ import division, unicode_literals + +import threading + +from .common import FileDownloader +from .external import FFmpegFD + + +class FC2LiveFD(FileDownloader): + """ + Downloads FC2 live without being stopped.
+ Note, this is not a part of public API, and will be removed without notice. + DO NOT USE + """ + + def real_download(self, filename, info_dict): + ws = info_dict['ws'] + + heartbeat_lock = threading.Lock() + heartbeat_state = [None, 1] + + def heartbeat(): + try: + heartbeat_state[1] += 1 + ws.send('{"name":"heartbeat","arguments":{},"id":%d}' % heartbeat_state[1]) + except Exception: + self.to_screen('[fc2:live] Heartbeat failed') + + with heartbeat_lock: + heartbeat_state[0] = threading.Timer(30, heartbeat) + heartbeat_state[0]._daemonic = True + heartbeat_state[0].start() + + heartbeat() + + new_info_dict = info_dict.copy() + new_info_dict.update({ + 'ws': None, + 'protocol': 'live_ffmpeg', + }) + return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index fafa56d7b..ef1d6c14d 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -478,6 +478,7 @@ from .fc2 import ( FC2IE, FC2EmbedIE, + FC2LiveIE, ) from .fczenit import FczenitIE from .filmmodu import FilmmoduIE diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py index 2c19a0c6e..7fc6b0e3d 100644 --- a/yt_dlp/extractor/fc2.py +++ b/yt_dlp/extractor/fc2.py @@ -1,14 +1,21 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import ( compat_parse_qs, ) from ..utils import ( ExtractorError, + WebSocketsWrapper, + has_websockets, + js_to_json, sanitized_Request, + std_headers, traverse_obj, + update_url_query, urlencode_postdata, urljoin, ) @@ -147,3 +154,146 @@ def _real_extract(self, url): 'title': title, 'thumbnail': thumbnail, } + + +class FC2LiveIE(InfoExtractor): + _VALID_URL = r'https?://live\.fc2\.com/(?P\d+)' + IE_NAME = 'fc2:live' + + _TESTS = [{ + 'url': 'https://live.fc2.com/57892267/', + 'info_dict': { + 'id': '57892267', + 'title': 'どこまで・・・', + 'uploader': 'あつあげ', + 'uploader_id': '57892267', + 'thumbnail': r're:https?://.+fc2.+', + }, + 'skip': 'livestream', + }] + + def _real_extract(self, url): + if not has_websockets: + raise ExtractorError('websockets library is not available. Please install it.', expected=True) + video_id = self._match_id(url) + webpage = self._download_webpage('https://live.fc2.com/%s/' % video_id, video_id) + + self._set_cookie('live.fc2.com', 'js-player_size', '1') + + member_api = self._download_json( + 'https://live.fc2.com/api/memberApi.php', video_id, data=urlencode_postdata({ + 'channel': '1', + 'profile': '1', + 'user': '1', + 'streamid': video_id + }), note='Requesting member info') + + control_server = self._download_json( + 'https://live.fc2.com/api/getControlServer.php', video_id, note='Downloading ControlServer data', + data=urlencode_postdata({ + 'channel_id': video_id, + 'mode': 'play', + 'orz': '', + 'channel_version': member_api['data']['channel_data']['version'], + 'client_version': '2.1.0\n [1]', + 'client_type': 'pc', + 'client_app': 'browser_hls', + 'ipv6': '', + }), headers={'X-Requested-With': 'XMLHttpRequest'}) + self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw']) + + ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']}) + playlist_data = None + + self.to_screen('%s: Fetching HLS playlist info via WebSocket' % video_id) + ws = WebSocketsWrapper(ws_url, { + 'Cookie': str(self._get_cookies('https://live.fc2.com/'))[12:], + 'Origin': 'https://live.fc2.com', + 'Accept': '*/*', + 'User-Agent': std_headers['User-Agent'], + }) + ws.__enter__() + + self.write_debug('[debug] Sending HLS server request') + + while True: + recv = ws.recv() + if not recv: + continue + data = self._parse_json(recv, video_id, fatal=False) + if not data or not isinstance(data, dict): + continue + + if data.get('name') == 'connect_complete': + break + ws.send(r'{"name":"get_hls_information","arguments":{},"id":1}') + + while True: + recv = ws.recv() + if not recv: + continue + data = self._parse_json(recv, video_id, fatal=False) + if not data or not isinstance(data, dict): + continue + if data.get('name') == '_response_' and data.get('id') == 1: + self.write_debug('[debug] Goodbye.') + playlist_data = data + break + elif self._downloader.params.get('verbose', False): + if len(recv) > 100: + recv = recv[:100] + '...' + self.to_screen('[debug] Server said: %s' % recv) + + if not playlist_data: + raise ExtractorError('Unable to fetch HLS playlist info via WebSocket') + + formats = [] + for name, playlists in playlist_data['arguments'].items(): + if not isinstance(playlists, list): + continue + for pl in playlists: + if pl.get('status') == 0 and 'master_playlist' in pl.get('url'): + formats.extend(self._extract_m3u8_formats( + pl['url'], video_id, ext='mp4', m3u8_id=name, live=True, + headers={ + 'Origin': 'https://live.fc2.com', + 'Referer': url, + })) + + self._sort_formats(formats) + for fmt in formats: + fmt.update({ + 'protocol': 'fc2_live', + 'ws': ws, + }) + + title = self._html_search_meta(('og:title', 'twitter:title'), webpage, 'live title', fatal=False) + if not title: + title = self._html_extract_title(webpage, 'html title', fatal=False) + if title: + # remove service name in + title = re.sub(r'\s+-\s+.+$', '', title) + uploader = None + if title: + match = self._search_regex(r'^(.+?)\s*\[(.+?)\]$', title, 'title and uploader', default=None, group=(1, 2)) + if match and all(match): + title, uploader = match + + live_info_view = self._search_regex(r'(?s)liveInfoView\s*:\s*({.+?}),\s*premiumStateView', webpage, 'user info', fatal=False) or None + if live_info_view: + # remove jQuery code from object literal + live_info_view = re.sub(r'\$\(.+?\)[^,]+,', '"",', live_info_view) + live_info_view = self._parse_json(js_to_json(live_info_view), video_id) + + return { + 'id': video_id, + 'title': title or traverse_obj(live_info_view, 'title'), + 'description': self._html_search_meta( + ('og:description', 'twitter:description'), + webpage, 'live description', fatal=False) or traverse_obj(live_info_view, 'info'), + 'formats': formats, + 'uploader': uploader or traverse_obj(live_info_view, 'name'), + 'uploader_id': video_id, + 'thumbnail': traverse_obj(live_info_view, 'thumb'), + 'is_live': True, + } diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 012a115ba..6ec8da11b 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import asyncio +import atexit import base64 import binascii import calendar @@ -5348,6 +5349,7 @@ def __init__(self, url, headers=None): self.conn = compat_websockets.connect( url, extra_headers=headers, ping_interval=None, close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf')) + atexit.register(self.__exit__, None, None, None) def __enter__(self): self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop) @@ -5364,7 +5366,7 @@ def __exit__(self, type, value, traceback): return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop) finally: self.loop.close() - self.r_cancel_all_tasks(self.loop) + self._cancel_all_tasks(self.loop) # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications # for contributors: If there's any new library using asyncio needs to be run in non-async, move these function out of this class