From a78e3a57951893a1b885d6c478d09d279101f6a2 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Wed, 5 Aug 2020 01:02:23 +0300 Subject: [PATCH] support youtube live chat replay --- youtube_dl/downloader/__init__.py | 2 + youtube_dl/downloader/youtube_live_chat.py | 88 ++++++++++++++++++++++ youtube_dl/extractor/youtube.py | 8 ++ 3 files changed, 98 insertions(+) create mode 100644 youtube_dl/downloader/youtube_live_chat.py diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 2e485df9d..4ae81f516 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -8,6 +8,7 @@ from .dash import DashSegmentsFD from .rtsp import RtspFD from .ism import IsmFD +from .youtube_live_chat import YoutubeLiveChatReplayFD from .external import ( get_external_downloader, FFmpegFD, @@ -26,6 +27,7 @@ 'f4m': F4mFD, 'http_dash_segments': DashSegmentsFD, 'ism': IsmFD, + 'youtube_live_chat_replay': YoutubeLiveChatReplayFD, } diff --git a/youtube_dl/downloader/youtube_live_chat.py b/youtube_dl/downloader/youtube_live_chat.py new file mode 100644 index 000000000..64d1d20b2 --- /dev/null +++ b/youtube_dl/downloader/youtube_live_chat.py @@ -0,0 +1,88 @@ +from __future__ import division, unicode_literals + +import re +import json + +from .fragment import FragmentFD + + +class YoutubeLiveChatReplayFD(FragmentFD): + """ Downloads YouTube live chat replays fragment by fragment """ + + FD_NAME = 'youtube_live_chat_replay' + + def real_download(self, filename, info_dict): + video_id = info_dict['video_id'] + self.to_screen('[%s] Downloading live chat' % self.FD_NAME) + + test = self.params.get('test', False) + + ctx = { + 'filename': filename, + 'live': True, + 'total_frags': None, + } + + def dl_fragment(url): + headers = info_dict.get('http_headers', {}) + return self._download_fragment(ctx, url, info_dict, headers) + + def parse_yt_initial_data(data): + raw_json = re.search(b'window\["ytInitialData"\]\s*=\s*(.*);', data).group(1) + return json.loads(raw_json) + + self._prepare_and_start_frag_download(ctx) + + success, raw_fragment = dl_fragment( + 'https://www.youtube.com/watch?v={}'.format(video_id)) + if not success: + return False + data = parse_yt_initial_data(raw_fragment) + continuation_id = data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'] + # no data yet but required to call _append_fragment + self._append_fragment(ctx, b'') + + first = True + offset = None + while continuation_id is not None: + data = None + if first: + url = 'https://www.youtube.com/live_chat_replay?continuation={}'.format(continuation_id) + success, raw_fragment = dl_fragment(url) + if not success: + return False + data = parse_yt_initial_data(raw_fragment) + else: + url = ('https://www.youtube.com/live_chat_replay/get_live_chat_replay' + + '?continuation={}'.format(continuation_id) + + '&playerOffsetMs={}'.format(offset - 5000) + + '&hidden=false' + + '&pbj=1') + success, raw_fragment = dl_fragment(url) + if not success: + return False + data = json.loads(raw_fragment)['response'] + + first = False + continuation_id = None + + live_chat_continuation = data['continuationContents']['liveChatContinuation'] + offset = None + processed_fragment = bytearray() + if 'actions' in live_chat_continuation: + for action in live_chat_continuation['actions']: + if 'replayChatItemAction' in action: + replay_chat_item_action = action['replayChatItemAction'] + offset = int(replay_chat_item_action['videoOffsetTimeMsec']) + processed_fragment.extend( + json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n') + continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation'] + + self._append_fragment(ctx, processed_fragment) + + if test or offset is None: + break + + self._finish_frag_download(ctx) + + return True diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b35bf03aa..e554702e7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1462,6 +1462,14 @@ def _get_subtitles(self, video_id, webpage): 'ext': ext, }) sub_lang_list[lang] = sub_formats + # TODO check that live chat replay actually exists + sub_lang_list['live_chat'] = [ + { + 'video_id': video_id, + 'ext': 'json', + 'protocol': 'youtube_live_chat_replay', + }, + ] if not sub_lang_list: self._downloader.report_warning('video doesn\'t have subtitles') return {}