From 2325d03aa7bb80f56ba52cd6992258e44727b424 Mon Sep 17 00:00:00 2001 From: JC-Chung <52159296+JC-Chung@users.noreply.github.com> Date: Thu, 16 Nov 2023 07:23:18 +0800 Subject: [PATCH] [ie/twitcasting] Fix livestream detection (#8574) Authored by: JC-Chung --- yt_dlp/extractor/twitcasting.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py index 32a38c86e..85c7b20de 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -11,6 +11,7 @@ float_or_none, get_element_by_class, get_element_by_id, + int_or_none, parse_duration, qualities, str_to_int, @@ -241,6 +242,8 @@ class TwitCastingLiveIE(InfoExtractor): 'expected_exception': 'UserNotLive', }] + _PROTECTED_LIVE_RE = r'(?s)(\s*LIVE)' + def _real_extract(self, url): uploader_id = self._match_id(url) self.to_screen( @@ -248,24 +251,27 @@ def _real_extract(self, url): 'Pass "https://twitcasting.tv/{0}/show" to download the history'.format(uploader_id)) webpage = self._download_webpage(url, uploader_id) - current_live = self._search_regex( - (r'data-type="movie" data-id="(\d+)">', - r'tw-sound-flag-open-link" data-id="(\d+)" style=',), - webpage, 'current live ID', default=None) - if not current_live: + is_live = self._search_regex( # first pattern is for public live + (r'(data-is-onlive="true")', self._PROTECTED_LIVE_RE), webpage, 'is live?', default=None) + current_live = int_or_none(self._search_regex( + (r'data-type="movie" data-id="(\d+)">', # not available? + r'tw-sound-flag-open-link" data-id="(\d+)" style=', # not available? + r'data-movie-id="(\d+)"'), # if not currently live, value may be 0 + webpage, 'current live ID', default=None)) + if is_live and not current_live: # fetch unfiltered /show to find running livestreams; we can't get ID of the password-protected livestream above webpage = self._download_webpage( f'https://twitcasting.tv/{uploader_id}/show/', uploader_id, note='Downloading live history') - is_live = self._search_regex(r'(?s)(\s*LIVE)', webpage, 'is live?', default=None) + is_live = self._search_regex(self._PROTECTED_LIVE_RE, webpage, 'is live?', default=None) if is_live: # get the first live; running live is always at the first current_live = self._search_regex( - r'(?s)\d+)"\s*>.+?', + r'(?s)\d+)"\s*>.+?', webpage, 'current live ID 2', default=None, group='video_id') if not current_live: raise UserNotLive(video_id=uploader_id) - return self.url_result('https://twitcasting.tv/%s/movie/%s' % (uploader_id, current_live)) + return self.url_result(f'https://twitcasting.tv/{uploader_id}/movie/{current_live}', TwitCastingIE) class TwitCastingUserIE(InfoExtractor):