From f7c5a5e96756636379a0b1afbeadb08b9c643bef Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 21 Sep 2022 09:12:54 +0000 Subject: [PATCH] [extractor/tiktok] Fix TikTokIE (#4984) Authored by: bashonly --- yt_dlp/extractor/tiktok.py | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index c58538394..4a35a241c 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -25,7 +25,7 @@ class TikTokBaseIE(InfoExtractor): - _APP_VERSIONS = [('20.9.3', '293'), ('20.4.3', '243'), ('20.2.1', '221'), ('20.1.2', '212'), ('20.0.4', '204')] + _APP_VERSIONS = [('26.1.3', '260103'), ('26.1.2', '260102'), ('26.1.1', '260101'), ('25.6.2', '250602')] _WORKING_APP_VERSION = None _APP_NAME = 'trill' _AID = 1180 @@ -33,7 +33,6 @@ class TikTokBaseIE(InfoExtractor): _UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s' _WEBPAGE_HOST = 'https://www.tiktok.com/' QUALITIES = ('360p', '540p', '720p', '1080p') - _session_initialized = False @staticmethod def _create_url(user_id, video_id): @@ -43,12 +42,6 @@ def _get_sigi_state(self, webpage, display_id): return self._parse_json(get_element_by_id( 'SIGI_STATE|sigi-persisted-data', webpage, escape_value=False), display_id) - def _real_initialize(self): - if self._session_initialized: - return - self._request_webpage(HEADRequest('https://www.tiktok.com'), None, note='Setting up session', fatal=False) - TikTokBaseIE._session_initialized = True - def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True, note='Downloading API JSON', errnote='Unable to download API page'): self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160))) @@ -289,7 +282,7 @@ def extract_addr(addr, add_meta={}): 'uploader_url': user_url, 'track': music_track, 'album': str_or_none(music_info.get('album')) or None, - 'artist': 
music_author, + 'artist': music_author or None, 'timestamp': int_or_none(aweme_detail.get('create_time')), 'formats': formats, 'subtitles': self.extract_subtitles(aweme_detail, aweme_id), @@ -522,7 +515,7 @@ class TikTokIE(TikTokBaseIE): 'repost_count': int, 'comment_count': int, }, - 'expected_warnings': ['trying feed workaround', 'Unable to find video in feed'] + 'skip': 'This video is unavailable', }, { # Auto-captions available 'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758', @@ -530,18 +523,11 @@ class TikTokIE(TikTokBaseIE): }] def _extract_aweme_app(self, aweme_id): - try: - aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id, - note='Downloading video details', errnote='Unable to download video details').get('aweme_detail') - if not aweme_detail: - raise ExtractorError('Video not available', video_id=aweme_id) - except ExtractorError as e: - self.report_warning(f'{e.orig_msg}; trying feed workaround') - feed_list = self._call_api('feed', {'aweme_id': aweme_id}, aweme_id, - note='Downloading video feed', errnote='Unable to download video feed').get('aweme_list') or [] - aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None) - if not aweme_detail: - raise ExtractorError('Unable to find video in feed', video_id=aweme_id) + feed_list = self._call_api('feed', {'aweme_id': aweme_id}, aweme_id, + note='Downloading video feed', errnote='Unable to download video feed').get('aweme_list') or [] + aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None) + if not aweme_detail: + raise ExtractorError('Unable to find video in feed', video_id=aweme_id) return self._parse_aweme_video_app(aweme_detail) def _real_extract(self, url): @@ -572,6 +558,7 @@ def _real_extract(self, url): class TikTokUserIE(TikTokBaseIE): IE_NAME = 'tiktok:user' _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/?(?:$|[#?])' + _WORKING = False _TESTS = [{
'url': 'https://tiktok.com/@corgibobaa?lang=en', 'playlist_mincount': 45, @@ -708,6 +695,7 @@ def _real_extract(self, url): class TikTokSoundIE(TikTokBaseListIE): IE_NAME = 'tiktok:sound' _VALID_URL = r'https?://(?:www\.)?tiktok\.com/music/[\w\.-]+-(?P<id>[\d]+)[/?#&]?' + _WORKING = False _QUERY_NAME = 'music_id' _API_ENDPOINT = 'music/aweme' _TESTS = [{ @@ -731,6 +719,7 @@ class TikTokSoundIE(TikTokBaseListIE): class TikTokEffectIE(TikTokBaseListIE): IE_NAME = 'tiktok:effect' _VALID_URL = r'https?://(?:www\.)?tiktok\.com/sticker/[\w\.-]+-(?P<id>[\d]+)[/?#&]?' + _WORKING = False _QUERY_NAME = 'sticker_id' _API_ENDPOINT = 'sticker/aweme' _TESTS = [{ @@ -750,6 +739,7 @@ class TikTokEffectIE(TikTokBaseListIE): class TikTokTagIE(TikTokBaseListIE): IE_NAME = 'tiktok:tag' _VALID_URL = r'https?://(?:www\.)?tiktok\.com/tag/(?P<id>[^/?#&]+)' + _WORKING = False _QUERY_NAME = 'ch_id' _API_ENDPOINT = 'challenge/aweme' _TESTS = [{