From 71211790751084e120f76688b07d0edee072d3ab Mon Sep 17 00:00:00 2001 From: DEVENU <89598703+devenu85@users.noreply.github.com> Date: Sun, 21 Jul 2024 12:08:10 +0530 Subject: [PATCH 1/8] [ie/Amazonminitv] extractor fix --- yt_dlp/extractor/_extractors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index d2140bc52..262dc4a9f 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -118,7 +118,6 @@ from .amazonminitv import ( AmazonMiniTVIE, AmazonMiniTVSeasonIE, - AmazonMiniTVSeriesIE, ) from .amcnetworks import AMCNetworksIE from .americastestkitchen import ( From 781459836929b9a6dc85659bc4da2fd63c49c8ec Mon Sep 17 00:00:00 2001 From: DEVENU <89598703+devenu85@users.noreply.github.com> Date: Sun, 21 Jul 2024 12:08:56 +0530 Subject: [PATCH 2/8] [ie/Amazonminitv] extractor fix --- yt_dlp/extractor/amazonminitv.py | 213 ++++++------------------------- 1 file changed, 40 insertions(+), 173 deletions(-) diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py index 0590a344a..9e68582af 100644 --- a/yt_dlp/extractor/amazonminitv.py +++ b/yt_dlp/extractor/amazonminitv.py @@ -9,35 +9,28 @@ def _real_initialize(self): self._download_webpage( 'https://www.amazon.in/minitv', None, note='Fetching guest session cookies') - AmazonMiniTVBaseIE.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value + AmazonMiniTVBaseIE.urtk = self._get_cookies('https://www.amazon.in')['urtk'].value def _call_api(self, asin, data=None, note=None): - device = {'clientId': 'ATVIN', 'deviceLocale': 'en_GB'} + query = { + 'contentId':asin, + } if data: - data['variables'].update({ - 'contentType': 'VOD', - 'sessionIdToken': self.session_id, - **device, - }) + query.update(data) resp = self._download_json( - f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}', + f'https://www.amazon.in/minitv-pr/api/web/page/title', asin, note=note, headers={ 'Content-Type': 'application/json', + 'accounttype': 'NEW_GUEST_ACCOUNT', 'currentpageurl': '/', 'currentplatform': 'dWeb', - }, data=json.dumps(data).encode() if data else None, - query=None if data else { - 'deviceType': 'A1WMMUXPCUJL4N', - 'contentId': asin, - **device, - }) + }, data=None, + query=query) if resp.get('errors'): raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}') - elif not data: - return resp - return resp['data'][data['operationName']] + return resp class AmazonMiniTVIE(AmazonMiniTVBaseIE): @@ -89,79 +82,21 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE): 'only_matching': True, }] - _GRAPHQL_QUERY_CONTENT = ''' -query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) { - content( - applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId} - contentId: $contentId - contentType: $contentType - ) { - contentId - name - ... on Episode { - contentId - vodType - name - images - description { - synopsis - contentLengthInSeconds - } - publicReleaseDateUTC - audioTracks - seasonId - seriesId - seriesName - seasonNumber - episodeNumber - timecode { - endCreditsTime - } - } - ... on MovieContent { - contentId - vodType - name - description { - synopsis - contentLengthInSeconds - } - images - publicReleaseDateUTC - audioTracks - } - } -}''' - def _real_extract(self, url): asin = f'amzn1.dv.gti.{self._match_id(url)}' prs = self._call_api(asin, note='Downloading playback info') + playback_info = prs['widgets'][0]['data']['playbackAssets']['manifestData'] + title_info = prs['widgets'][0]['data']['contentDetails'] + title_info_ = prs['metaData']['contentDetails'] formats, subtitles = [], {} - for type_, asset in prs['playbackAssets'].items(): - if not traverse_obj(asset, 'manifestUrl'): - continue - if type_ == 'hls': - m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles( - asset['manifestUrl'], asin, ext='mp4', entry_protocol='m3u8_native', - m3u8_id=type_, fatal=False) - formats.extend(m3u8_fmts) - subtitles = self._merge_subtitles(subtitles, m3u8_subs) - elif type_ == 'dash': - mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles( - asset['manifestUrl'], asin, mpd_id=type_, fatal=False) - formats.extend(mpd_fmts) - subtitles = self._merge_subtitles(subtitles, mpd_subs) - else: - self.report_warning(f'Unknown asset type: {type_}') + for mpd in playback_info: + mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles( + mpd['manifestURL'], asin, mpd_id=mpd['codec'], fatal=False) + formats.extend(mpd_fmts) + subtitles = self._merge_subtitles(subtitles, mpd_subs) - title_info = self._call_api( - asin, note='Downloading title info', data={ - 'operationName': 'content', - 'variables': {'contentId': asin}, - 'query': self._GRAPHQL_QUERY_CONTENT, - }) - credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000) + credits_time = try_get(title_info, lambda x: x['skipData']['INTRO']['endTime']) is_episode = title_info.get('vodType') == 'EPISODE' return { @@ -171,22 +106,25 @@ def _real_extract(self, url): 'subtitles': subtitles, 'language': traverse_obj(title_info, ('audioTracks', 0)), 'thumbnails': [{ - 'id': type_, - 'url': url, - } for type_, url in (title_info.get('images') or {}).items()], - 'description': traverse_obj(title_info, ('description', 'synopsis')), - 'release_timestamp': int_or_none(try_get(title_info, lambda x: x['publicReleaseDateUTC'] / 1000)), + 'id': "thumbnailImage", + 'url': title_info.get('thumbnailImage'), + },{ + 'id':"seasonThumbnailImage", + 'url': title_info.get('thumbnailImage') + }], + 'description': traverse_obj(title_info_, ('description', 'synopsis')), + 'release_timestamp': int_or_none(try_get(title_info_, lambda x: x['publicReleaseDateUTC'] / 1000)), 'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')), 'chapters': [{ 'start_time': credits_time, 'title': 'End Credits', }] if credits_time else [], - 'series': title_info.get('seriesName'), - 'series_id': title_info.get('seriesId'), - 'season_number': title_info.get('seasonNumber'), - 'season_id': title_info.get('seasonId'), + 'series': title_info_.get('seasonName') if is_episode else None, + 'series_id': title_info.get('seriesId') if is_episode else None, + 'season_number': title_info.get('seasonNumber') if is_episode else None, + 'season_id': title_info.get('seasonId') if is_episode else None, 'episode': title_info.get('name') if is_episode else None, - 'episode_number': title_info.get('episodeNumber'), + 'episode_number': title_info.get('episodeNumber') if is_episode else None, 'episode_id': asin if is_episode else None, } @@ -206,88 +144,17 @@ class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE): 'only_matching': True, }] - _GRAPHQL_QUERY = ''' -query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonId: ID!, $deviceLocale: String) { - getEpisodes( - applicationContextInput: {sessionIdToken: $sessionIdToken, deviceLocale: $deviceLocale, clientId: $clientId} - episodeOrSeasonId: $episodeOrSeasonId - ) { - episodes { - ... on Episode { - contentId - name - images - seriesName - seasonId - seriesId - seasonNumber - episodeNumber - description { - synopsis - contentLengthInSeconds - } - publicReleaseDateUTC - } - } - } -} -''' - def _entries(self, asin): season_info = self._call_api( - asin, note='Downloading season info', data={ - 'operationName': 'getEpisodes', - 'variables': {'episodeOrSeasonId': asin}, - 'query': self._GRAPHQL_QUERY, - }) + asin, note='Downloading season info', + data={'cursor': '8e0cefec-e190-46ba-854d-1f3ca7978b4a:::'} + ) - for episode in season_info['episodes']: - yield self.url_result( - f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId']) - - def _real_extract(self, url): - asin = f'amzn1.dv.gti.{self._match_id(url)}' - return self.playlist_result(self._entries(asin), asin) - - -class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE): - IE_NAME = 'amazonminitv:series' - _VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P[a-f0-9-]+)' - IE_DESC = 'Amazon MiniTV Series, "minitv:series:" prefix' - _TESTS = [{ - 'url': 'amazonminitv:series:amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0', - 'playlist_mincount': 3, - 'info_dict': { - 'id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0', - }, - }, { - 'url': 'amazonminitv:series:56521d46-b040-4fd5-872e-3e70476a04b0', - 'only_matching': True, - }] - - _GRAPHQL_QUERY = ''' -query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeasonOrSeriesId: ID!, $clientId: String) { - getSeasons( - applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId} - episodeOrSeasonOrSeriesId: $episodeOrSeasonOrSeriesId - ) { - seasons { - seasonId - } - } -} -''' - - def _entries(self, asin): - season_info = self._call_api( - asin, note='Downloading series info', data={ - 'operationName': 'getSeasons', - 'variables': {'episodeOrSeasonOrSeriesId': asin}, - 'query': self._GRAPHQL_QUERY, - }) - - for season in season_info['seasons']: - yield self.url_result(f'amazonminitv:season:{season["seasonId"]}', AmazonMiniTVSeasonIE, season['seasonId']) + for season in season_info['widgets'][0]['data']['options']: + if season['active']: + for episode in season['value']['data']['widgets'][0]['data']['widgets']: + yield self.url_result( + f'amazonminitv:{episode["data"]["contentId"]}', AmazonMiniTVIE, episode['data']['contentId']) def _real_extract(self, url): asin = f'amzn1.dv.gti.{self._match_id(url)}' From 7f36e690914197f945bb38f560edc426c824c336 Mon Sep 17 00:00:00 2001 From: DEVENU <89598703+devenu85@users.noreply.github.com> Date: Mon, 22 Jul 2024 19:58:47 +0530 Subject: [PATCH 3/8] Update amazonminitv.py --- yt_dlp/extractor/amazonminitv.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py index 9e68582af..90af46f68 100644 --- a/yt_dlp/extractor/amazonminitv.py +++ b/yt_dlp/extractor/amazonminitv.py @@ -1,5 +1,3 @@ -import json - from .common import InfoExtractor from ..utils import ExtractorError, int_or_none, traverse_obj, try_get @@ -13,13 +11,13 @@ def _real_initialize(self): def _call_api(self, asin, data=None, note=None): query = { - 'contentId':asin, + 'contentId': asin, } if data: query.update(data) resp = self._download_json( - f'https://www.amazon.in/minitv-pr/api/web/page/title', + 'https://www.amazon.in/minitv-pr/api/web/page/title', asin, note=note, headers={ 'Content-Type': 'application/json', 'accounttype': 'NEW_GUEST_ACCOUNT', @@ -106,11 +104,11 @@ def _real_extract(self, url): 'subtitles': subtitles, 'language': traverse_obj(title_info, ('audioTracks', 0)), 'thumbnails': [{ - 'id': "thumbnailImage", + 'id': 'thumbnailImage', 'url': title_info.get('thumbnailImage'), },{ - 'id':"seasonThumbnailImage", - 'url': title_info.get('thumbnailImage') + 'id':'seasonThumbnailImage', + 'url': title_info.get('thumbnailImage'), }], 'description': traverse_obj(title_info_, ('description', 'synopsis')), 'release_timestamp': int_or_none(try_get(title_info_, lambda x: x['publicReleaseDateUTC'] / 1000)), From ae08e3b3a068872f763ffda897db393d0bb15b45 Mon Sep 17 00:00:00 2001 From: DEVENU <89598703+devenu85@users.noreply.github.com> Date: Mon, 22 Jul 2024 20:10:29 +0530 Subject: [PATCH 4/8] Update amazonminitv.py [code fix] --- yt_dlp/extractor/amazonminitv.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py index 90af46f68..cddebcdb8 100644 --- a/yt_dlp/extractor/amazonminitv.py +++ b/yt_dlp/extractor/amazonminitv.py @@ -88,7 +88,7 @@ def _real_extract(self, url): title_info_ = prs['metaData']['contentDetails'] formats, subtitles = [], {} - for mpd in playback_info: + for mpd in playback_info: mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles( mpd['manifestURL'], asin, mpd_id=mpd['codec'], fatal=False) formats.extend(mpd_fmts) @@ -106,8 +106,8 @@ def _real_extract(self, url): 'thumbnails': [{ 'id': 'thumbnailImage', 'url': title_info.get('thumbnailImage'), - },{ - 'id':'seasonThumbnailImage', + }, { + 'id': 'seasonThumbnailImage', 'url': title_info.get('thumbnailImage'), }], 'description': traverse_obj(title_info_, ('description', 'synopsis')), From 696b1c7dc4c344a59c1eeb89a49f8c79dc0d732f Mon Sep 17 00:00:00 2001 From: DEVENU <89598703+devenu85@users.noreply.github.com> Date: Mon, 22 Jul 2024 20:15:32 +0530 Subject: [PATCH 5/8] Update amazonminitv.py --- yt_dlp/extractor/amazonminitv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py index cddebcdb8..6511b16f2 100644 --- a/yt_dlp/extractor/amazonminitv.py +++ b/yt_dlp/extractor/amazonminitv.py @@ -145,7 +145,7 @@ class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE): def _entries(self, asin): season_info = self._call_api( asin, note='Downloading season info', - data={'cursor': '8e0cefec-e190-46ba-854d-1f3ca7978b4a:::'} + data={'cursor': '8e0cefec-e190-46ba-854d-1f3ca7978b4a:::'}, ) for season in season_info['widgets'][0]['data']['options']: From 53a8806ced224f0e4e5fcfde2d861329152cb1cb Mon Sep 17 00:00:00 2001 From: DEVENU <89598703+devenu85@users.noreply.github.com> Date: Tue, 23 Jul 2024 19:26:09 +0530 Subject: [PATCH 6/8] Update amazonminitv.py --- yt_dlp/extractor/amazonminitv.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py index 6511b16f2..282d0d563 100644 --- a/yt_dlp/extractor/amazonminitv.py +++ b/yt_dlp/extractor/amazonminitv.py @@ -83,9 +83,9 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE): def _real_extract(self, url): asin = f'amzn1.dv.gti.{self._match_id(url)}' prs = self._call_api(asin, note='Downloading playback info') - playback_info = prs['widgets'][0]['data']['playbackAssets']['manifestData'] - title_info = prs['widgets'][0]['data']['contentDetails'] - title_info_ = prs['metaData']['contentDetails'] + playback_info = traverse_obj(prs, ('widgets', 0, 'data', 'playbackAssets', 'manifestData')) + title_info = traverse_obj(prs, ('widgets', 0, 'data', 'contentDetails')) + title_info_ = traverse_obj(prs, ('metaData', 'contentDetails')) formats, subtitles = [], {} for mpd in playback_info: From b3dbc3deec1a73403ad059afedefcd3d1f51d0da Mon Sep 17 00:00:00 2001 From: DEVENU <89598703+devenu85@users.noreply.github.com> Date: Tue, 23 Jul 2024 21:52:09 +0530 Subject: [PATCH 7/8] Update amazonminitv.py --- yt_dlp/extractor/amazonminitv.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py index 282d0d563..fbcd179f2 100644 --- a/yt_dlp/extractor/amazonminitv.py +++ b/yt_dlp/extractor/amazonminitv.py @@ -104,12 +104,9 @@ def _real_extract(self, url): 'subtitles': subtitles, 'language': traverse_obj(title_info, ('audioTracks', 0)), 'thumbnails': [{ - 'id': 'thumbnailImage', - 'url': title_info.get('thumbnailImage'), - }, { - 'id': 'seasonThumbnailImage', - 'url': title_info.get('thumbnailImage'), - }], + 'id': 'imageSrc', + 'url': title_info.get('imageSrc'), + }] if title_info.get('imageSrc') else [], 'description': traverse_obj(title_info_, ('description', 'synopsis')), 'release_timestamp': int_or_none(try_get(title_info_, lambda x: x['publicReleaseDateUTC'] / 1000)), 'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')), From ac410f1354c7891474834e17001b990d55e01f6e Mon Sep 17 00:00:00 2001 From: DEVENU <89598703+devenu85@users.noreply.github.com> Date: Thu, 15 Aug 2024 21:27:30 +0530 Subject: [PATCH 8/8] Update amazonminitv.py --- yt_dlp/extractor/amazonminitv.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py index fbcd179f2..516db8694 100644 --- a/yt_dlp/extractor/amazonminitv.py +++ b/yt_dlp/extractor/amazonminitv.py @@ -95,21 +95,21 @@ def _real_extract(self, url): subtitles = self._merge_subtitles(subtitles, mpd_subs) credits_time = try_get(title_info, lambda x: x['skipData']['INTRO']['endTime']) - is_episode = title_info.get('vodType') == 'EPISODE' + is_episode = title_info_.get('vodType') == 'EPISODE' return { 'id': asin, - 'title': title_info.get('name'), + 'title': title_info_.get('name'), 'formats': formats, 'subtitles': subtitles, 'language': traverse_obj(title_info, ('audioTracks', 0)), 'thumbnails': [{ 'id': 'imageSrc', - 'url': title_info.get('imageSrc'), - }] if title_info.get('imageSrc') else [], - 'description': traverse_obj(title_info_, ('description', 'synopsis')), + 'url': title_info_.get('imageSrc'), + }] if title_info_.get('imageSrc') else [], + 'description': traverse_obj(title_info_, ('synopsis')), 'release_timestamp': int_or_none(try_get(title_info_, lambda x: x['publicReleaseDateUTC'] / 1000)), - 'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')), + 'duration': traverse_obj(title_info_, ('contentLengthInSeconds')), 'chapters': [{ 'start_time': credits_time, 'title': 'End Credits',