From 380027be4ea64056e613dc61173602799fef23b7 Mon Sep 17 00:00:00 2001 From: subrat-lima Date: Thu, 12 Sep 2024 13:51:02 +0530 Subject: [PATCH 1/9] [ie/afl] added AFLVideoIE extractor --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/afl.py | 54 +++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 yt_dlp/extractor/afl.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e7b162512..9e9f4b601 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -75,6 +75,7 @@ HistoryTopicIE, ) from .aeonco import AeonCoIE +from .afl import AFLVideoIE from .afreecatv import ( AfreecaTVCatchStoryIE, AfreecaTVIE, diff --git a/yt_dlp/extractor/afl.py b/yt_dlp/extractor/afl.py new file mode 100644 index 000000000..44b2ee290 --- /dev/null +++ b/yt_dlp/extractor/afl.py @@ -0,0 +1,54 @@ + +from .brightcove import BrightcoveNewIE +from .common import InfoExtractor +from ..utils import ( + extract_attributes, + get_element_by_class, + smuggle_url, + str_or_none, + traverse_obj, +) + + +class AFLVideoIE(InfoExtractor): + IE_NAME = 'afl:video' + _VALID_URL = r'https?://(?:www\.)?afl\.com.au/(?:aflw/)?video/(?P\d+)' + _TESTS = [{ + 'url': 'https://www.afl.com.au/aflw/video/1217670/the-w-show-aflws-line-in-the-sand-moment-bonnies-bold-bid', + 'md5': '7000431c2bd3f96eddb5f63273aea83e', + 'info_dict': { + 'id': '6361825702112', + 'ext': 'mp4', + 'description': 'md5:d1fee2ae8e3ecf486c1f0f7aa19e724b', + 'upload_date': '20240911', + 'duration': 1523.28, + 'tags': 'count:0', + 'thumbnail': r're:^https?://.*\.jpg$', + 'title': "The W Show: AFLW's 'line in the sand' moment, Bonnie's bold bid", + 'uploader_id': '6057984922001', + 'timestamp': 1726038522, + }, + }, { + 'url': 'https://www.afl.com.au/video/1217264/bulldogs-season-review-gold-plated-list-going-to-waste-duos-frightening-future?videoId=1217264&modal=true&type=video&publishFrom=1725998400001', + 'only_matching': True, + }, { + 'url': 'https://www.afl.com.au/video/1210885/wafl-showreel-ef-hamish-davis-highlights?videoId=1210885&modal=true&type=video&publishFrom=1725171238001', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + element = get_element_by_class('inline-player__player-container', webpage) + attrs = traverse_obj(extract_attributes(element), { + 'account_id': ('data-account', {str_or_none}), + 'player_id': ('data-player', {lambda x: f'{x}_default'}, {str_or_none}), + 'video_id': ('data-video-id', {str_or_none}), + }) + account_id = attrs.get('account_id') + player_id = attrs.get('player_id') + video_id = attrs.get('video_id') + + video_url = f'https://players.brightcove.net/{account_id}/{player_id}/index.html?videoId={video_id}' + video_url = smuggle_url(video_url, {'referrer': url}) + return self.url_result(video_url, BrightcoveNewIE) From 8125680192ec104dbfb6b1cc07ccc7e3c189ddc5 Mon Sep 17 00:00:00 2001 From: subrat-lima Date: Wed, 18 Sep 2024 09:21:58 +0530 Subject: [PATCH 2/9] [ie/afl] added OmnyFMShow extractor --- yt_dlp/extractor/_extractors.py | 6 ++- yt_dlp/extractor/omnyfm.py | 68 +++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 yt_dlp/extractor/omnyfm.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 9e9f4b601..652d409d7 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -75,7 +75,10 @@ HistoryTopicIE, ) from .aeonco import AeonCoIE -from .afl import AFLVideoIE +from .afl import ( + AFLPodcastsIE, + AFLVideoIE, +) from .afreecatv import ( AfreecaTVCatchStoryIE, AfreecaTVIE, @@ -1427,6 +1430,7 @@ ) from .oktoberfesttv import OktoberfestTVIE from .olympics import OlympicsReplayIE +from .omnyfm import OmnyFMShowIE from .on24 import On24IE from .ondemandkorea import ( OnDemandKoreaIE, diff --git a/yt_dlp/extractor/omnyfm.py b/yt_dlp/extractor/omnyfm.py new file mode 100644 index 000000000..f01fa3582 --- /dev/null +++ b/yt_dlp/extractor/omnyfm.py @@ -0,0 +1,68 @@ +import functools +import json +import math + +from .common import InfoExtractor +from ..utils import ( + InAdvancePagedList, + clean_html, + float_or_none, + get_element_by_id, + int_or_none, + str_or_none, + traverse_obj, + unified_strdate, + url_or_none, +) + + +class OmnyFMShowIE(InfoExtractor): + IE_NAME = 'omnyfm:show' + _VALID_URL = r'https?://omny\.fm/shows/(?P[\w-]+)' + _PAGE_SIZE = 10 + _TESTS = [{ + 'url': 'https://omny.fm/shows/league-leaders', + 'info_dict': { + 'id': 'bbe146d4-9bee-4763-b785-ad830009a23f', + 'title': 'League Leaders with Nicole Livingstone', + }, + 'playlist_mincount': 15, + }, { + 'url': 'https://omny.fm/shows/afl-daily', + 'only_matching': True, + }] + + def _fetch_page(self, org_id, playlist_id, page): + return self._download_json(f'https://api.omny.fm/orgs/{org_id}/programs/{playlist_id}/clips?cursor={page}&pageSize={self._PAGE_SIZE}', f'{playlist_id}_{page}') + + def _entries(self, org_id, playlist_id, first_page_data, page): + data = first_page_data if not page else self._fetch_page(org_id, playlist_id, page + 1) + for clip in data.get('Clips', {}): + yield traverse_obj(clip, { + 'id': ('Id', {str_or_none}), + 'title': ('Title', {str_or_none}), + 'description': ('Description', {clean_html}), + 'thumbnail': (('ImageUrl', 'ArtworkUrl'), {url_or_none}, any), + 'duration': ('DurationSeconds', {float_or_none}), + 'url': ('AudioUrl', {url_or_none}), + 'season_number': ('Season', {int_or_none}), + 'episode_number': ('Episode', {int_or_none}), + 'timestamp': ('PublishedUtc', {unified_strdate}, {int_or_none}), + 'filesize': ('PublishedAudioSizeInBytes', {int}), + }) + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + data = json.loads(get_element_by_id('__NEXT_DATA__', webpage)) + org_id = traverse_obj(data, ('props', 'pageProps', 'program', 'OrganizationId', {str_or_none})) + playlist_id = traverse_obj(data, ('props', 'pageProps', 'program', 'Id', {str_or_none})) + playlist_count = traverse_obj(data, ('props', 'pageProps', 'program', 'DefaultPlaylist', 'NumberOfClips', {int_or_none})) + title = traverse_obj(data, ('props', 'pageProps', 'program', 'Name', {str_or_none})) + first_page_data = traverse_obj(data, ('props', 'pageProps', 'clips', {dict})) + total_pages = math.ceil(playlist_count / self._PAGE_SIZE) + + return self.playlist_result(InAdvancePagedList( + functools.partial(self._entries, org_id, playlist_id, first_page_data), + total_pages, self._PAGE_SIZE), playlist_id, title) From 5fea24bda2b1a23648f6067d690116d619fa76a3 Mon Sep 17 00:00:00 2001 From: subrat-lima Date: Wed, 18 Sep 2024 13:23:50 +0530 Subject: [PATCH 3/9] [ie/afl][ie/omnyfm] added AFLPodcastIE and updated OmnyFMShowIE 1. AFLPodcastIE: Added extractor for AFL podcasts 2. OmnyFMShowIE: Updated code to adjust url before download page to support various url patterns --- yt_dlp/extractor/_extractors.py | 2 +- yt_dlp/extractor/afl.py | 26 ++++++++++++++++++++++++++ yt_dlp/extractor/omnyfm.py | 4 +++- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 652d409d7..46a993058 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -76,7 +76,7 @@ ) from .aeonco import AeonCoIE from .afl import ( - AFLPodcastsIE, + AFLPodcastIE, AFLVideoIE, ) from .afreecatv import ( diff --git a/yt_dlp/extractor/afl.py b/yt_dlp/extractor/afl.py index 44b2ee290..506a25808 100644 --- a/yt_dlp/extractor/afl.py +++ b/yt_dlp/extractor/afl.py @@ -1,12 +1,14 @@ from .brightcove import BrightcoveNewIE from .common import InfoExtractor +from .omnyfm import OmnyFMShowIE from ..utils import ( extract_attributes, get_element_by_class, smuggle_url, str_or_none, traverse_obj, + url_or_none, ) @@ -52,3 +54,27 @@ def _real_extract(self, url): video_url = f'https://players.brightcove.net/{account_id}/{player_id}/index.html?videoId={video_id}' video_url = smuggle_url(video_url, {'referrer': url}) return self.url_result(video_url, BrightcoveNewIE) + + +class AFLPodcastIE(InfoExtractor): + IE_NAME = 'afl:podcast' + _VALID_URL = r'https?://(?:www\.)?afl\.com.au/(?:aflw/)?podcasts/(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://www.afl.com.au/podcasts/between-us', + 'md5': '7000431c2bd3f96eddb5f63273aea83e', + 'info_dict': { + 'id': 'e0ab8454-f818-483f-bed1-b156002c021f', + 'title': 'Between Us', + }, + 'playlist_mincount': 7, + }, { + 'url': 'https://www.afl.com.au/podcasts/afl-daily', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + element = get_element_by_class('omny-embed', webpage) + podcast_url = traverse_obj(extract_attributes(element), ('src', {url_or_none})) + return self.url_result(podcast_url, OmnyFMShowIE) diff --git a/yt_dlp/extractor/omnyfm.py b/yt_dlp/extractor/omnyfm.py index f01fa3582..0f69d5954 100644 --- a/yt_dlp/extractor/omnyfm.py +++ b/yt_dlp/extractor/omnyfm.py @@ -19,6 +19,7 @@ class OmnyFMShowIE(InfoExtractor): IE_NAME = 'omnyfm:show' _VALID_URL = r'https?://omny\.fm/shows/(?P[\w-]+)' + _EMBED_REGEX = [r']+?src=(?:["\'])(?Phttps?://omny\.fm/shows/.+?)\1'] _PAGE_SIZE = 10 _TESTS = [{ 'url': 'https://omny.fm/shows/league-leaders', @@ -53,7 +54,8 @@ def _entries(self, org_id, playlist_id, first_page_data, page): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + page_url = 'https://omny.fm/shows/' + display_id + webpage = self._download_webpage(page_url, display_id) data = json.loads(get_element_by_id('__NEXT_DATA__', webpage)) org_id = traverse_obj(data, ('props', 'pageProps', 'program', 'OrganizationId', {str_or_none})) From b62a7cf7259bd279d769c16828857a1cb03a5065 Mon Sep 17 00:00:00 2001 From: subrat-lima Date: Wed, 18 Sep 2024 15:16:55 +0530 Subject: [PATCH 4/9] [ie/afl] added AFCVideoIE --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/afl.py | 38 +++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index b84a268ba..4f23317f2 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -76,6 +76,7 @@ ) from .aeonco import AeonCoIE from .afl import ( + AFCVideoIE, AFLPodcastIE, AFLVideoIE, ) diff --git a/yt_dlp/extractor/afl.py b/yt_dlp/extractor/afl.py index 506a25808..a5f87d9e9 100644 --- a/yt_dlp/extractor/afl.py +++ b/yt_dlp/extractor/afl.py @@ -78,3 +78,41 @@ def _real_extract(self, url): element = get_element_by_class('omny-embed', webpage) podcast_url = traverse_obj(extract_attributes(element), ('src', {url_or_none})) return self.url_result(podcast_url, OmnyFMShowIE) + + +class AFCVideoIE(InfoExtractor): + IE_NAME = 'afc:video' + _VALID_URL = r'https?://(?:www\.)?afc\.com.au/video/(?P\d+)' + _TESTS = [{ + 'url': 'https://www.afc.com.au/video/1657583/girls-academies-be-a-pro?videoId=1657583&modal=true&type=video&publishFrom=1726548621001', + 'md5': '6b52c149ae6566abe4cfc2d24978983d', + 'info_dict': { + 'id': '6362050135112', + 'ext': 'mp4', + 'description': 'md5:35897062f9a02043ece73a410bda595c', + 'upload_date': '20240917', + 'duration': 103.92, + 'tags': 'count:0', + 'thumbnail': r're:^https?://.*\.jpg$', + 'title': 'AFLW Jones Radiology Injury Update: R4', + 'uploader_id': '6057984922001', + 'timestamp': 1726558062, + }, + }, { + 'url': 'https://www.afc.com.au/video/1586280/se10ep16-the-crows-show?videoId=1586280&modal=true&type=video&publishFrom=1719639000001&tagNames=crowsshowepisode', + 'only_matching': True, + }, { + 'url': 'https://www.afc.com.au/video/1647468/matthew-clarke-presser-september-6?videoId=1647468&modal=true&type=video&publishFrom=1725591002001', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex(r'"mediaId"\s*:\s*"(\d+)"', webpage, 'video-id') + player_id = self._search_regex(r'data-player-id\s*=\s*"(\w+)"', webpage, 'player-id') + '_default' + account_id = self._search_regex(r'data-account-id\s*=\s*"(\d+)"', webpage, 'account-id') + + video_url = f'https://players.brightcove.net/{account_id}/{player_id}/index.html?videoId={video_id}' + video_url = smuggle_url(video_url, {'referrer': url}) + return self.url_result(video_url, BrightcoveNewIE) From e17e2beea64ba063fef6ff36fdf3d602fcbe2cec Mon Sep 17 00:00:00 2001 From: subrat-lima Date: Wed, 18 Sep 2024 15:53:14 +0530 Subject: [PATCH 5/9] [ie/afl] add support for lions.com.au and carltonfc.com.au made the following changes: 1. added support for lions.com.au videos 1. added support for carltonfc.com.au videos and podcasts --- yt_dlp/extractor/afl.py | 53 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/afl.py b/yt_dlp/extractor/afl.py index a5f87d9e9..1228798ad 100644 --- a/yt_dlp/extractor/afl.py +++ b/yt_dlp/extractor/afl.py @@ -1,4 +1,3 @@ - from .brightcove import BrightcoveNewIE from .common import InfoExtractor from .omnyfm import OmnyFMShowIE @@ -14,7 +13,7 @@ class AFLVideoIE(InfoExtractor): IE_NAME = 'afl:video' - _VALID_URL = r'https?://(?:www\.)?afl\.com.au/(?:aflw/)?video/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?(?:afl|lions)\.com.au/(?:aflw/)?video/(?P\d+)' _TESTS = [{ 'url': 'https://www.afl.com.au/aflw/video/1217670/the-w-show-aflws-line-in-the-sand-moment-bonnies-bold-bid', 'md5': '7000431c2bd3f96eddb5f63273aea83e', @@ -30,12 +29,30 @@ class AFLVideoIE(InfoExtractor): 'uploader_id': '6057984922001', 'timestamp': 1726038522, }, + }, { + 'url': 'https://www.lions.com.au/video/1655451/team-song-brisbane?videoId=1655451&modal=true&type=video&publishFrom=1726318577001', + 'md5': '47e8c67e317b48a69787c8bc39c3c591', + 'info_dict': { + 'id': '6361958949112', + 'ext': 'mp4', + 'description': 'md5:c0fb37fcad9ec0f49ac54eb8d76641bd', + 'upload_date': '20240914', + 'duration': 41.0, + 'tags': 'count:0', + 'thumbnail': r're:^https?://.*\.jpg$', + 'title': 'Team Song: Brisbane', + 'uploader_id': '6057984922001', + 'timestamp': 1726318788, + }, }, { 'url': 'https://www.afl.com.au/video/1217264/bulldogs-season-review-gold-plated-list-going-to-waste-duos-frightening-future?videoId=1217264&modal=true&type=video&publishFrom=1725998400001', 'only_matching': True, }, { 'url': 'https://www.afl.com.au/video/1210885/wafl-showreel-ef-hamish-davis-highlights?videoId=1210885&modal=true&type=video&publishFrom=1725171238001', 'only_matching': True, + }, { + 'url': 'https://www.lions.com.au/video/1657551/svarc-weve-built-up-really-well?videoId=1657551&modal=true&type=video&publishFrom=1726545600001', + 'only_matching': True, }] def _real_extract(self, url): @@ -58,7 +75,7 @@ def _real_extract(self, url): class AFLPodcastIE(InfoExtractor): IE_NAME = 'afl:podcast' - _VALID_URL = r'https?://(?:www\.)?afl\.com.au/(?:aflw/)?podcasts/(?P[\w-]+)' + _VALID_URL = r'https?://(?:www\.)?(?:afl|carltonfc)\.com.au/(?:aflw/)?podcasts/(?P[\w-]+)' _TESTS = [{ 'url': 'https://www.afl.com.au/podcasts/between-us', 'md5': '7000431c2bd3f96eddb5f63273aea83e', @@ -67,9 +84,20 @@ class AFLPodcastIE(InfoExtractor): 'title': 'Between Us', }, 'playlist_mincount': 7, + }, { + 'url': 'https://www.carltonfc.com.au/podcasts/walk-a-mile', + 'md5': '', + 'info_dict': { + 'id': '6dbb9b23-7f00-49d4-b44e-aec2017651dc', + 'title': 'Walk a Mile in Their Shoes', + }, + 'playlist_mincount': 3, }, { 'url': 'https://www.afl.com.au/podcasts/afl-daily', 'only_matching': True, + }, { + 'url': 'https://www.carltonfc.com.au/podcasts/summer-sessions', + 'only_matching': True, }] def _real_extract(self, url): @@ -82,7 +110,7 @@ def _real_extract(self, url): class AFCVideoIE(InfoExtractor): IE_NAME = 'afc:video' - _VALID_URL = r'https?://(?:www\.)?afc\.com.au/video/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?(?:afc|carltonfc)\.com.au/video/(?P\d+)' _TESTS = [{ 'url': 'https://www.afc.com.au/video/1657583/girls-academies-be-a-pro?videoId=1657583&modal=true&type=video&publishFrom=1726548621001', 'md5': '6b52c149ae6566abe4cfc2d24978983d', @@ -98,11 +126,26 @@ class AFCVideoIE(InfoExtractor): 'uploader_id': '6057984922001', 'timestamp': 1726558062, }, + }, { + 'url': 'https://www.carltonfc.com.au/video/1657596/cripps-on-taking-carlton-to-the-next-level?videoId=1657596&modal=true&type=video&publishFrom=1726555500001', + 'md5': 'fb5d909329871aa6d182e520d1627846', + 'info_dict': { + 'id': '6362089476112', + 'ext': 'mp4', + 'description': 'md5:823db447fd9aed2033548e39283d3c0f', + 'upload_date': '20240918', + 'duration': 75.72, + 'tags': 'count:0', + 'thumbnail': r're:^https?://.*\.jpg$', + 'title': 'The Rundown | Impact of fans', + 'uploader_id': '6057984922001', + 'timestamp': 1726631322, + }, }, { 'url': 'https://www.afc.com.au/video/1586280/se10ep16-the-crows-show?videoId=1586280&modal=true&type=video&publishFrom=1719639000001&tagNames=crowsshowepisode', 'only_matching': True, }, { - 'url': 'https://www.afc.com.au/video/1647468/matthew-clarke-presser-september-6?videoId=1647468&modal=true&type=video&publishFrom=1725591002001', + 'url': 'https://www.carltonfc.com.au/video/1658173/the-rundown-impact-of-fans?videoId=1658173&modal=true&type=video&publishFrom=1726630922001', 'only_matching': True, }] From 3e4523b78cb30a8e83eceb1ba65cc36a0a21f59f Mon Sep 17 00:00:00 2001 From: subrat-lima Date: Thu, 19 Sep 2024 13:40:37 +0530 Subject: [PATCH 6/9] [ie/omnyfm] updated extractor to use OnDemandPagedList instead of InAdvancePagedList --- yt_dlp/extractor/omnyfm.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/omnyfm.py b/yt_dlp/extractor/omnyfm.py index 0f69d5954..a338ae1c1 100644 --- a/yt_dlp/extractor/omnyfm.py +++ b/yt_dlp/extractor/omnyfm.py @@ -1,10 +1,9 @@ import functools import json -import math from .common import InfoExtractor from ..utils import ( - InAdvancePagedList, + OnDemandPagedList, clean_html, float_or_none, get_element_by_id, @@ -60,11 +59,8 @@ def _real_extract(self, url): data = json.loads(get_element_by_id('__NEXT_DATA__', webpage)) org_id = traverse_obj(data, ('props', 'pageProps', 'program', 'OrganizationId', {str_or_none})) playlist_id = traverse_obj(data, ('props', 'pageProps', 'program', 'Id', {str_or_none})) - playlist_count = traverse_obj(data, ('props', 'pageProps', 'program', 'DefaultPlaylist', 'NumberOfClips', {int_or_none})) title = traverse_obj(data, ('props', 'pageProps', 'program', 'Name', {str_or_none})) first_page_data = traverse_obj(data, ('props', 'pageProps', 'clips', {dict})) - total_pages = math.ceil(playlist_count / self._PAGE_SIZE) - return self.playlist_result(InAdvancePagedList( - functools.partial(self._entries, org_id, playlist_id, first_page_data), - total_pages, self._PAGE_SIZE), playlist_id, title) + entries = OnDemandPagedList(functools.partial(self._entries, org_id, playlist_id, first_page_data), self._PAGE_SIZE) + return self.playlist_result(entries, playlist_id, title) From 549d28cd04cb32bf9d906950bce3ff8633d74f04 Mon Sep 17 00:00:00 2001 From: subrat-lima Date: Thu, 19 Sep 2024 14:01:03 +0530 Subject: [PATCH 7/9] [ie/omnyfm] updated extractor to use _search_nextjs_data for json data parsing --- yt_dlp/extractor/omnyfm.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/yt_dlp/extractor/omnyfm.py b/yt_dlp/extractor/omnyfm.py index a338ae1c1..ba9e089b2 100644 --- a/yt_dlp/extractor/omnyfm.py +++ b/yt_dlp/extractor/omnyfm.py @@ -1,12 +1,10 @@ import functools -import json from .common import InfoExtractor from ..utils import ( OnDemandPagedList, clean_html, float_or_none, - get_element_by_id, int_or_none, str_or_none, traverse_obj, @@ -56,7 +54,7 @@ def _real_extract(self, url): page_url = 'https://omny.fm/shows/' + display_id webpage = self._download_webpage(page_url, display_id) - data = json.loads(get_element_by_id('__NEXT_DATA__', webpage)) + data = self._search_nextjs_data(webpage, display_id) org_id = traverse_obj(data, ('props', 'pageProps', 'program', 'OrganizationId', {str_or_none})) playlist_id = traverse_obj(data, ('props', 'pageProps', 'program', 'Id', {str_or_none})) title = traverse_obj(data, ('props', 'pageProps', 'program', 'Name', {str_or_none})) From 74e26f7599daa6c6172cf269900740d0b52c4d8b Mon Sep 17 00:00:00 2001 From: subrat-lima Date: Thu, 19 Sep 2024 14:28:55 +0530 Subject: [PATCH 8/9] [ie/afl] updated AFCVideoIE to extract video attrs using html extract_attributes function --- yt_dlp/extractor/afl.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/afl.py b/yt_dlp/extractor/afl.py index 1228798ad..f59e4124e 100644 --- a/yt_dlp/extractor/afl.py +++ b/yt_dlp/extractor/afl.py @@ -4,6 +4,7 @@ from ..utils import ( extract_attributes, get_element_by_class, + get_element_html_by_id, smuggle_url, str_or_none, traverse_obj, @@ -153,8 +154,9 @@ def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) video_id = self._search_regex(r'"mediaId"\s*:\s*"(\d+)"', webpage, 'video-id') - player_id = self._search_regex(r'data-player-id\s*=\s*"(\w+)"', webpage, 'player-id') + '_default' - account_id = self._search_regex(r'data-account-id\s*=\s*"(\d+)"', webpage, 'account-id') + video_attrs = extract_attributes(get_element_html_by_id('VideoModal', webpage)) + player_id = video_attrs['data-player-id'] + '_default' + account_id = video_attrs['data-account-id'] video_url = f'https://players.brightcove.net/{account_id}/{player_id}/index.html?videoId={video_id}' video_url = smuggle_url(video_url, {'referrer': url}) From d9e0e023b8bd3c7aa85c6b7d438d0e875b8e87e4 Mon Sep 17 00:00:00 2001 From: subrat-lima Date: Thu, 19 Sep 2024 15:39:55 +0530 Subject: [PATCH 9/9] [ie/afl] fixed AFLCFVideoIE incorrect video parsing & split CarltonFCVideoIE Changes made: 1. AFCVideoIE, CarltonFCVideoIE: fixed the issue to find the correct video media id. (thanks to pzhlkj6612a) 2. AFCVideoIE: added support for pages where mediaId is unavailable in webpage 3. CarltonFCVideoIE: split from AFCVideoIE because of point 2 as the variation is specific to AFCVideo 4. Updated test cases --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/afl.py | 87 ++++++++++++++++++++++++--------- 2 files changed, 65 insertions(+), 23 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 4f23317f2..b82163d48 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -79,6 +79,7 @@ AFCVideoIE, AFLPodcastIE, AFLVideoIE, + CarltonFCVideoIE, ) from .afreecatv import ( AfreecaTVCatchStoryIE, diff --git a/yt_dlp/extractor/afl.py b/yt_dlp/extractor/afl.py index f59e4124e..4cb401ac2 100644 --- a/yt_dlp/extractor/afl.py +++ b/yt_dlp/extractor/afl.py @@ -4,6 +4,7 @@ from ..utils import ( extract_attributes, get_element_by_class, + get_element_html_by_attribute, get_element_html_by_id, smuggle_url, str_or_none, @@ -111,40 +112,79 @@ def _real_extract(self, url): class AFCVideoIE(InfoExtractor): IE_NAME = 'afc:video' - _VALID_URL = r'https?://(?:www\.)?(?:afc|carltonfc)\.com.au/video/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?afc\.com.au/video/(?P\d+)' _TESTS = [{ 'url': 'https://www.afc.com.au/video/1657583/girls-academies-be-a-pro?videoId=1657583&modal=true&type=video&publishFrom=1726548621001', - 'md5': '6b52c149ae6566abe4cfc2d24978983d', + 'md5': 'd0f4ec78b5a693d95c975ae3aeed8b2d', 'info_dict': { - 'id': '6362050135112', + 'id': '6362048189112', 'ext': 'mp4', - 'description': 'md5:35897062f9a02043ece73a410bda595c', + 'description': 'md5:5c43f1affe1a0cd8e2192358a49de9cc', 'upload_date': '20240917', - 'duration': 103.92, + 'duration': 50.48, 'tags': 'count:0', 'thumbnail': r're:^https?://.*\.jpg$', - 'title': 'AFLW Jones Radiology Injury Update: R4', + 'title': 'Girls Academies – ‘Be a Pro’', 'uploader_id': '6057984922001', - 'timestamp': 1726558062, - }, - }, { - 'url': 'https://www.carltonfc.com.au/video/1657596/cripps-on-taking-carlton-to-the-next-level?videoId=1657596&modal=true&type=video&publishFrom=1726555500001', - 'md5': 'fb5d909329871aa6d182e520d1627846', - 'info_dict': { - 'id': '6362089476112', - 'ext': 'mp4', - 'description': 'md5:823db447fd9aed2033548e39283d3c0f', - 'upload_date': '20240918', - 'duration': 75.72, - 'tags': 'count:0', - 'thumbnail': r're:^https?://.*\.jpg$', - 'title': 'The Rundown | Impact of fans', - 'uploader_id': '6057984922001', - 'timestamp': 1726631322, + 'timestamp': 1726548942, }, }, { 'url': 'https://www.afc.com.au/video/1586280/se10ep16-the-crows-show?videoId=1586280&modal=true&type=video&publishFrom=1719639000001&tagNames=crowsshowepisode', + 'md5': 'bd9984d62f87b4c2299bb62ffc869189', + 'info_dict': { + 'id': '6355746458112', + 'ext': 'mp4', + 'description': 'md5:4470d107af6e749a8225fd558b98b50b', + 'upload_date': '20240627', + 'duration': 1193.64, + 'tags': 'count:0', + 'thumbnail': r're:^https?://.*\.jpg$', + 'title': 'SE10EP16 - The Crows Show', + 'uploader_id': '6057984922001', + 'timestamp': 1719466601, + }, + }, { + 'url': 'https://www.afc.com.au/video/1634706/jones-radiology-injury-update-r24?videoId=1634706&modal=true&type=video&publishFrom=1724126172001', 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_attrs = extract_attributes(get_element_html_by_id('VideoModal', webpage)) + player_id = video_attrs['data-player-id'] + '_default' + account_id = video_attrs['data-account-id'] + + video_element_html = get_element_html_by_attribute('data-id', display_id, webpage) + if video_element_html is None: + data = self._download_json(f'https://aflapi.afc.com.au/content/aflc-adel/video/en/{display_id}', display_id) + video_id = traverse_obj(data, ('mediaId', {str_or_none})) + else: + video_id = self._search_regex(r'"mediaId"\s*:\s*"(\d+)"', video_element_html, 'video-id', fatal=False) + + video_url = f'https://players.brightcove.net/{account_id}/{player_id}/index.html?videoId={video_id}' + video_url = smuggle_url(video_url, {'referrer': url}) + return self.url_result(video_url, BrightcoveNewIE) + + +class CarltonFCVideoIE(InfoExtractor): + IE_NAME = 'carltonfc:video' + _VALID_URL = r'https?://(?:www\.)?carltonfc\.com.au/video/(?P\d+)' + _TESTS = [{ + 'url': 'https://www.carltonfc.com.au/video/1657596/cripps-on-taking-carlton-to-the-next-level?videoId=1657596&modal=true&type=video&publishFrom=1726555500001', + 'md5': '67916ea9dd28376365184bb3869a1548', + 'info_dict': { + 'id': '6362046715112', + 'ext': 'mp4', + 'description': 'md5:02eeff6576fcd7c33e18e34b1b0ebf56', + 'upload_date': '20240917', + 'duration': 90.44, + 'tags': 'count:0', + 'thumbnail': r're:^https?://.*\.jpg$', + 'title': 'Cripps on taking Carlton to the next level', + 'uploader_id': '6057984922001', + 'timestamp': 1726550622, + }, }, { 'url': 'https://www.carltonfc.com.au/video/1658173/the-rundown-impact-of-fans?videoId=1658173&modal=true&type=video&publishFrom=1726630922001', 'only_matching': True, @@ -153,7 +193,8 @@ class AFCVideoIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_id = self._search_regex(r'"mediaId"\s*:\s*"(\d+)"', webpage, 'video-id') + video_tag = get_element_html_by_attribute('data-id', display_id, webpage) + video_id = self._search_regex(r'"mediaId"\s*:\s*"(\d+)"', video_tag, 'video-id') video_attrs = extract_attributes(get_element_html_by_id('VideoModal', webpage)) player_id = video_attrs['data-player-id'] + '_default' account_id = video_attrs['data-account-id']