Merge cb0aa20d4f into 4a9bc8c363

[ie/afl] updated AFCVideoIE & CarltonFCVideoIE mediaId parser
[ie/omnyfm] added back the InAdvancePagedList function
2024-09-27 00:37:03 +02:00 · 2024-09-20 02:06:28 +05:30 · 2024-09-20 02:00:27 +05:30 · 2024-09-20 01:36:13 +05:30 · 2024-09-20 01:18:31 +05:30 · 2024-09-20 01:07:41 +05:30
2 changed files with 21 additions and 15 deletions
--- a/yt_dlp/extractor/afl.py
+++ b/yt_dlp/extractor/afl.py
@@ -15,7 +15,7 @@

 class AFLVideoIE(InfoExtractor):
    IE_NAME = 'afl:video'
-    _VALID_URL = r'https?://(?:www\.)?(?:afl|lions)\.com.au/(?:aflw/)?video/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:afl|lions)\.com\.au/(?:aflw/)?video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.afl.com.au/aflw/video/1217670/the-w-show-aflws-line-in-the-sand-moment-bonnies-bold-bid',
        'md5': '7000431c2bd3f96eddb5f63273aea83e',
@@ -77,7 +77,7 @@ def _real_extract(self, url):

 class AFLPodcastIE(InfoExtractor):
    IE_NAME = 'afl:podcast'
-    _VALID_URL = r'https?://(?:www\.)?(?:afl|carltonfc)\.com.au/(?:aflw/)?podcasts/(?P<id>[\w-]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:afl|carltonfc)\.com\.au/(?:aflw/)?podcasts/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.afl.com.au/podcasts/between-us',
        'md5': '7000431c2bd3f96eddb5f63273aea83e',
@@ -112,7 +112,7 @@ def _real_extract(self, url):

 class AFCVideoIE(InfoExtractor):
    IE_NAME = 'afc:video'
-    _VALID_URL = r'https?://(?:www\.)?afc\.com.au/video/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?afc\.com\.au/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.afc.com.au/video/1657583/girls-academies-be-a-pro?videoId=1657583&modal=true&type=video&publishFrom=1726548621001',
        'md5': 'd0f4ec78b5a693d95c975ae3aeed8b2d',
@@ -156,11 +156,11 @@ def _real_extract(self, url):
        account_id = video_attrs['data-account-id']

        video_element_html = get_element_html_by_attribute('data-id', display_id, webpage)
-        if video_element_html is None:
-            data = self._download_json(f'https://aflapi.afc.com.au/content/aflc-adel/video/en/{display_id}', display_id)
-            video_id = traverse_obj(data, ('mediaId', {str_or_none}))
+        if not video_element_html:
+            video_data = self._download_json(f'https://aflapi.afc.com.au/content/aflc-adel/video/en/{display_id}', display_id)
        else:
-            video_id = self._search_regex(r'"mediaId"\s*:\s*"(\d+)"', video_element_html, 'video-id', fatal=False)
+            video_data = self._search_json(r'data-ui-args\s*=\s*["\']', video_element_html, 'video-id', display_id)
+        video_id = video_data['mediaId']

        video_url = f'https://players.brightcove.net/{account_id}/{player_id}/index.html?videoId={video_id}'
        video_url = smuggle_url(video_url, {'referrer': url})
@@ -169,7 +169,7 @@ def _real_extract(self, url):

 class CarltonFCVideoIE(InfoExtractor):
    IE_NAME = 'carltonfc:video'
-    _VALID_URL = r'https?://(?:www\.)?carltonfc\.com.au/video/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?carltonfc\.com\.au/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.carltonfc.com.au/video/1657596/cripps-on-taking-carlton-to-the-next-level?videoId=1657596&modal=true&type=video&publishFrom=1726555500001',
        'md5': '67916ea9dd28376365184bb3869a1548',
@@ -193,12 +193,14 @@ class CarltonFCVideoIE(InfoExtractor):
    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
-        video_tag = get_element_html_by_attribute('data-id', display_id, webpage)
-        video_id = self._search_regex(r'"mediaId"\s*:\s*"(\d+)"', video_tag, 'video-id')
        video_attrs = extract_attributes(get_element_html_by_id('VideoModal', webpage))
        player_id = video_attrs['data-player-id'] + '_default'
        account_id = video_attrs['data-account-id']

+        video_element_html = get_element_html_by_attribute('data-id', display_id, webpage)
+        video_data = self._search_json(r'data-ui-args\s*=\s*["\']', video_element_html, 'video-id', display_id)
+        video_id = video_data['mediaId']
+
        video_url = f'https://players.brightcove.net/{account_id}/{player_id}/index.html?videoId={video_id}'
        video_url = smuggle_url(video_url, {'referrer': url})
        return self.url_result(video_url, BrightcoveNewIE)
--- a/yt_dlp/extractor/omnyfm.py
+++ b/yt_dlp/extractor/omnyfm.py
@@ -1,8 +1,9 @@
 import functools
+import math

 from .common import InfoExtractor
 from ..utils import (
-    OnDemandPagedList,
+    InAdvancePagedList,
    clean_html,
    float_or_none,
    int_or_none,
@@ -15,8 +16,8 @@

 class OmnyFMShowIE(InfoExtractor):
    IE_NAME = 'omnyfm:show'
-    _VALID_URL = r'https?://omny\.fm/shows/(?P<id>[\w-]+)'
-    _EMBED_REGEX = [r'<iframe[^>]+?src=(?:["\'])(?P<url>https?://omny\.fm/shows/.+?)\1']
+    _VALID_URL = r'https?://omny\.fm/shows/(?P<id>[^/]+)'
+    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://omny\.fm/shows/.+?)\1']
    _PAGE_SIZE = 10
    _TESTS = [{
        'url': 'https://omny.fm/shows/league-leaders',
@@ -57,8 +58,11 @@ def _real_extract(self, url):
        data = self._search_nextjs_data(webpage, display_id)
        org_id = traverse_obj(data, ('props', 'pageProps', 'program', 'OrganizationId', {str_or_none}))
        playlist_id = traverse_obj(data, ('props', 'pageProps', 'program', 'Id', {str_or_none}))
+        playlist_count = traverse_obj(data, ('props', 'pageProps', 'program', 'DefaultPlaylist', 'NumberOfClips', {int_or_none}))
        title = traverse_obj(data, ('props', 'pageProps', 'program', 'Name', {str_or_none}))
        first_page_data = traverse_obj(data, ('props', 'pageProps', 'clips', {dict}))
+        total_pages = math.ceil(playlist_count / self._PAGE_SIZE)

-        entries = OnDemandPagedList(functools.partial(self._entries, org_id, playlist_id, first_page_data), self._PAGE_SIZE)
-        return self.playlist_result(entries, playlist_id, title)
+        return self.playlist_result(InAdvancePagedList(
+            functools.partial(self._entries, org_id, playlist_id, first_page_data),
+            total_pages, self._PAGE_SIZE), playlist_id, title)
Author	SHA1	Message	Date
Subrat Lima	4b6e27fe72	Merge `cb0aa20d4f` into `4a9bc8c363`	2024-09-20 02:06:28 +05:30
subrat-lima	cb0aa20d4f	[ie/afl] updated AFCVideoIE & CarltonFCVideoIE mediaId parser	2024-09-20 02:00:27 +05:30
subrat-lima	2c49f52c04	[ie/omnyfm] added back the InAdvancePagedList function	2024-09-20 01:36:13 +05:30
subrat-lima	4ff1288758	[ie/omnyfm] updated _VALID_URL regex pattern for id	2024-09-20 01:18:31 +05:30
subrat-lima	93e65f14dc	[ie/afl] updated _VALID_URL regex to properly escape dot character	2024-09-20 01:07:41 +05:30
subrat-lima	d40dbdc50b	[ie/omnyfm] updated _EMBED_REGEX	2024-09-20 01:02:07 +05:30