From 88a99c87b680ae59002534a517e191f46c42cbd4 Mon Sep 17 00:00:00 2001
From: Midnight Veil
Date: Tue, 10 Oct 2023 04:55:46 +1100
Subject: [PATCH] [ie/tenplay] Add support for seasons (#7939)

Closes #7744
Authored by: midnightveil
---
 yt_dlp/extractor/_extractors.py |  5 ++-
 yt_dlp/extractor/tenplay.py     | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 55c3c2f8e8..6717a6039f 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1992,7 +1992,10 @@
     WeTvSeriesIE,
 )
 from .tennistv import TennisTVIE
-from .tenplay import TenPlayIE
+from .tenplay import (
+    TenPlayIE,
+    TenPlaySeasonIE,
+)
 from .testurl import TestURLIE
 from .tf1 import TF1IE
 from .tfo import TFOIE
diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py
index c7097cf025..7ce7cbf849 100644
--- a/yt_dlp/extractor/tenplay.py
+++ b/yt_dlp/extractor/tenplay.py
@@ -1,9 +1,11 @@
-from datetime import datetime
 import base64
+import functools
+import itertools
+from datetime import datetime
 
 from .common import InfoExtractor
 from ..networking import HEADRequest
-from ..utils import int_or_none, urlencode_postdata
+from ..utils import int_or_none, traverse_obj, urlencode_postdata, urljoin
 
 
 class TenPlayIE(InfoExtractor):
@@ -113,3 +115,65 @@ def _real_extract(self, url):
             'uploader': 'Channel 10',
             'uploader_id': '2199827728001',
         }
+
+
+class TenPlaySeasonIE(InfoExtractor):
+    """Playlist extractor for a single season of a 10play show."""
+
+    _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?P<show>[^/?#]+)/episodes/(?P<season>[^/?#]+)/?(?:$|[?#])'
+    _TESTS = [{
+        'url': 'https://10play.com.au/masterchef/episodes/season-14',
+        'info_dict': {
+            'title': 'Season 14',
+            'id': 'MjMyOTIy',
+        },
+        'playlist_mincount': 64,
+    }, {
+        'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2022',
+        'info_dict': {
+            'title': 'Season 2022',
+            'id': 'Mjc0OTIw',
+        },
+        'playlist_mincount': 256,
+    }]
+
+    def _entries(self, load_more_url, display_id=None):
+        """Yield each episode's ``cardLink``, following the paginated carousel.
+
+        Every request sends the IDs collected so far as ``skipIds[]``; paging
+        stops once a response reports a falsy ``hasMore``.
+        """
+        skip_ids = []
+        for page in itertools.count(1):
+            episodes_carousel = self._download_json(
+                load_more_url, display_id, query={'skipIds[]': skip_ids},
+                note=f'Fetching episodes page {page}')
+
+            episodes_chunk = episodes_carousel['items']
+            skip_ids.extend(ep['id'] for ep in episodes_chunk)
+
+            for ep in episodes_chunk:
+                yield ep['cardLink']
+            if not episodes_carousel['hasMore']:
+                break
+
+    def _real_extract(self, url):
+        """Return a playlist of the season's episodes for *url*."""
+        show, season = self._match_valid_url(url).group('show', 'season')
+        season_info = self._download_json(
+            f'https://10play.com.au/api/shows/{show}/episodes/{season}', f'{show}/{season}')
+
+        # Prefer the component titled "episodes"; the second branch accepts any
+        # dict component, and get_all=False keeps only the first match.
+        episodes_carousel = traverse_obj(season_info, (
+            'content', 0, 'components', (
+                lambda _, v: v['title'].lower() == 'episodes',
+                (..., {dict}),
+            )), get_all=False) or {}
+
+        playlist_id = episodes_carousel['tpId']
+
+        return self.playlist_from_matches(
+            self._entries(urljoin(url, episodes_carousel['loadMoreUrl']), playlist_id),
+            playlist_id, traverse_obj(season_info, ('content', 0, 'title', {str})),
+            getter=functools.partial(urljoin, url))