From dd078970ba1739cfd4fcc798a4b5026cb11c427a Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sun, 17 Oct 2021 17:16:05 +0530
Subject: [PATCH] [crunchyroll] Add support for `beta.crunchyroll` URLs and fix series URLs with language code

---
 yt_dlp/extractor/crunchyroll.py | 56 ++++++++++++++++++++++++++++++++-
 yt_dlp/extractor/extractors.py  |  4 ++-
 2 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py
index 256c6943f..fb05415fc 100644
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@@ -650,7 +650,7 @@ def _real_extract(self, url):
 
 class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
     IE_NAME = 'crunchyroll:playlist'
-    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
+    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:\w{1,2}/)?(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
 
     _TESTS = [{
         'url': 'https://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
@@ -672,6 +672,9 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
         # geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14
         'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1',
         'only_matching': True,
+    }, {
+        'url': 'http://www.crunchyroll.com/fr/ladies-versus-butlers',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -698,3 +701,54 @@ def _real_extract(self, url):
             'title': title,
             'entries': entries,
         }
+
+
+class CrunchyrollBetaIE(CrunchyrollBaseIE):
+    IE_NAME = 'crunchyroll:beta'
+    _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)watch/(?P<internal_id>\w+)/(?P<id>[\w\-]+)/?(?:\?|$)'
+    _TESTS = [{
+        'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
+        'info_dict': {
+            'id': '696363',
+            'ext': 'mp4',
+            'timestamp': 1459610100,
+            'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
+            'uploader': 'Toei Animation',
+            'title': 'World Trigger Episode 73 – To the Future',
+            'upload_date': '20160402',
+        },
+        'params': {'skip_download': 'm3u8'},
+        'expected_warnings': ['Unable to download XML']
+    }]
+
+    def _real_extract(self, url):
+        lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'internal_id', 'id')
+        webpage = self._download_webpage(url, display_id)
+        episode_data = self._parse_json(
+            self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'episode data'),
+            display_id)['content']['byId'][internal_id]
+        video_id = episode_data['external_id'].split('.')[1]
+        series_id = episode_data['episode_metadata']['series_slug_title']
+        return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}',
+                               CrunchyrollIE.ie_key(), video_id)
+
+
+class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
+    IE_NAME = 'crunchyroll:playlist:beta'
+    _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)series/\w+/(?P<id>[\w\-]+)/?(?:\?|$)'
+    _TESTS = [{
+        'url': 'https://beta.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
+        'info_dict': {
+            'id': 'girl-friend-beta',
+            'title': 'Girl Friend BETA',
+        },
+        'playlist_mincount': 10,
+    }, {
+        'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR/Girl-Friend-BETA',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        lang, series_id = self._match_valid_url(url).group('lang', 'id')
+        return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id.lower()}',
+                               CrunchyrollShowPlaylistIE.ie_key(), series_id)
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 6bc9a2b1e..4c89c5a18 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -298,7 +298,9 @@
 from .crooksandliars import CrooksAndLiarsIE
 from .crunchyroll import (
     CrunchyrollIE,
-    CrunchyrollShowPlaylistIE
+    CrunchyrollShowPlaylistIE,
+    CrunchyrollBetaIE,
+    CrunchyrollBetaShowIE,
 )
 from .cspan import CSpanIE
 from .ctsnews import CtsNewsIE