import base64
import json
import re
import urllib.request
import xml.etree.ElementTree
import zlib
from hashlib import sha1
from math import floor, pow, sqrt

from .common import InfoExtractor
from .vrv import VRVBaseIE
from ..aes import aes_cbc_decrypt
from ..compat import (
    compat_b64decode,
    compat_etree_fromstring,
    compat_str,
    compat_urllib_parse_urlencode,
    compat_urlparse,
)
from ..utils import (
    ExtractorError,
    bytes_to_intlist,
    extract_attributes,
    float_or_none,
    format_field,
    int_or_none,
    intlist_to_bytes,
    join_nonempty,
    lowercase_escape,
    merge_dicts,
    parse_iso8601,
    qualities,
    remove_end,
    sanitized_Request,
    traverse_obj,
    try_get,
    xpath_text,
)


class CrunchyrollBaseIE(InfoExtractor):
    _LOGIN_URL = 'https://www.crunchyroll.com/welcome/login'
    _API_BASE = 'https://api.crunchyroll.com'
    _NETRC_MACHINE = 'crunchyroll'
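
    # Thin wrapper around Crunchyroll's legacy "RpcApi" endpoint: every call is a
    # form-encoded POST to /xml/ with req=RpcApi<Method>. Because fatal=False, a failed
    # request comes back as a non-Element value, which callers check for.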
    def _call_rpc_api(self, method, video_id, note=None, data=None):
        data = data or {}
        data['req'] = 'RpcApi' + method
        data = compat_urllib_parse_urlencode(data).encode('utf-8')
        return self._download_xml(
            'https://www.crunchyroll.com/xml/',
            video_id, note, fatal=False, data=data, headers={
                'Content-Type': 'application/x-www-form-urlencoded',
            })
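
    # Legacy login flow: fetch a session id from get_upsell_data.0.json, post the
    # credentials to login.1.json and then verify that the etp_rt cookie was set,
    # since the beta extractors key off that cookie.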
    def _perform_login(self, username, password):
        if self._get_cookies(self._LOGIN_URL).get('etp_rt'):
            return

        upsell_response = self._download_json(
            f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
            query={
                'sess_id': 1,
                'device_id': 'whatvalueshouldbeforweb',
                'device_type': 'com.crunchyroll.static',
                'access_token': 'giKq5eY27ny3cqz',
                'referer': self._LOGIN_URL
            })
        if upsell_response['code'] != 'ok':
            raise ExtractorError('Could not get session id')
        session_id = upsell_response['data']['session_id']

        login_response = self._download_json(
            f'{self._API_BASE}/login.1.json', None, 'Logging in',
            data=compat_urllib_parse_urlencode({
                'account': username,
                'password': password,
                'session_id': session_id
            }).encode('ascii'))
        if login_response['code'] != 'ok':
            raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
        if not self._get_cookies(self._LOGIN_URL).get('etp_rt'):
            raise ExtractorError('Login succeeded but did not set etp_rt cookie')

    # Beta-specific, but needed for redirects
    def _get_beta_embedded_json(self, webpage, display_id):
        initial_state = self._parse_json(self._search_regex(
            r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'), display_id)
        app_config = self._parse_json(self._search_regex(
            r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'), display_id)
        return initial_state, app_config

    def _redirect_to_beta(self, webpage, iekey, video_id):
        if not self._get_cookies(self._LOGIN_URL).get('etp_rt'):
            raise ExtractorError('Received a beta page from non-beta url when not logged in.')
        initial_state, app_config = self._get_beta_embedded_json(webpage, video_id)
        url = app_config['baseSiteUrl'] + initial_state['router']['locations']['current']['pathname']
        self.to_screen(f'{video_id}: Redirected to beta site - {url}')
        return self.url_result(f'{url}', iekey, video_id)

    @staticmethod
    def _add_skip_wall(url):
        parsed_url = compat_urlparse.urlparse(url)
        qs = compat_urlparse.parse_qs(parsed_url.query)
        # Always force skip_wall to bypass the maturity wall, i.e. the 18+ confirmation message:
        # > This content may be inappropriate for some people.
        # > Are you sure you want to continue?
        # since it is not disabled by default in the Crunchyroll account settings.
        # See https://github.com/ytdl-org/youtube-dl/issues/7202.
        qs['skip_wall'] = ['1']
        return compat_urlparse.urlunparse(
            parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))


class CrunchyrollIE(CrunchyrollBaseIE, VRVBaseIE):
    IE_NAME = 'crunchyroll'
    _VALID_URL = r'''(?x)
        https?://(?:(?P<prefix>www|m)\.)?(?P<url>
            crunchyroll\.(?:com|fr)/(?:
                media(?:-|/\?id=)|
                (?!series/|watch/)(?:[^/]+/){1,2}[^/?&#]*?
            )(?P<id>[0-9]+)
        )(?:[/?&#]|$)'''

    _TESTS = [{
        'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
        'info_dict': {
            'id': '645513',
            'ext': 'mp4',
            'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
            'description': 'md5:2d17137920c64f2f49981a7797d275ef',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Yomiuri Telecasting Corporation (YTV)',
            'upload_date': '20131013',
            'url': 're:(?!.*&)',
        },
        'params': {
            # rtmp
            'skip_download': True,
        },
        'skip': 'Video gone',
    }, {
        'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
        'info_dict': {
            'id': '589804',
            'ext': 'flv',
            'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
            'description': 'md5:2fbc01f90b87e8e9137296f37b461c12',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Danny Choo Network',
            'upload_date': '20120213',
        },
        'params': {
            # rtmp
            'skip_download': True,
        },
        'skip': 'Video gone',
    }, {
        'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
        'info_dict': {
            'id': '702409',
            'ext': 'mp4',
            'title': compat_str,
            'description': compat_str,
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Re:Zero Partners',
            'timestamp': 1462098900,
            'upload_date': '20160501',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589',
        'info_dict': {
            'id': '727589',
            'ext': 'mp4',
            'title': compat_str,
            'description': compat_str,
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Kadokawa Pictures Inc.',
            'timestamp': 1484130900,
            'upload_date': '20170111',
            'series': compat_str,
            'season': "KONOSUBA -God's blessing on this wonderful world! 2",
            'season_number': 2,
            'episode': 'Give Me Deliverance From This Judicial Injustice!',
            'episode_number': 1,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
        'only_matching': True,
    }, {
        # geo-restricted (US), 18+ maturity wall, non-premium available
        'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
        'only_matching': True,
    }, {
        # A description with double quotes
        'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080',
        'info_dict': {
            'id': '535080',
            'ext': 'mp4',
            'title': compat_str,
            'description': compat_str,
            'uploader': 'Marvelous AQL Inc.',
            'timestamp': 1255512600,
            'upload_date': '20091014',
        },
        'params': {
            # Just test metadata extraction
            'skip_download': True,
        },
    }, {
        # make sure we can extract an uploader name that's not a link
        'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899',
        'info_dict': {
            'id': '606899',
            'ext': 'mp4',
            'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors',
            'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"',
            'uploader': 'Geneon Entertainment',
            'upload_date': '20120717',
        },
        'params': {
            # just test metadata extraction
            'skip_download': True,
        },
        'skip': 'Video gone',
    }, {
        # A video with a vastly different season name compared to the series name
        'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
        'info_dict': {
            'id': '590532',
            'ext': 'mp4',
            'title': compat_str,
            'description': compat_str,
            'uploader': 'TV TOKYO',
            'timestamp': 1330956000,
            'upload_date': '20120305',
            'series': 'Nyarko-san: Another Crawling Chaos',
            'season': 'Haiyoru! Nyaruani (ONA)',
        },
        'params': {
            # Just test metadata extraction
            'skip_download': True,
        },
    }, {
        'url': 'http://www.crunchyroll.com/media-723735',
        'only_matching': True,
    }, {
        'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
        'only_matching': True,
    }]
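
    # Legacy format table, mapping a target resolution to the (video_quality, video_format)
    # id pair expected by the RpcApi VideoPlayer/VideoEncode calls below.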
    _FORMAT_IDS = {
        '360': ('60', '106'),
        '480': ('61', '106'),
        '720': ('62', '106'),
        '1080': ('80', '108'),
    }

    def _download_webpage(self, url_or_request, *args, **kwargs):
        request = (url_or_request if isinstance(url_or_request, urllib.request.Request)
                   else sanitized_Request(url_or_request))
        # Accept-Language must be set explicitly to accept any language to avoid issues
        # similar to https://github.com/ytdl-org/youtube-dl/issues/6797.
        # Along with the IP address, Crunchyroll uses Accept-Language to decide whether
        # georestriction should be imposed (from what I can see it just takes the first
        # language, ignoring the priority, and requires it to correspond to the IP).
        # This is also why Crunchyroll fails for georestricted content in browsers that
        # don't place the locale language first in the header; allowing any language
        # works around the issue.
        request.add_header('Accept-Language', '*')
        return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
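
    # Subtitle scripts from the legacy RpcApi arrive base64-encoded and AES-CBC encrypted
    # with a key derived from the numeric subtitle id; after decryption the payload is
    # zlib-compressed XML.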
    def _decrypt_subtitles(self, data, iv, id):
        data = bytes_to_intlist(compat_b64decode(data))
        iv = bytes_to_intlist(compat_b64decode(iv))
        id = int(id)

        def obfuscate_key_aux(count, modulo, start):
            output = list(start)
            for _ in range(count):
                output.append(output[-1] + output[-2])
            # cut off start values
            output = output[2:]
            output = list(map(lambda x: x % modulo + 33, output))
            return output
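
        # Derive the AES key from the subtitle id: mix it with the magic constant
        # floor(2**25 * sqrt(6.9)), SHA-1 a Fibonacci-derived prefix (seeded with (1, 2),
        # reduced mod 97 and offset by 33) plus that number, then zero-pad the 160-bit
        # digest to the 256 bits expected by aes_cbc_decrypt.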
        def obfuscate_key(key):
            num1 = int(floor(pow(2, 25) * sqrt(6.9)))
            num2 = (num1 ^ key) << 5
            num3 = key ^ num1
            num4 = num3 ^ (num3 >> 3) ^ num2
            prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
            shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest())
            # Extend 160 Bit hash to 256 Bit
            return shaHash + [0] * 12

        key = obfuscate_key(id)

        decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
        return zlib.decompress(decrypted_data)

    def _convert_subtitles_to_srt(self, sub_root):
        output = ''

        for i, event in enumerate(sub_root.findall('./events/event'), 1):
            start = event.attrib['start'].replace('.', ',')
            end = event.attrib['end'].replace('.', ',')
            text = event.attrib['text'].replace('\\N', '\n')
            output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
        return output
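
    # Rebuild an Advanced SubStation Alpha (.ass) script from the decrypted XML: script
    # info from the root attributes, one Style line per <style> element and one Dialogue
    # line per <event> element.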
    def _convert_subtitles_to_ass(self, sub_root):
        output = ''

        def ass_bool(strvalue):
            assvalue = '0'
            if strvalue == '1':
                assvalue = '-1'
            return assvalue

        output = '[Script Info]\n'
        output += 'Title: %s\n' % sub_root.attrib['title']
        output += 'ScriptType: v4.00+\n'
        output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
        output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
        output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
        output += """
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
"""
        for style in sub_root.findall('./styles/style'):
            output += 'Style: ' + style.attrib['name']
            output += ',' + style.attrib['font_name']
            output += ',' + style.attrib['font_size']
            output += ',' + style.attrib['primary_colour']
            output += ',' + style.attrib['secondary_colour']
            output += ',' + style.attrib['outline_colour']
            output += ',' + style.attrib['back_colour']
            output += ',' + ass_bool(style.attrib['bold'])
            output += ',' + ass_bool(style.attrib['italic'])
            output += ',' + ass_bool(style.attrib['underline'])
            output += ',' + ass_bool(style.attrib['strikeout'])
            output += ',' + style.attrib['scale_x']
            output += ',' + style.attrib['scale_y']
            output += ',' + style.attrib['spacing']
            output += ',' + style.attrib['angle']
            output += ',' + style.attrib['border_style']
            output += ',' + style.attrib['outline']
            output += ',' + style.attrib['shadow']
            output += ',' + style.attrib['alignment']
            output += ',' + style.attrib['margin_l']
            output += ',' + style.attrib['margin_r']
            output += ',' + style.attrib['margin_v']
            output += ',' + style.attrib['encoding']
            output += '\n'

        output += """
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""
        for event in sub_root.findall('./events/event'):
            output += 'Dialogue: 0'
            output += ',' + event.attrib['start']
            output += ',' + event.attrib['end']
            output += ',' + event.attrib['style']
            output += ',' + event.attrib['name']
            output += ',' + event.attrib['margin_l']
            output += ',' + event.attrib['margin_r']
            output += ',' + event.attrib['margin_v']
            output += ',' + event.attrib['effect']
            output += ',' + event.attrib['text']
            output += '\n'

        return output

    def _extract_subtitles(self, subtitle):
        sub_root = compat_etree_fromstring(subtitle)
        return [{
            'ext': 'srt',
            'data': self._convert_subtitles_to_srt(sub_root),
        }, {
            'ext': 'ass',
            'data': self._convert_subtitles_to_ass(sub_root),
        }]
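
    # Legacy subtitle flow: scrape the (ssid, title) pairs from the page, fetch each
    # script via RpcApiSubtitle_GetXml, decrypt it with _decrypt_subtitles and expose it
    # in both srt and ass form, keyed by the lang_code embedded in the script.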
    def _get_subtitles(self, video_id, webpage):
        subtitles = {}
        for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
            sub_doc = self._call_rpc_api(
                'Subtitle_GetXml', video_id,
                'Downloading subtitles for ' + sub_name, data={
                    'subtitle_script_id': sub_id,
                })
            if not isinstance(sub_doc, xml.etree.ElementTree.Element):
                continue
            sid = sub_doc.get('id')
            iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
            data = xpath_text(sub_doc, 'data', 'subtitle data')
            if not sid or not iv or not data:
                continue
            subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8')
            lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
            if not lang_code:
                continue
            subtitles[lang_code] = self._extract_subtitles(subtitle)
        return subtitles

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')

        if mobj.group('prefix') == 'm':
            mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
            webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
        else:
            webpage_url = 'http://www.' + mobj.group('url')

        webpage = self._download_webpage(
            self._add_skip_wall(webpage_url), video_id,
            headers=self.geo_verification_headers())
        if re.search(r'<div id="preload-data">', webpage):
            return self._redirect_to_beta(webpage, CrunchyrollBetaIE.ie_key(), video_id)
        note_m = self._html_search_regex(
            r'<div class="showmedia-trailer-notice">(.+?)</div>',
            webpage, 'trailer-notice', default='')
        if note_m:
            raise ExtractorError(note_m, expected=True)

        mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
        if mobj:
            msg = json.loads(mobj.group('msg'))
            if msg.get('type') == 'error':
                raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)

        if 'To view this, please log in to verify you are 18 or older.' in webpage:
            self.raise_login_required()
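
        # The HTML5 player embeds its configuration as JSON in `vilos.config.media`;
        # it carries the stream list, the subtitle list and most of the episode metadata.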
        media = self._parse_json(self._search_regex(
            r'vilos\.config\.media\s*=\s*({.+?});',
            webpage, 'vilos media', default='{}'), video_id)
        media_metadata = media.get('metadata') or {}

        language = self._search_regex(
            r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1',
            webpage, 'language', default=None, group='lang')

        video_title = self._html_search_regex(
            (r'(?s)<h1[^>]*>((?:(?!<h1).)*?<(?:span[^>]+itemprop=["\']title["\']|meta[^>]+itemprop=["\']position["\'])[^>]*>(?:(?!<h1).)+?)</h1>',
             r'<title>(.+?),\s+-\s+.+? Crunchyroll'),
            webpage, 'video_title', default=None)
        if not video_title:
            video_title = re.sub(r'^Watch\s+', '', self._og_search_description(webpage))
        video_title = re.sub(r' {2,}', ' ', video_title)
        video_description = (self._parse_json(self._html_search_regex(
            r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
            webpage, 'description', default='{}'), video_id) or media_metadata).get('description')

        thumbnails = []
        thumbnail_url = (self._parse_json(self._html_search_regex(
            r'<script type="application\/ld\+json">\n\s*(.+?)<\/script>',
            webpage, 'thumbnail_url', default='{}'), video_id)).get('image')
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': 1920,
                'height': 1080
            })

        if video_description:
            video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
        video_uploader = self._html_search_regex(
            # try looking for both an uploader that's a link and one that's not
            [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
            webpage, 'video_uploader', default=False)

        requested_languages = self._configuration_arg('language')
        requested_hardsubs = [('' if val == 'none' else val) for val in self._configuration_arg('hardsub')]
        language_preference = qualities((requested_languages or [language or ''])[::-1])
        hardsub_preference = qualities((requested_hardsubs or ['', language or ''])[::-1])
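
        # Prefer the VRV streams advertised in the vilos config, filtered by the
        # `language`/`hardsub` extractor arguments (exposed through --extractor-args);
        # only when none are usable fall back to the legacy RpcApi HTTP/RTMP formats below.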
        formats = []
        for stream in media.get('streams', []):
            audio_lang = stream.get('audio_lang') or ''
            hardsub_lang = stream.get('hardsub_lang') or ''
            if (requested_languages and audio_lang.lower() not in requested_languages
                    or requested_hardsubs and hardsub_lang.lower() not in requested_hardsubs):
                continue
            vrv_formats = self._extract_vrv_formats(
                stream.get('url'), video_id, stream.get('format'),
                audio_lang, hardsub_lang)
            for f in vrv_formats:
                f['language_preference'] = language_preference(audio_lang)
                f['quality'] = hardsub_preference(hardsub_lang)
            formats.extend(vrv_formats)
        if not formats:
            available_fmts = []
            for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
                attrs = extract_attributes(a)
                href = attrs.get('href')
                if href and '/freetrial' in href:
                    continue
                available_fmts.append(fmt)
            if not available_fmts:
                for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
                    available_fmts = re.findall(p, webpage)
                    if available_fmts:
                        break
            if not available_fmts:
                available_fmts = self._FORMAT_IDS.keys()
            video_encode_ids = []

            for fmt in available_fmts:
                stream_quality, stream_format = self._FORMAT_IDS[fmt]
                video_format = fmt + 'p'
                stream_infos = []
                streamdata = self._call_rpc_api(
                    'VideoPlayer_GetStandardConfig', video_id,
                    'Downloading media info for %s' % video_format, data={
                        'media_id': video_id,
                        'video_format': stream_format,
                        'video_quality': stream_quality,
                        'current_page': url,
                    })
                if isinstance(streamdata, xml.etree.ElementTree.Element):
                    stream_info = streamdata.find('./{default}preload/stream_info')
                    if stream_info is not None:
                        stream_infos.append(stream_info)
                stream_info = self._call_rpc_api(
                    'VideoEncode_GetStreamInfo', video_id,
                    'Downloading stream info for %s' % video_format, data={
                        'media_id': video_id,
                        'video_format': stream_format,
                        'video_encode_quality': stream_quality,
                    })
                if isinstance(stream_info, xml.etree.ElementTree.Element):
                    stream_infos.append(stream_info)
                for stream_info in stream_infos:
                    video_encode_id = xpath_text(stream_info, './video_encode_id')
                    if video_encode_id in video_encode_ids:
                        continue
                    video_encode_ids.append(video_encode_id)

                    video_file = xpath_text(stream_info, './file')
                    if not video_file:
                        continue
                    if video_file.startswith('http'):
                        formats.extend(self._extract_m3u8_formats(
                            video_file, video_id, 'mp4', entry_protocol='m3u8_native',
                            m3u8_id='hls', fatal=False))
                        continue

                    video_url = xpath_text(stream_info, './host')
                    if not video_url:
                        continue
                    metadata = stream_info.find('./metadata')
                    format_info = {
                        'format': video_format,
                        'height': int_or_none(xpath_text(metadata, './height')),
                        'width': int_or_none(xpath_text(metadata, './width')),
                    }

                    if '.fplive.net/' in video_url:
                        video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
                        parsed_video_url = compat_urlparse.urlparse(video_url)
                        direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
                            netloc='v.lvlt.crcdn.net',
                            path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
                        if self._is_valid_url(direct_video_url, video_id, video_format):
                            format_info.update({
                                'format_id': 'http-' + video_format,
                                'url': direct_video_url,
                            })
                            formats.append(format_info)
                            continue

                    format_info.update({
                        'format_id': 'rtmp-' + video_format,
                        'url': video_url,
                        'play_path': video_file,
                        'ext': 'flv',
                    })
                    formats.append(format_info)
        self._sort_formats(formats)

        metadata = self._call_rpc_api(
            'VideoPlayer_GetMediaMetadata', video_id,
            note='Downloading media info', data={
                'media_id': video_id,
            })

        subtitles = {}
        for subtitle in media.get('subtitles', []):
            subtitle_url = subtitle.get('url')
            if not subtitle_url:
                continue
            subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
                'url': subtitle_url,
                'ext': subtitle.get('format', 'ass'),
            })
        if not subtitles:
            subtitles = self.extract_subtitles(video_id, webpage)

        # the webpage provides more accurate data than series_title from the XML
        series = self._html_search_regex(
            r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
            webpage, 'series', fatal=False)

        season = episode = episode_number = duration = None

        if isinstance(metadata, xml.etree.ElementTree.Element):
            season = xpath_text(metadata, 'series_title')
            episode = xpath_text(metadata, 'episode_title')
            episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
            duration = float_or_none(media_metadata.get('duration'), 1000)

        if not episode:
            episode = media_metadata.get('title')
        if not episode_number:
            episode_number = int_or_none(media_metadata.get('episode_number'))
        thumbnail_url = try_get(media, lambda x: x['thumbnail']['url'])
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': 640,
                'height': 360
            })

        season_number = int_or_none(self._search_regex(
            r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
            webpage, 'season number', default=None))

        info = self._search_json_ld(webpage, video_id, default={})

        return merge_dicts({
            'id': video_id,
            'title': video_title,
            'description': video_description,
            'duration': duration,
            'thumbnails': thumbnails,
            'uploader': video_uploader,
            'series': series,
            'season': season,
            'season_number': season_number,
            'episode': episode,
            'episode_number': episode_number,
            'subtitles': subtitles,
            'formats': formats,
        }, info)


class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
    IE_NAME = 'crunchyroll:playlist'
    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:\w{2}(?:-\w{2})?/)?(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'

    _TESTS = [{
        'url': 'https://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
        'info_dict': {
            'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
            'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
        },
        'playlist_count': 13,
    }, {
        # geo-restricted (US), 18+ maturity wall, non-premium available
        'url': 'http://www.crunchyroll.com/cosplay-complex-ova',
        'info_dict': {
            'id': 'cosplay-complex-ova',
            'title': 'Cosplay Complex OVA'
        },
        'playlist_count': 3,
        'skip': 'Georestricted',
    }, {
        # geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14
        'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1',
        'only_matching': True,
    }, {
        'url': 'http://www.crunchyroll.com/fr/ladies-versus-butlers',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        show_id = self._match_id(url)

        webpage = self._download_webpage(
            # https:// gives a 403, but http:// does not
            self._add_skip_wall(url).replace('https://', 'http://'), show_id,
            headers=self.geo_verification_headers())
        if re.search(r'<div id="preload-data">', webpage):
            return self._redirect_to_beta(webpage, CrunchyrollBetaShowIE.ie_key(), show_id)
        title = self._html_search_meta('name', webpage, default=None)

        episode_re = r'<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"'
        season_re = r'<a [^>]+season-dropdown[^>]+>([^<]+)'
        paths = re.findall(f'(?s){episode_re}|{season_re}', webpage)

        entries, current_season = [], None
        for ep_id, ep, season in paths:
            if season:
                current_season = season
                continue
            entries.append(self.url_result(
                f'http://www.crunchyroll.com{ep}', CrunchyrollIE.ie_key(), ep_id, season=current_season))

        return {
            '_type': 'playlist',
            'id': show_id,
            'title': title,
            'entries': reversed(entries),
        }


class CrunchyrollBetaBaseIE(CrunchyrollBaseIE):
    params = None
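
    # Beta API bootstrap shared by the beta episode and series extractors: read apiDomain
    # and the client id from __APP_CONFIG__, exchange it (or the etp_rt cookie) for an
    # access token at /auth/v1/token, then fetch the signed CMS policy parameters
    # (Policy/Signature/Key-Pair-Id, CloudFront-style) from /index/v2. The result is
    # cached on the class so this only happens once per run.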
    def _get_params(self, lang):
        if not CrunchyrollBetaBaseIE.params:
            if self._get_cookies(f'https://beta.crunchyroll.com/{lang}').get('etp_rt'):
                grant_type, key = 'etp_rt_cookie', 'accountAuthClientId'
            else:
                grant_type, key = 'client_id', 'anonClientId'

            initial_state, app_config = self._get_beta_embedded_json(self._download_webpage(
                f'https://beta.crunchyroll.com/{lang}', None, note='Retrieving main page'), None)
            api_domain = app_config['cxApiParams']['apiDomain']

            auth_response = self._download_json(
                f'{api_domain}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
                headers={
                    'Authorization': 'Basic ' + str(base64.b64encode(('%s:' % app_config['cxApiParams'][key]).encode('ascii')), 'ascii')
                }, data=f'grant_type={grant_type}'.encode('ascii'))
            policy_response = self._download_json(
                f'{api_domain}/index/v2', None, note='Retrieving signed policy',
                headers={
                    'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
                })
            cms = traverse_obj(policy_response, 'cms_beta', 'cms')
            bucket = cms['bucket']
            params = {
                'Policy': cms['policy'],
                'Signature': cms['signature'],
                'Key-Pair-Id': cms['key_pair_id']
            }
            locale = traverse_obj(initial_state, ('localization', 'locale'))
            if locale:
                params['locale'] = locale
            CrunchyrollBetaBaseIE.params = (api_domain, bucket, params)
        return CrunchyrollBetaBaseIE.params


class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
    IE_NAME = 'crunchyroll:beta'
    _VALID_URL = r'''(?x)
        https?://beta\.crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        watch/(?P<id>\w+)
        (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
    _TESTS = [{
        'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
        'info_dict': {
            'id': 'GY2P1Q98Y',
            'ext': 'mp4',
            'duration': 1380.241,
            'timestamp': 1459632600,
            'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
            'title': 'World Trigger Episode 73 – To the Future',
            'upload_date': '20160402',
            'series': 'World Trigger',
            'series_id': 'GR757DMKY',
            'season': 'World Trigger',
            'season_id': 'GR9P39NJ6',
            'season_number': 1,
            'episode': 'To the Future',
            'episode_number': 73,
            'thumbnail': r're:^https://beta.crunchyroll.com/imgsrv/.*\.jpeg$',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y',
        'only_matching': True,
    }, {
        'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
        api_domain, bucket, params = self._get_params(lang)

        episode_response = self._download_json(
            f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
            note='Retrieving episode metadata', query=params)
        if episode_response.get('is_premium_only') and not episode_response.get('playback'):
            raise ExtractorError('This video is for premium members only.', expected=True)

        stream_response = self._download_json(
            f'{api_domain}{episode_response["__links__"]["streams"]["href"]}', display_id,
            note='Retrieving stream info', query=params)
        get_streams = lambda name: (traverse_obj(stream_response, name) or {}).items()

        requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
        hardsub_preference = qualities(requested_hardsubs[::-1])
        requested_formats = self._configuration_arg('format') or ['adaptive_hls']
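
        # Each requested stream type (adaptive_hls by default, or whatever the `format`
        # extractor argument selects) is offered once per hardsub locale; keep only the
        # hardsub variants the user asked for and rank them with the qualities() preference.
        # Assuming the usual lowercased extractor-arg key, selection would look roughly like:
        #   yt-dlp --extractor-args "crunchyrollbeta:hardsub=en-US" <url>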
        formats = []
        for stream_type, streams in get_streams('streams'):
            if stream_type not in requested_formats:
                continue
            for stream in streams.values():
                hardsub_lang = stream.get('hardsub_locale') or ''
                if hardsub_lang.lower() not in requested_hardsubs:
                    continue
                format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
                if not stream.get('url'):
                    continue
                if stream_type.endswith('hls'):
                    adaptive_formats = self._extract_m3u8_formats(
                        stream['url'], display_id, 'mp4', m3u8_id=format_id,
                        fatal=False, note=f'Downloading {format_id} HLS manifest')
                elif stream_type.endswith('dash'):
                    adaptive_formats = self._extract_mpd_formats(
                        stream['url'], display_id, mpd_id=format_id,
                        fatal=False, note=f'Downloading {format_id} MPD manifest')
                for f in adaptive_formats:
                    if f.get('acodec') != 'none':
                        f['language'] = stream_response.get('audio_locale')
                    f['quality'] = hardsub_preference(hardsub_lang.lower())
                formats.extend(adaptive_formats)
        self._sort_formats(formats)

        return {
            'id': internal_id,
            'title': '%s Episode %s – %s' % (
                episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
            'description': try_get(episode_response, lambda x: x['description'].replace(r'\r\n', '\n')),
            'duration': float_or_none(episode_response.get('duration_ms'), 1000),
            'timestamp': parse_iso8601(episode_response.get('upload_date')),
            'series': episode_response.get('series_title'),
            'series_id': episode_response.get('series_id'),
            'season': episode_response.get('season_title'),
            'season_id': episode_response.get('season_id'),
            'season_number': episode_response.get('season_number'),
            'episode': episode_response.get('title'),
            'episode_number': episode_response.get('sequence_number'),
            'formats': formats,
            'thumbnails': [{
                'url': thumb.get('source'),
                'width': thumb.get('width'),
                'height': thumb.get('height'),
            } for thumb in traverse_obj(episode_response, ('images', 'thumbnail', ..., ...)) or []],
            'subtitles': {
                lang: [{
                    'url': subtitle_data.get('url'),
                    'ext': subtitle_data.get('format')
                }] for lang, subtitle_data in get_streams('subtitles')
            },
        }


class CrunchyrollBetaShowIE(CrunchyrollBetaBaseIE):
    IE_NAME = 'crunchyroll:playlist:beta'
    _VALID_URL = r'''(?x)
        https?://beta\.crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        series/(?P<id>\w+)
        (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
    _TESTS = [{
        'url': 'https://beta.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
        'info_dict': {
            'id': 'GY19NQ2QR',
            'title': 'Girl Friend BETA',
        },
        'playlist_mincount': 10,
    }, {
        'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
        api_domain, bucket, params = self._get_params(lang)

        series_response = self._download_json(
            f'{api_domain}/cms/v2{bucket}/series/{internal_id}', display_id,
            note='Retrieving series metadata', query=params)

        seasons_response = self._download_json(
            f'{api_domain}/cms/v2{bucket}/seasons?series_id={internal_id}', display_id,
            note='Retrieving season list', query=params)
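
        # Walk the season list and, for each season, fetch its episode list, yielding
        # lightweight url results that defer the actual extraction to CrunchyrollBetaIE.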
        def entries():
            for season in seasons_response['items']:
                episodes_response = self._download_json(
                    f'{api_domain}/cms/v2{bucket}/episodes?season_id={season["id"]}', display_id,
                    note=f'Retrieving episode list for {season.get("slug_title")}', query=params)
                for episode in episodes_response['items']:
                    episode_id = episode['id']
                    episode_display_id = episode['slug_title']
                    yield {
                        '_type': 'url',
                        'url': f'https://beta.crunchyroll.com/{lang}watch/{episode_id}/{episode_display_id}',
                        'ie_key': CrunchyrollBetaIE.ie_key(),
                        'id': episode_id,
                        'title': '%s Episode %s – %s' % (episode.get('season_title'), episode.get('episode'), episode.get('title')),
                        'description': try_get(episode, lambda x: x['description'].replace(r'\r\n', '\n')),
                        'duration': float_or_none(episode.get('duration_ms'), 1000),
                        'series': episode.get('series_title'),
                        'series_id': episode.get('series_id'),
                        'season': episode.get('season_title'),
                        'season_id': episode.get('season_id'),
                        'season_number': episode.get('season_number'),
                        'episode': episode.get('title'),
                        'episode_number': episode.get('sequence_number')
                    }

        return self.playlist_result(entries(), internal_id, series_response.get('title'))