yt-dlp/yt_dlp/extractor/hidive.py

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    try_get,
    url_or_none,
    urlencode_postdata,
)


class HiDiveIE(InfoExtractor):
    """Extractor for HIDIVE stream URLs of the form ``hidive.com/stream/<title>/<key>``."""

    _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<id>(?P<title>[^/]+)/(?P<key>[^/?#&]+))'
    # Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
    # so disabling geo bypass completely
    _GEO_BYPASS = False
    _NETRC_MACHINE = 'hidive'
    _LOGIN_URL = 'https://www.hidive.com/account/login'

    _TESTS = [{
        'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
        'info_dict': {
            'id': 'the-comic-artist-and-his-assistants/s01e001',
            'ext': 'mp4',
            'title': 'the-comic-artist-and-his-assistants/s01e001',
            'series': 'the-comic-artist-and-his-assistants',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires Authentication',
    }]

    def _perform_login(self, username, password):
        """Submit the login form and, if the response offers profiles, choose the first one.

        Returns silently without posting anything when the login page
        contains no login form.
        """
        webpage = self._download_webpage(self._LOGIN_URL, None)
        form = self._search_regex(
            r'(?s)<form[^>]+action="/account/login"[^>]*>(.+?)</form>',
            webpage, 'login form', default=None)
        if not form:
            # NOTE(review): presumably the session is already authenticated
            # (e.g. via cookies) when no form is served - confirm
            return
        # Send the form's hidden inputs back along with the credentials
        data = self._hidden_inputs(form)
        data.update({
            'Email': username,
            'Password': password,
        })
        login_webpage = self._download_webpage(
            self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data))
        # If the user has multiple profiles on their account, select one. For now pick the first profile.
        profile_id = self._search_regex(
            r'<button [^>]+?data-profile-id="(\w+)"', login_webpage, 'profile id', default=None)
        if profile_id is None:
            return  # If only one profile, Hidive auto-selects it
        self._request_webpage(
            'https://www.hidive.com/ajax/chooseprofile', None,
            data=urlencode_postdata({
                'profileId': profile_id,
                # No default is given here, so a missing hash is reported as an error
                'hash': self._search_regex(
                    r'\<button [^>]+?data-hash="(\w+)"', login_webpage, 'profile id hash'),
                'returnUrl': '/dashboard',
            }))

    def _call_api(self, video_id, title, key, data=None, **kwargs):
        """POST to the ``/play/settings`` endpoint and return the parsed JSON.

        @param video_id  ID passed to the downloader for this request
        @param title     Value sent as the ``Title`` form field
        @param key       Value sent as the ``Key`` form field
        @param data      Optional mapping of extra form fields; ``Title``,
                         ``Key`` and ``PlayerId`` take precedence over
                         same-named entries in it
        @param kwargs    Forwarded unchanged to ``_download_json``
        @returns         The decoded response, or an empty dict if it is falsy
        """
        # `data` defaults to None instead of a shared mutable `{}`
        payload = {
            **(data or {}),
            'Title': title,
            'Key': key,
            'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783',
        }
        return self._download_json(
            'https://www.hidive.com/play/settings', video_id,
            data=urlencode_postdata(payload), **kwargs) or {}

    def _real_extract(self, url):
        """Build the info dict (formats, subtitles, series/episode numbers) for a stream URL."""
        video_id, title, key = self._match_valid_url(url).group('id', 'title', 'key')
        settings = self._call_api(video_id, title, key)

        restriction = settings.get('restrictionReason')
        if restriction == 'RegionRestricted':
            self.raise_geo_restricted()
        # The API may report the literal string 'None' when there is no restriction
        if restriction and restriction != 'None':
            raise ExtractorError(
                f'{self.IE_NAME} said: {restriction}', expected=True)

        # _call_api() can return an empty dict, so do not index 'renditions' directly
        renditions = settings.get('renditions') or {}

        # parsed_urls is seeded with None so that missing or invalid URLs
        # are skipped by the same membership test that removes duplicates
        formats, parsed_urls = [], {None}
        for rendition_id, rendition in renditions.items():
            m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls']))
            if m3u8_url in parsed_urls:
                continue
            parsed_urls.add(m3u8_url)

            # The id is read as '<audio>_<version>_<extra>'. It only feeds optional
            # metadata, so pad missing parts rather than failing on an unexpected id.
            audio, version, extra = (rendition_id.split('_', 2) + ['', ''])[:3]
            format_note = ', '.join(filter(None, (version, extra))) or None

            rendition_formats = self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=rendition_id, fatal=False)
            for f in rendition_formats:
                f['language'] = audio or None
                f['format_note'] = format_note
            formats.extend(rendition_formats)

        subtitles = {}
        for rendition in renditions.values():
            for cc_file in rendition.get('ccFiles') or []:
                # Index 2 is used as the subtitle URL; the language key is index 1
                # (spaces replaced by hyphens, lowercased) with index 0 as fallback
                cc_url = url_or_none(try_get(cc_file, lambda x: x[2]))
                cc_lang = try_get(cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str)
                if cc_url not in parsed_urls and cc_lang:
                    parsed_urls.add(cc_url)
                    subtitles.setdefault(cc_lang, []).append({'url': cc_url})

        return {
            'id': video_id,
            'title': video_id,
            'subtitles': subtitles,
            'formats': formats,
            'series': title,
            # Season and episode numbers are parsed from keys such as 's01e001'
            'season_number': int_or_none(
                self._search_regex(r's(\d+)', key, 'season number', default=None)),
            'episode_number': int_or_none(
                self._search_regex(r'e(\d+)', key, 'episode number', default=None)),
            'http_headers': {'Referer': url},
        }
[hidive] Add extractor (closes #15494) 2018-03-04 11:46:36 +01:00			`from .common import InfoExtractor`
			`from ..utils import (`
			`ExtractorError,`
			`int_or_none,`
[HiDive] Fix extractor (#958) Closes #952, #408 Authored by: Ashish0804 2021-09-15 04:04:54 +02:00			`try_get,`
Improve URL extraction 2018-07-21 14:08:28 +02:00			`url_or_none,`
[hidive] Add extractor (closes #15494) 2018-03-04 11:46:36 +01:00			`urlencode_postdata,`
			`)`


			`class HiDiveIE(InfoExtractor):`
[Hidive] Fix duplicate and incorrect formats 2021-10-06 07:23:22 +02:00			`_VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<id>(?P<title>[^/]+)/(?P<key>[^/?#&]+))'`
[hidive] Add extractor (closes #15494) 2018-03-04 11:46:36 +01:00			`# Using X-Forwarded-For results in 403 HTTP error for HLS fragments,`
			`# so disabling geo bypass completely`
			`_GEO_BYPASS = False`
[hidive] add support for authentication(closes #16534) 2018-05-24 12:53:42 +02:00			`_NETRC_MACHINE = 'hidive'`
			`_LOGIN_URL = 'https://www.hidive.com/account/login'`
[hidive] Add extractor (closes #15494) 2018-03-04 11:46:36 +01:00
			`_TESTS = [{`
			`'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',`
			`'info_dict': {`
			`'id': 'the-comic-artist-and-his-assistants/s01e001',`
			`'ext': 'mp4',`
			`'title': 'the-comic-artist-and-his-assistants/s01e001',`
			`'series': 'the-comic-artist-and-his-assistants',`
			`'season_number': 1,`
			`'episode_number': 1,`
			`},`
			`'params': {`
			`'skip_download': True,`
			`},`
[hidive] add support for authentication(closes #16534) 2018-05-24 12:53:42 +02:00			`'skip': 'Requires Authentication',`
[hidive] Add extractor (closes #15494) 2018-03-04 11:46:36 +01:00			`}]`

[extractor] Add `_perform_login` function (#2943) * Adds new functions `_initialize_pre_login` and `_perform_login` as part of the extractor API * Adds `ie.supports_login` to the public API 2022-03-18 21:53:33 +01:00			`def _perform_login(self, username, password):`
[hidive] add support for authentication(closes #16534) 2018-05-24 12:53:42 +02:00			`webpage = self._download_webpage(self._LOGIN_URL, None)`
			`form = self._search_regex(`
			`r'(?s)<form[^>]+action="/account/login"[^>]*>(.+?)</form>',`
[extractor/hidive] Fix cookie login when netrc is also given (#4447) Closes #3336 Authored by: winterbird-code 2022-07-26 15:22:18 +02:00			`webpage, 'login form', default=None)`
[extractor/Hidive] Fix subtitles and age-restriction (#5828) Authored by: chexxor Closes #408 2023-02-12 04:47:52 +01:00			`if not form:`
[extractor/hidive] Fix cookie login when netrc is also given (#4447) Closes #3336 Authored by: winterbird-code 2022-07-26 15:22:18 +02:00			`return`
[hidive] add support for authentication(closes #16534) 2018-05-24 12:53:42 +02:00			`data = self._hidden_inputs(form)`
			`data.update({`
[extractor] Add `_perform_login` function (#2943) * Adds new functions `_initialize_pre_login` and `_perform_login` as part of the extractor API * Adds `ie.supports_login` to the public API 2022-03-18 21:53:33 +01:00			`'Email': username,`
[hidive] add support for authentication(closes #16534) 2018-05-24 12:53:42 +02:00			`'Password': password,`
			`})`
[extractor/Hidive] Fix subtitles and age-restriction (#5828) Authored by: chexxor Closes #408 2023-02-12 04:47:52 +01:00			`login_webpage = self._download_webpage(`
[hidive] add support for authentication(closes #16534) 2018-05-24 12:53:42 +02:00			`self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data))`
[extractor/Hidive] Fix subtitles and age-restriction (#5828) Authored by: chexxor Closes #408 2023-02-12 04:47:52 +01:00			`# If the user has multiple profiles on their account, select one. For now pick the first profile.`
[extractor/hidive] Fix login Fixes https://github.com/yt-dlp/yt-dlp/issues/6493#issuecomment-1462906556 2023-03-10 12:57:43 +01:00			`profile_id = self._search_regex(`
			`r'<button [^>]+?data-profile-id="(\w+)"', login_webpage, 'profile id', default=None)`
[extractor/Hidive] Fix subtitles and age-restriction (#5828) Authored by: chexxor Closes #408 2023-02-12 04:47:52 +01:00			`if profile_id is None:`
			`return # If only one profile, Hidive auto-selects it`
			`self._request_webpage(`
			`'https://www.hidive.com/ajax/chooseprofile', None,`
			`data=urlencode_postdata({`
			`'profileId': profile_id,`
[extractor/hidive] Fix login Fixes https://github.com/yt-dlp/yt-dlp/issues/6493#issuecomment-1462906556 2023-03-10 12:57:43 +01:00			`'hash': self._search_regex(`
			`r'\<button [^>]+?data-hash="(\w+)"', login_webpage, 'profile id hash'),`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`'returnUrl': '/dashboard',`
[extractor/Hidive] Fix subtitles and age-restriction (#5828) Authored by: chexxor Closes #408 2023-02-12 04:47:52 +01:00			`}))`
[hidive] add support for authentication(closes #16534) 2018-05-24 12:53:42 +02:00
[Hidive] Fix subtitles broken by 705e7c2005dfe67a905e18736c9f6345ee9d386b 2021-10-08 17:07:24 +02:00			`def _call_api(self, video_id, title, key, data={}, **kwargs):`
			`data = {`
			`**data,`
			`'Title': title,`
			`'Key': key,`
			`'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783',`
			`}`
			`return self._download_json(`
			`'https://www.hidive.com/play/settings', video_id,`
			`data=urlencode_postdata(data), **kwargs) or {}`

[hidive] Add extractor (closes #15494) 2018-03-04 11:46:36 +01:00			`def _real_extract(self, url):`
[Hidive] Fix duplicate and incorrect formats 2021-10-06 07:23:22 +02:00			`video_id, title, key = self._match_valid_url(url).group('id', 'title', 'key')`
[Hidive] Fix subtitles broken by 705e7c2005dfe67a905e18736c9f6345ee9d386b 2021-10-08 17:07:24 +02:00			`settings = self._call_api(video_id, title, key)`
[hidive] Add extractor (closes #15494) 2018-03-04 11:46:36 +01:00
[Hidive] Fix duplicate and incorrect formats 2021-10-06 07:23:22 +02:00			`restriction = settings.get('restrictionReason')`
			`if restriction == 'RegionRestricted':`
			`self.raise_geo_restricted()`
			`if restriction and restriction != 'None':`
			`raise ExtractorError(`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`f'{self.IE_NAME} said: {restriction}', expected=True)`
[hidive] Add extractor (closes #15494) 2018-03-04 11:46:36 +01:00
[hidive] Fix typo in b5ae35ee6d3f913898770b8c74ee5f5e5cc33560 2021-10-10 07:36:23 +02:00			`formats, parsed_urls = [], {None}`
[Hidive] Fix duplicate and incorrect formats 2021-10-06 07:23:22 +02:00			`for rendition_id, rendition in settings['renditions'].items():`
			`audio, version, extra = rendition_id.split('_')`
			`m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls']))`
[Hidive] Fix subtitles broken by 705e7c2005dfe67a905e18736c9f6345ee9d386b 2021-10-08 17:07:24 +02:00			`if m3u8_url not in parsed_urls:`
			`parsed_urls.add(m3u8_url)`
[HiDive] Fix extractor (#958) Closes #952, #408 Authored by: Ashish0804 2021-09-15 04:04:54 +02:00			`frmt = self._extract_m3u8_formats(`
[Hidive] Fix duplicate and incorrect formats 2021-10-06 07:23:22 +02:00			`m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=rendition_id, fatal=False)`
[HiDive] Fix extractor (#958) Closes #952, #408 Authored by: Ashish0804 2021-09-15 04:04:54 +02:00			`for f in frmt:`
			`f['language'] = audio`
[Hidive] Fix duplicate and incorrect formats 2021-10-06 07:23:22 +02:00			`f['format_note'] = f'{version}, {extra}'`
[HiDive] Fix extractor (#958) Closes #952, #408 Authored by: Ashish0804 2021-09-15 04:04:54 +02:00			`formats.extend(frmt)`
[hidive] Add extractor (closes #15494) 2018-03-04 11:46:36 +01:00
[extractor/Hidive] Fix subtitles and age-restriction (#5828) Authored by: chexxor Closes #408 2023-02-12 04:47:52 +01:00			`subtitles = {}`
			`for rendition_id, rendition in settings['renditions'].items():`
			`audio, version, extra = rendition_id.split('_')`
			`for cc_file in rendition.get('ccFiles') or []:`
			`cc_url = url_or_none(try_get(cc_file, lambda x: x[2]))`
			`cc_lang = try_get(cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str)`
			`if cc_url not in parsed_urls and cc_lang:`
			`parsed_urls.add(cc_url)`
			`subtitles.setdefault(cc_lang, []).append({'url': cc_url})`

[hidive] Add extractor (closes #15494) 2018-03-04 11:46:36 +01:00			`return {`
			`'id': video_id,`
			`'title': video_id,`
[extractor/Hidive] Fix subtitles and age-restriction (#5828) Authored by: chexxor Closes #408 2023-02-12 04:47:52 +01:00			`'subtitles': subtitles,`
[hidive] Add extractor (closes #15494) 2018-03-04 11:46:36 +01:00			`'formats': formats,`
			`'series': title,`
[Hidive] Fix duplicate and incorrect formats 2021-10-06 07:23:22 +02:00			`'season_number': int_or_none(`
			`self._search_regex(r's(\d+)', key, 'season number', default=None)),`
			`'episode_number': int_or_none(`
			`self._search_regex(r'e(\d+)', key, 'episode number', default=None)),`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`'http_headers': {'Referer': url},`
[hidive] Add extractor (closes #15494) 2018-03-04 11:46:36 +01:00			`}`