From e7f4793d4d29fec0f6d87aa6e9839f67e0bbd293 Mon Sep 17 00:00:00 2001 From: nikhil Date: Thu, 29 Jul 2021 11:31:27 -0400 Subject: [PATCH 1/5] [extractor/nbc] Fix NBC Olympics extractor --- youtube_dl/extractor/nbc.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 0d77648c2..7c3902f4e 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -495,12 +495,10 @@ class NBCOlympicsStreamIE(AdobePassIE): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid') - resource = self._search_regex( - r"resource\s*=\s*'(.+)';", webpage, - 'resource').replace("' + pid + '", pid) event_config = self._download_json( self._DATA_URL_TEMPLATE % ('event_config', pid), pid)['eventConfig'] + resource = event_config.get('resourceId', 'NBCOlympics') title = self._live_title(event_config['eventTitle']) source_url = self._download_json( self._DATA_URL_TEMPLATE % ('live_sources', pid), From 236e3d30ba6f6d3f8e932c60183aabede27e5727 Mon Sep 17 00:00:00 2001 From: nikhil Date: Sun, 1 Aug 2021 18:44:53 -0400 Subject: [PATCH 2/5] More fixes --- youtube_dl/extractor/nbc.py | 50 +++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 7c3902f4e..7cfc030a1 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -490,28 +490,58 @@ class NBCOlympicsStreamIE(AdobePassIE): }, } _DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json' + _LEAP_URL_TEMPLATE = 'https://api-leap.nbcsports.com/feeds/assets/%s?application=NBCOlympics&platform=%s&format=nbc-player&env=staging' def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid') + event_config = 
self._download_json( self._DATA_URL_TEMPLATE % ('event_config', pid), - pid)['eventConfig'] + pid, + 'Downloading event config', + )['eventConfig'] resource = event_config.get('resourceId', 'NBCOlympics') title = self._live_title(event_config['eventTitle']) - source_url = self._download_json( - self._DATA_URL_TEMPLATE % ('live_sources', pid), - pid)['videoSources'][0]['sourceUrl'] + + leap_config = self._download_json( + self._LEAP_URL_TEMPLATE % (pid, 'desktop'), + pid, + 'Downloading leap config', + ) + source_url = leap_config['videoSources'][0]['cdnSources']['primary'][0]['sourceUrl'] + + ap_resource = self._get_mvpd_resource( + resource, + re.sub(r'[^\w\d ]+', '', event_config['eventTitle']), + pid, + event_config['ratingId'], + ) media_token = self._extract_mvpd_auth( - url, pid, event_config.get('requestorId', 'NBCOlympics'), resource) - formats = self._extract_m3u8_formats(self._download_webpage( - 'http://sp.auth.adobe.com/tvs/v1/sign', pid, query={ + url, pid, event_config.get('requestorId', 'NBCOlympics'), ap_resource) + + tokenized_url = self._download_json( + 'https://tokens.playmakerservices.com/', + pid, + 'Retrieving tokenized URL', + data=json.dumps({ + 'application': 'NBCSports', + 'authentication-type': 'adobe-pass', 'cdn': 'akamai', - 'mediaToken': base64.b64encode(media_token.encode()), - 'resource': base64.b64encode(resource.encode()), + # Indicates that the player communicates its token not via the path but via a cookie? 
+ #'inPath': 'false', + 'pid': pid, + 'platform': 'desktop', + 'requestorId': 'NBCOlympics', + 'resourceId': base64.b64encode(ap_resource.encode()).decode(), + 'token': base64.b64encode(media_token.encode()).decode(), 'url': source_url, - }), pid, 'mp4') + 'version': 'v1', + }).encode(), + )['akamai'][0]['tokenizedUrl'] + + formats = self._extract_m3u8_formats(tokenized_url, pid, 'mp4') self._sort_formats(formats) return { From b511872fbf55382ffa60d54a202ae90d4e8b3578 Mon Sep 17 00:00:00 2001 From: nikhil Date: Mon, 2 Aug 2021 20:33:49 -0400 Subject: [PATCH 3/5] Unset ffmpeg's `-seekable`, `-http_seekable`, and `-icy` flags --- youtube_dl/downloader/external.py | 13 +++++++++++++ youtube_dl/extractor/nbc.py | 4 ++++ 2 files changed, 17 insertions(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index c31f8910a..61aefd8d4 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -243,6 +243,19 @@ class FFmpegFD(ExternalFD): # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127 # http://trac.ffmpeg.org/ticket/6125#comment:10 args += ['-seekable', '1' if seekable else '0'] + http_seekable = info_dict.get('_http_seekable') + if http_seekable is not None: + # setting -http_seekable prevents ffmpeg from guessing if the server + # supports seeking in other kinds of requests (by adding the same header + # as above: `Range: bytes=0-`) + args += ['-http_seekable', '1' if http_seekable else '0'] + icy = info_dict.get('_icy') + if icy is not None: + # setting -icy 0 prevents ffmpeg from sending the header `Icy-Metadata: 1`, + # which can also cause problems + # https://github.com/ytdl-org/youtube-dl/pull/29688 + # https://trac.ffmpeg.org/ticket/5460#comment:5 + args += ['-icy', '1' if icy else '0'] args += self._configuration_args() diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 7cfc030a1..46b44a9be 100644 --- a/youtube_dl/extractor/nbc.py +++ 
b/youtube_dl/extractor/nbc.py @@ -542,6 +542,10 @@ class NBCOlympicsStreamIE(AdobePassIE): )['akamai'][0]['tokenizedUrl'] formats = self._extract_m3u8_formats(tokenized_url, pid, 'mp4') + for f in formats: + f['_seekable'] = False + f['_http_seekable'] = False + f['_icy'] = False self._sort_formats(formats) return { From b675a6e6b9c4e2a54058746d1084dd67c8096dee Mon Sep 17 00:00:00 2001 From: nikhil Date: Mon, 2 Aug 2021 23:22:57 -0400 Subject: [PATCH 4/5] Satisfy flake8, coding conventions, tests --- youtube_dl/extractor/nbc.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 46b44a9be..75e8932b0 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -478,11 +478,11 @@ class NBCOlympicsStreamIE(AdobePassIE): IE_NAME = 'nbcolympics:stream' _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)' _TEST = { - 'url': 'http://stream.nbcolympics.com/2018-winter-olympics-nbcsn-evening-feb-8', + 'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11', 'info_dict': { - 'id': '203493', + 'id': '2019740', 'ext': 'mp4', - 'title': 're:Curling, Alpine, Luge [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'title': r"re:Women's Group Stage - Netherlands vs\. Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$", }, 'params': { # m3u8 download @@ -516,7 +516,7 @@ class NBCOlympicsStreamIE(AdobePassIE): resource, re.sub(r'[^\w\d ]+', '', event_config['eventTitle']), pid, - event_config['ratingId'], + event_config.get('ratingId', 'NO VALUE'), ) media_token = self._extract_mvpd_auth( url, pid, event_config.get('requestorId', 'NBCOlympics'), ap_resource) @@ -529,8 +529,10 @@ class NBCOlympicsStreamIE(AdobePassIE): 'application': 'NBCSports', 'authentication-type': 'adobe-pass', 'cdn': 'akamai', - # Indicates that the player communicates its token not via the path but via a cookie? 
- #'inPath': 'false', + # Indicates that the player communicates its token not via the path + # but via a cookie? NBC's player specifies `'false'` but field just + # doesn't seem to have an effect. + # 'inPath': 'false', 'pid': pid, 'platform': 'desktop', 'requestorId': 'NBCOlympics', From ce604ae7dce406aaf8fde8f25ee40bd390255b58 Mon Sep 17 00:00:00 2001 From: nikhil Date: Tue, 3 Aug 2021 00:16:23 -0400 Subject: [PATCH 5/5] Support _non_-tokenized source URLs too --- youtube_dl/extractor/nbc.py | 81 ++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 75e8932b0..ace1e3859 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -477,18 +477,34 @@ class NBCOlympicsIE(InfoExtractor): class NBCOlympicsStreamIE(AdobePassIE): IE_NAME = 'nbcolympics:stream' _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)' - _TEST = { - 'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11', - 'info_dict': { - 'id': '2019740', - 'ext': 'mp4', - 'title': r"re:Women's Group Stage - Netherlands vs\. Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$", + _TESTS = [ + # "Tokenized" .m3u8 source URL + { + 'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11', + 'info_dict': { + 'id': '2019740', + 'ext': 'mp4', + 'title': r"re:Women's Group Stage - Netherlands vs\. 
Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$", + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, - 'params': { - # m3u8 download - 'skip_download': True, + # Plain .m3u8 source URL + { + 'url': 'https://stream.nbcolympics.com/gymnastics-event-finals-mens-floor-pommel-horse-womens-vault-bars', + 'info_dict': { + 'id': '2021729', + 'ext': 'mp4', + 'title': r're:Event Finals: M Floor, W Vault, M Pommel, W Uneven Bars [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, - } + ] _DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json' _LEAP_URL_TEMPLATE = 'https://api-leap.nbcsports.com/feeds/assets/%s?application=NBCOlympics&platform=%s&format=nbc-player&env=staging' @@ -521,29 +537,30 @@ class NBCOlympicsStreamIE(AdobePassIE): media_token = self._extract_mvpd_auth( url, pid, event_config.get('requestorId', 'NBCOlympics'), ap_resource) - tokenized_url = self._download_json( - 'https://tokens.playmakerservices.com/', - pid, - 'Retrieving tokenized URL', - data=json.dumps({ - 'application': 'NBCSports', - 'authentication-type': 'adobe-pass', - 'cdn': 'akamai', - # Indicates that the player communicates its token not via the path - # but via a cookie? NBC's player specifies `'false'` but field just - # doesn't seem to have an effect. 
- # 'inPath': 'false', - 'pid': pid, - 'platform': 'desktop', - 'requestorId': 'NBCOlympics', - 'resourceId': base64.b64encode(ap_resource.encode()).decode(), - 'token': base64.b64encode(media_token.encode()).decode(), - 'url': source_url, - 'version': 'v1', - }).encode(), - )['akamai'][0]['tokenizedUrl'] + if event_config.get('cdnToken') is True: + source_url = self._download_json( + 'https://tokens.playmakerservices.com/', + pid, + 'Retrieving tokenized URL', + data=json.dumps({ + 'application': 'NBCSports', + 'authentication-type': 'adobe-pass', + 'cdn': 'akamai', + # Indicates that the player communicates its token not via the path + # but via a cookie? NBC's player specifies `'false'` but field just + # doesn't seem to have an effect. + # 'inPath': 'false', + 'pid': pid, + 'platform': 'desktop', + 'requestorId': 'NBCOlympics', + 'resourceId': base64.b64encode(ap_resource.encode()).decode(), + 'token': base64.b64encode(media_token.encode()).decode(), + 'url': source_url, + 'version': 'v1', + }).encode(), + )['akamai'][0]['tokenizedUrl'] - formats = self._extract_m3u8_formats(tokenized_url, pid, 'mp4') + formats = self._extract_m3u8_formats(source_url, pid, 'mp4') for f in formats: f['_seekable'] = False f['_http_seekable'] = False