From ee1e05581e32114c52e75e90983a66fb25fbc730 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 2 Mar 2021 16:24:47 +0530 Subject: [PATCH] [mtv] Fix extractor by reverting changes made in youtube-dlc youtube-dl has since fixed the extractor and the changes from the two sources are incompatible --- yt_dlp/extractor/mtv.py | 68 +++++------------------------------------ 1 file changed, 7 insertions(+), 61 deletions(-) diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py index 68e81ad47..f5e30d22d 100644 --- a/yt_dlp/extractor/mtv.py +++ b/yt_dlp/extractor/mtv.py @@ -7,7 +7,6 @@ from ..compat import ( compat_str, compat_xpath, - compat_urlparse, ) from ..utils import ( ExtractorError, @@ -23,7 +22,6 @@ unescapeHTML, update_url_query, url_basename, - get_domain, xpath_text, ) @@ -45,7 +43,7 @@ def _remove_template_parameter(url): # Remove the templates, like &device={device} return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url) - def _get_feed_url(self, uri, url=None): + def _get_feed_url(self, uri): return self._FEED_URL def _get_thumbnail_url(self, uri, itemdoc): @@ -211,9 +209,9 @@ def _get_feed_query(self, uri): data['lang'] = self._LANG return data - def _get_videos_info(self, uri, use_hls=True, url=None): + def _get_videos_info(self, uri, use_hls=True): video_id = self._id_from_uri(uri) - feed_url = self._get_feed_url(uri, url) + feed_url = self._get_feed_url(uri) info_url = update_url_query(feed_url, self._get_feed_query(uri)) return self._get_videos_info_from_url(info_url, video_id, use_hls) @@ -259,41 +257,7 @@ def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None): def _extract_child_with_type(parent, t): return next(c for c in parent['children'] if c.get('type') == t) - def _extract_new_triforce_mgid(self, webpage, url='', video_id=None): - if url == '': - return - domain = get_domain(url) - if domain is None: - raise ExtractorError( - '[%s] could not get domain' % self.IE_NAME, - expected=True) - url = url.replace("https://", "http://") - enc_url = compat_urlparse.quote(url, safe='') - _TRIFORCE_V8_TEMPLATE = 'https://%s/feeds/triforce/manifest/v8?url=%s' - triforce_manifest_url = _TRIFORCE_V8_TEMPLATE % (domain, enc_url) - - manifest = self._download_json(triforce_manifest_url, video_id, fatal=False) - if manifest: - if manifest.get('manifest').get('type') == 'redirect': - self.to_screen('Found a redirect. Downloading manifest from new location') - new_loc = manifest.get('manifest').get('newLocation') - new_loc = new_loc.replace("https://", "http://") - enc_new_loc = compat_urlparse.quote(new_loc, safe='') - triforce_manifest_new_loc = _TRIFORCE_V8_TEMPLATE % (domain, enc_new_loc) - manifest = self._download_json(triforce_manifest_new_loc, video_id, fatal=False) - - item_id = try_get(manifest, lambda x: x['manifest']['reporting']['itemId'], compat_str) - if not item_id: - self.to_screen('No id found!') - return - - # 'episode' can be anything. 'content' is used often as well - _MGID_TEMPLATE = 'mgid:arc:episode:%s:%s' - mgid = _MGID_TEMPLATE % (domain, item_id) - - return mgid - - def _extract_mgid(self, webpage, url, title=None, data_zone=None): + def _extract_mgid(self, webpage): try: # the url can be http://media.mtvnservices.com/fb/{mgid}.swf # or http://media.mtvnservices.com/{mgid} @@ -304,21 +268,6 @@ def _extract_mgid(self, webpage, url, title=None, data_zone=None): except RegexNotFoundError: mgid = None - if not title: - title = url_basename(url) - - try: - window_data = self._parse_json(self._search_regex( - r'(?s)window.__DATA__ = (?P{.+});', webpage, - 'JSON Window Data', default=None, fatal=False, group='json'), title, fatal=False) - main_container = None - for i in range(len(window_data['children'])): - if window_data['children'][i]['type'] == 'MainContainer': - main_container = window_data['children'][i] - mgid = main_container['children'][0]['props']['media']['video']['config']['uri'] - except (KeyError, IndexError, TypeError): - pass - if mgid is None or ':' not in mgid: mgid = self._search_regex( [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'], @@ -331,10 +280,7 @@ def _extract_mgid(self, webpage, url, title=None, data_zone=None): r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None) if not mgid: - mgid = self._extract_new_triforce_mgid(webpage, url) - - if not mgid: - mgid = self._extract_triforce_mgid(webpage, data_zone) + mgid = self._extract_triforce_mgid(webpage) if not mgid: data = self._parse_json(self._search_regex( @@ -348,8 +294,8 @@ def _extract_mgid(self, webpage, url, title=None, data_zone=None): def _real_extract(self, url): title = url_basename(url) webpage = self._download_webpage(url, title) - mgid = self._extract_mgid(webpage, url, title=title) - videos_info = self._get_videos_info(mgid, url=url) + mgid = self._extract_mgid(webpage) + videos_info = self._get_videos_info(mgid) return videos_info