[mtv] Fix extractor by reverting changes made in youtube-dlc

youtube-dl has since fixed the extractor, and the changes from the two sources are incompatible.
2024-07-02 18:36:27 +02:00 · 2021-03-02 16:24:47 +05:30 · 2021-03-02 16:24:47 +05:30 · ee1e05581e
commit ee1e05581e
parent ec5e77c558
1 changed file with 7 additions and 61 deletions
--- a/yt_dlp/extractor/mtv.py
+++ b/yt_dlp/extractor/mtv.py
@ -7,7 +7,6 @@
 from ..compat import (
    compat_str,
    compat_xpath,
-    compat_urlparse,
 )
 from ..utils import (
    ExtractorError,
@ -23,7 +22,6 @@
    unescapeHTML,
    update_url_query,
    url_basename,
-    get_domain,
    xpath_text,
 )

@ -45,7 +43,7 @@ def _remove_template_parameter(url):
        # Remove the templates, like &device={device}
        return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)

-    def _get_feed_url(self, uri, url=None):
+    def _get_feed_url(self, uri):
        return self._FEED_URL

    def _get_thumbnail_url(self, uri, itemdoc):
@ -211,9 +209,9 @@ def _get_feed_query(self, uri):
            data['lang'] = self._LANG
        return data

-    def _get_videos_info(self, uri, use_hls=True, url=None):
+    def _get_videos_info(self, uri, use_hls=True):
        video_id = self._id_from_uri(uri)
-        feed_url = self._get_feed_url(uri, url)
+        feed_url = self._get_feed_url(uri)
        info_url = update_url_query(feed_url, self._get_feed_query(uri))
        return self._get_videos_info_from_url(info_url, video_id, use_hls)

@ -259,41 +257,7 @@ def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
    def _extract_child_with_type(parent, t):
        return next(c for c in parent['children'] if c.get('type') == t)

-    def _extract_new_triforce_mgid(self, webpage, url='', video_id=None):
-        if url == '':
-            return
-        domain = get_domain(url)
-        if domain is None:
-            raise ExtractorError(
-                '[%s] could not get domain' % self.IE_NAME,
-                expected=True)
-        url = url.replace("https://", "http://")
-        enc_url = compat_urlparse.quote(url, safe='')
-        _TRIFORCE_V8_TEMPLATE = 'https://%s/feeds/triforce/manifest/v8?url=%s'
-        triforce_manifest_url = _TRIFORCE_V8_TEMPLATE % (domain, enc_url)
-
-        manifest = self._download_json(triforce_manifest_url, video_id, fatal=False)
-        if manifest:
-            if manifest.get('manifest').get('type') == 'redirect':
-                self.to_screen('Found a redirect. Downloading manifest from new location')
-                new_loc = manifest.get('manifest').get('newLocation')
-                new_loc = new_loc.replace("https://", "http://")
-                enc_new_loc = compat_urlparse.quote(new_loc, safe='')
-                triforce_manifest_new_loc = _TRIFORCE_V8_TEMPLATE % (domain, enc_new_loc)
-                manifest = self._download_json(triforce_manifest_new_loc, video_id, fatal=False)
-
-        item_id = try_get(manifest, lambda x: x['manifest']['reporting']['itemId'], compat_str)
-        if not item_id:
-            self.to_screen('No id found!')
-            return
-
-        # 'episode' can be anything. 'content' is used often as well
-        _MGID_TEMPLATE = 'mgid:arc:episode:%s:%s'
-        mgid = _MGID_TEMPLATE % (domain, item_id)
-
-        return mgid
-
-    def _extract_mgid(self, webpage, url, title=None, data_zone=None):
+    def _extract_mgid(self, webpage):
        try:
            # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
            # or http://media.mtvnservices.com/{mgid}
@ -304,21 +268,6 @@ def _extract_mgid(self, webpage, url, title=None, data_zone=None):
        except RegexNotFoundError:
            mgid = None

-        if not title:
-            title = url_basename(url)
-
-        try:
-            window_data = self._parse_json(self._search_regex(
-                r'(?s)window.__DATA__ = (?P<json>{.+});', webpage,
-                'JSON Window Data', default=None, fatal=False, group='json'), title, fatal=False)
-            main_container = None
-            for i in range(len(window_data['children'])):
-                if window_data['children'][i]['type'] == 'MainContainer':
-                    main_container = window_data['children'][i]
-            mgid = main_container['children'][0]['props']['media']['video']['config']['uri']
-        except (KeyError, IndexError, TypeError):
-            pass
-
        if mgid is None or ':' not in mgid:
            mgid = self._search_regex(
                [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'],
@ -331,10 +280,7 @@ def _extract_mgid(self, webpage, url, title=None, data_zone=None):
                r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None)

        if not mgid:
-            mgid = self._extract_new_triforce_mgid(webpage, url)
-
-        if not mgid:
-            mgid = self._extract_triforce_mgid(webpage, data_zone)
+            mgid = self._extract_triforce_mgid(webpage)

        if not mgid:
            data = self._parse_json(self._search_regex(
@ -348,8 +294,8 @@ def _extract_mgid(self, webpage, url, title=None, data_zone=None):
    def _real_extract(self, url):
        title = url_basename(url)
        webpage = self._download_webpage(url, title)
-        mgid = self._extract_mgid(webpage, url, title=title)
-        videos_info = self._get_videos_info(mgid, url=url)
+        mgid = self._extract_mgid(webpage)
+        videos_info = self._get_videos_info(mgid)
        return videos_info