[cbs] allow passing the content id to the extractor (closes #9589)

2024-11-27 21:46:54 +01:00 · 2016-05-23 09:30:26 +01:00 · 2016-05-23 09:30:26 +01:00 · 42a7439717
commit 42a7439717
parent b1e9ebd080
1 changed file with 9 additions and 6 deletions
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dl/extractor/cbs.py
@ -1,5 +1,7 @@
 from __future__ import unicode_literals

+import re
+
 from .theplatform import ThePlatformIE
 from ..utils import (
    xpath_text,
@ -21,7 +23,7 @@ class CBSBaseIE(ThePlatformIE):


 class CBSIE(CBSBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
+    _VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))'

    _TESTS = [{
        'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
@ -66,7 +68,8 @@ class CBSIE(CBSBaseIE):
    TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'

    def _real_extract(self, url):
-        display_id = self._match_id(url)
+        content_id, display_id = re.match(self._VALID_URL, url).groups()
+        if not content_id:
            webpage = self._download_webpage(url, display_id)
            content_id = self._search_regex(
                [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],