[brightcove:legacy] Improve embeds detection (closes #11523)

This commit is contained in:
Sergey M․ 2016-12-24 22:46:27 +07:00
parent 264e77c406
commit 53a664edf4
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
2 changed files with 29 additions and 8 deletions

View File

@ -232,13 +232,16 @@ def _extract_brightcove_urls(cls, webpage):
"""Return a list of all Brightcove URLs from the webpage """ """Return a list of all Brightcove URLs from the webpage """
url_m = re.search( url_m = re.search(
r'<meta\s+property=[\'"]og:video[\'"]\s+content=[\'"](https?://(?:secure|c)\.brightcove.com/[^\'"]+)[\'"]', r'''(?x)
webpage) <meta\s+
(?:property|itemprop)=([\'"])(?:og:video|embedURL)\1[^>]+
content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2
''', webpage)
if url_m: if url_m:
url = unescapeHTML(url_m.group(1)) url = unescapeHTML(url_m.group('url'))
# Some sites don't add it, we can't download with this url, for example: # Some sites don't add it, we can't download with this url, for example:
# http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/ # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
if 'playerKey' in url or 'videoId' in url: if 'playerKey' in url or 'videoId' in url or 'idVideo' in url:
return [url] return [url]
matches = re.findall( matches = re.findall(
@ -259,7 +262,7 @@ def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
# Change the 'videoId' and others field to '@videoPlayer' # Change the 'videoId' and others field to '@videoPlayer'
url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url) url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url)
# Change bckey (used by bcove.me urls) to playerKey # Change bckey (used by bcove.me urls) to playerKey
url = re.sub(r'(?<=[?&])bckey', 'playerKey', url) url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -344,10 +344,10 @@ class GenericIE(InfoExtractor):
}, },
'skip': 'There is a limit of 200 free downloads / month for the test song', 'skip': 'There is a limit of 200 free downloads / month for the test song',
}, },
# embedded brightcove video
# it also tests brightcove videos that need to set the 'Referer' in the
# http requests
{ {
# embedded brightcove video
# it also tests brightcove videos that need to set the 'Referer'
# in the http requests
'add_ie': ['BrightcoveLegacy'], 'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
'info_dict': { 'info_dict': {
@ -361,6 +361,24 @@ class GenericIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
{
# embedded with itemprop embedURL and video id spelled as `idVideo`
'add_ie': ['BrightcoveLegacy'],
'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
'info_dict': {
'id': '5255628253001',
'ext': 'mp4',
'title': 'md5:37c519b1128915607601e75a87995fc0',
'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
'uploader': 'BFM BUSINESS',
'uploader_id': '876450612001',
'timestamp': 1482255315,
'upload_date': '20161220',
},
'params': {
'skip_download': True,
},
},
{ {
# https://github.com/rg3/youtube-dl/issues/2253 # https://github.com/rg3/youtube-dl/issues/2253
'url': 'http://bcove.me/i6nfkrc3', 'url': 'http://bcove.me/i6nfkrc3',