[brightcove:new] extract protocol-less embed URLs(closes #2914)

This commit is contained in:
remitamine 2016-03-16 11:46:53 +01:00
parent 23edc49509
commit 354dbbd880

View File

@ -413,8 +413,8 @@ def _extract_urls(webpage):
# Look for iframe embeds [1] # Look for iframe embeds [1]
for _, url in re.findall( for _, url in re.findall(
r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage): r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
entries.append(url) entries.append(url if url.startswith('http') else 'http:' + url)
# Look for embed_in_page embeds [2] # Look for embed_in_page embeds [2]
for video_id, account_id, player_id, embed in re.findall( for video_id, account_id, player_id, embed in re.findall(