[common] Relax JWPlayer regex and remove duplicate URLs (#12768)

This commit is contained in:
Remita Amine 2017-04-17 08:48:24 +01:00
parent 1c35b3da44
commit bf1b87cd91

View File

@@ -2182,7 +2182,7 @@ def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json): def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
mobj = re.search( mobj = re.search(
r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)', r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\).*?\.setup\s*\((?P<options>[^)]+)\)',
webpage) webpage)
if mobj: if mobj:
try: try:
@@ -2258,11 +2258,17 @@ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
urls = []
formats = [] formats = []
for source in jwplayer_sources_data: for source in jwplayer_sources_data:
source_url = self._proto_relative_url(source['file']) source_url = self._proto_relative_url(source.get('file'))
if not source_url:
continue
if base_url: if base_url:
source_url = compat_urlparse.urljoin(base_url, source_url) source_url = compat_urlparse.urljoin(base_url, source_url)
if source_url in urls:
continue
urls.append(source_url)
source_type = source.get('type') or '' source_type = source.get('type') or ''
ext = mimetype2ext(source_type) or determine_ext(source_url) ext = mimetype2ext(source_type) or determine_ext(source_url)
if source_type == 'hls' or ext == 'm3u8': if source_type == 'hls' or ext == 'm3u8':