[limelight] Improve embeds extraction (closes #12761)

* Move extraction code to extractor
* Add extraction for LimelightEmbeddedPlayerFlash embeds
* Extract multiple videos
This commit is contained in:
Sergey M․ 2017-04-17 00:23:16 +07:00
parent 751c89a27d
commit e5d39886ec
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
2 changed files with 43 additions and 0 deletions

View File

@ -85,6 +85,7 @@
from .openload import OpenloadIE from .openload import OpenloadIE
from .videopress import VideoPressIE from .videopress import VideoPressIE
from .rutube import RutubeIE from .rutube import RutubeIE
from .limelight import LimelightBaseIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2483,6 +2484,11 @@ def _real_extract(self, url):
return self.url_result(piksel_url, PikselIE.ie_key()) return self.url_result(piksel_url, PikselIE.ie_key())
# Look for Limelight embeds # Look for Limelight embeds
limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
if limelight_urls:
return self.playlist_result(
limelight_urls, video_id, video_title, video_description)
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage) mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
if mobj: if mobj:
lm = { lm = {

View File

@ -9,6 +9,7 @@
determine_ext, determine_ext,
float_or_none, float_or_none,
int_or_none, int_or_none,
smuggle_url,
unsmuggle_url, unsmuggle_url,
ExtractorError, ExtractorError,
) )
@ -18,6 +19,42 @@ class LimelightBaseIE(InfoExtractor):
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s' _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json' _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
@classmethod
def _extract_urls(cls, webpage, source_url):
lm = {
'Media': 'media',
'Channel': 'channel',
'ChannelList': 'channel_list',
}
entries = []
for kind, video_id in re.findall(
r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
webpage):
print('video_id', video_id)
entries.append(cls.url_result(
smuggle_url(
'limelight:%s:%s' % (lm[kind], video_id),
{'source_url': source_url}),
'Limelight%s' % kind, video_id))
for mobj in re.finditer(
# As per [1] class attribute should be exactly equal to
# LimelightEmbeddedPlayerFlash but numerous examples seen
# that don't exactly match it (e.g. [2]).
# 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
# 2. http://www.sedona.com/FacilitatorTraining2017
r'''(?sx)
<object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
<param[^>]+
name=(["\'])flashVars\2[^>]+
value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
''', webpage):
entries.append(cls.url_result(
smuggle_url(
'limelight:media:%s' % mobj.group('id'),
{'source_url': source_url}),
'LimelightMedia', mobj.group('id')))
return entries
def _call_playlist_service(self, item_id, method, fatal=True, referer=None): def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
headers = {} headers = {}
if referer: if referer: