[slideshare] Fix extraction

This commit is contained in:
Sergey M․ 2015-01-01 00:26:19 +06:00
parent 68f705cac5
commit b7a7319c38

View File

@@ -30,7 +30,7 @@ def _real_extract(self, url):
page_title = mobj.group('title') page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title) webpage = self._download_webpage(url, page_title)
slideshare_obj = self._search_regex( slideshare_obj = self._search_regex(
r'var slideshare_object = ({.*?}); var user_info =', r'var\s+slideshare_object\s*=\s*({.*?});\s*var\s+user_info\s*=',
webpage, 'slideshare object') webpage, 'slideshare object')
info = json.loads(slideshare_obj) info = json.loads(slideshare_obj)
if info['slideshow']['type'] != 'video': if info['slideshow']['type'] != 'video':
@@ -41,7 +41,7 @@ def _real_extract(self, url):
ext = info['jsplayer']['video_extension'] ext = info['jsplayer']['video_extension']
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext) video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
description = self._html_search_regex( description = self._html_search_regex(
r'<p\s+(?:style="[^"]*"\s+)?class=".*?description.*?"[^>]*>(.*?)</p>', webpage, r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage,
'description', fatal=False) 'description', fatal=False)
return { return {