[slideshare] fix description extraction

This commit is contained in:
Remita Amine 2016-07-05 12:01:04 +01:00
parent 252a1f75d2
commit 77082c7b9e

View File

@@ -9,6 +9,7 @@
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
get_element_by_id,
) )
@@ -40,7 +41,7 @@ def _real_extract(self, url):
bucket = info['jsplayer']['video_bucket'] bucket = info['jsplayer']['video_bucket']
ext = info['jsplayer']['video_extension'] ext = info['jsplayer']['video_extension']
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext) video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
description = self._html_search_regex( description = get_element_by_id('slideshow-description-paragraph', webpage) or self._html_search_regex(
r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage, r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage,
'description', fatal=False) 'description', fatal=False)
@@ -51,5 +52,5 @@ def _real_extract(self, url):
'ext': ext, 'ext': ext,
'url': video_url, 'url': video_url,
'thumbnail': info['slideshow']['pin_image_url'], 'thumbnail': info['slideshow']['pin_image_url'],
'description': description, 'description': description.strip() if description else None,
} }