[slideshare] fix description extraction

2024-11-04 12:07:12 +01:00 · 2016-07-05 12:01:04 +01:00 · 2016-07-05 12:01:04 +01:00 · 77082c7b9e
commit 77082c7b9e
parent 252a1f75d2
1 changed files with 3 additions and 2 deletions
--- a/youtube_dl/extractor/slideshare.py
+++ b/youtube_dl/extractor/slideshare.py
@@ -9,6 +9,7 @@
 )
 from ..utils import (
    ExtractorError,
+    get_element_by_id,
 )
@@ -40,7 +41,7 @@ def _real_extract(self, url):
        bucket = info['jsplayer']['video_bucket']
        ext = info['jsplayer']['video_extension']
        video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
-        description = self._html_search_regex(
+        description = get_element_by_id('slideshow-description-paragraph', webpage) or self._html_search_regex(
            r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage,
            'description', fatal=False)
@@ -51,5 +52,5 @@ def _real_extract(self, url):
            'ext': ext,
            'url': video_url,
            'thumbnail': info['slideshow']['pin_image_url'],
-            'description': description,
+            'description': description.strip() if description else None,
        }