mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-20 18:27:46 +01:00
5e9a033e6e
Every now and then, imgur.com goes crazy and gives us a generic title and description (otherwise it looks all fine though). Simply update the test case to allow for that craziness.
98 lines
3.5 KiB
Python
98 lines
3.5 KiB
Python
from __future__ import unicode_literals
|
|
|
|
import re
|
|
|
|
from .common import InfoExtractor
|
|
from ..utils import (
|
|
int_or_none,
|
|
js_to_json,
|
|
mimetype2ext,
|
|
ExtractorError,
|
|
)
|
|
|
|
|
|
class ImgurIE(InfoExtractor):
|
|
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'
|
|
|
|
_TESTS = [{
|
|
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
|
'info_dict': {
|
|
'id': 'A61SaA1',
|
|
'ext': 'mp4',
|
|
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
|
'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
|
|
},
|
|
}, {
|
|
'url': 'https://imgur.com/A61SaA1',
|
|
'info_dict': {
|
|
'id': 'A61SaA1',
|
|
'ext': 'mp4',
|
|
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
|
'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
|
|
},
|
|
}]
|
|
|
|
def _real_extract(self, url):
|
|
video_id = self._match_id(url)
|
|
webpage = self._download_webpage(url, video_id)
|
|
|
|
width = int_or_none(self._search_regex(
|
|
r'<param name="width" value="([0-9]+)"',
|
|
webpage, 'width', fatal=False))
|
|
height = int_or_none(self._search_regex(
|
|
r'<param name="height" value="([0-9]+)"',
|
|
webpage, 'height', fatal=False))
|
|
|
|
video_elements = self._search_regex(
|
|
r'(?s)<div class="video-elements">(.*?)</div>',
|
|
webpage, 'video elements', default=None)
|
|
if not video_elements:
|
|
raise ExtractorError(
|
|
'No sources found for video %s. Maybe an image?' % video_id,
|
|
expected=True)
|
|
|
|
formats = []
|
|
for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
|
|
formats.append({
|
|
'format_id': m.group('type').partition('/')[2],
|
|
'url': self._proto_relative_url(m.group('src')),
|
|
'ext': mimetype2ext(m.group('type')),
|
|
'acodec': 'none',
|
|
'width': width,
|
|
'height': height,
|
|
'http_headers': {
|
|
'User-Agent': 'youtube-dl (like wget)',
|
|
},
|
|
})
|
|
|
|
gif_json = self._search_regex(
|
|
r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
|
|
webpage, 'GIF code', fatal=False)
|
|
if gif_json:
|
|
gifd = self._parse_json(
|
|
gif_json, video_id, transform_source=js_to_json)
|
|
formats.append({
|
|
'format_id': 'gif',
|
|
'preference': -10,
|
|
'width': width,
|
|
'height': height,
|
|
'ext': 'gif',
|
|
'acodec': 'none',
|
|
'vcodec': 'gif',
|
|
'container': 'gif',
|
|
'url': self._proto_relative_url(gifd['gifUrl']),
|
|
'filesize': gifd.get('size'),
|
|
'http_headers': {
|
|
'User-Agent': 'youtube-dl (like wget)',
|
|
},
|
|
})
|
|
|
|
self._sort_formats(formats)
|
|
|
|
return {
|
|
'id': video_id,
|
|
'formats': formats,
|
|
'description': self._og_search_description(webpage),
|
|
'title': self._og_search_title(webpage),
|
|
}
|