[twitter] Don't check /cards/ URLs

Fixes #9181

In the tweet used as the new test case (https://twitter.com/captainamerica/status/719944021058060289), there are two cards:
1. https://twitter.com/i/cards/tfw/v1/719944006306701313
   This shows #TeamCap vs. #TeamIronMan
2. https://twitter.com/i/videos/tweet/719944021058060289
   This is the real video and can be handled by TwitterCardIE

In all current test_Twitter* tests, the /videos/tweet/ approach works fine, so the /i/cards/tfw/v1/ lookup is no longer needed and is removed.
This commit is contained in:
Yen Chi Hsuan 2016-04-16 18:57:50 +08:00
parent 8c65e4a527
commit 36b7d9dbfa
No known key found for this signature in database
GPG Key ID: 3FDDD575826C5C30

View File

@@ -260,6 +260,17 @@ class TwitterIE(InfoExtractor):
'upload_date': '20140615', 'upload_date': '20140615',
}, },
'add_ie': ['Vine'], 'add_ie': ['Vine'],
}, {
'url': 'https://twitter.com/captainamerica/status/719944021058060289',
# md5 constantly changes
'info_dict': {
'id': '719944021058060289',
'ext': 'mp4',
'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"',
'uploader_id': 'captainamerica',
'uploader': 'Captain America',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@@ -284,17 +295,6 @@ def _real_extract(self, url):
'title': username + ' - ' + title, 'title': username + ' - ' + title,
} }
card_id = self._search_regex(
r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url', default=None)
if card_id:
card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
info.update({
'_type': 'url_transparent',
'ie_key': 'TwitterCard',
'url': card_url,
})
return info
mobj = re.search(r'''(?x) mobj = re.search(r'''(?x)
<video[^>]+class="animated-gif"(?P<more_info>[^>]+)>\s* <video[^>]+class="animated-gif"(?P<more_info>[^>]+)>\s*
<source[^>]+video-src="(?P<url>[^"]+)" <source[^>]+video-src="(?P<url>[^"]+)"