[thisoldhouse] Improve video id extraction (closes #24549)

This commit is contained in:
Sergey M․ 2020-04-11 20:07:12 +07:00
parent 533f3e3557
commit 2f19835726
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@@ -19,20 +19,6 @@ class ThisOldHouseIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
'note': 'test for updated video URL',
'info_dict': {
'id': '5e2b70e95216cc0001615120',
'ext': 'mp4',
'title': 'E12 | The Westerly Project | Seaside Transformation',
'description': 'Kevin and Tommy take the tour with the homeowners and Jeff. Norm presents his pine coffee table. Jenn gives Tommy the garden tour. Everyone meets at the flagpole to raise the flags.',
'timestamp': 1579755600,
'upload_date': '20200123',
},
'params': {
'skip_download': True,
},
}, { }, {
'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins', 'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
'only_matching': True, 'only_matching': True,
@@ -45,6 +31,10 @@ class ThisOldHouseIE(InfoExtractor):
}, { }, {
'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost', 'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost',
'only_matching': True, 'only_matching': True,
}, {
# iframe www.thisoldhouse.com
'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
'only_matching': True,
}] }]
_ZYPE_TMPL = 'https://player.zype.com/embed/%s.html?api_key=hsOk_yMSPYNrT22e9pu8hihLXjaZf0JW5jsOWv4ZqyHJFvkJn6rtToHl09tbbsbe' _ZYPE_TMPL = 'https://player.zype.com/embed/%s.html?api_key=hsOk_yMSPYNrT22e9pu8hihLXjaZf0JW5jsOWv4ZqyHJFvkJn6rtToHl09tbbsbe'
@@ -52,6 +42,6 @@ def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_id = self._search_regex( video_id = self._search_regex(
r'<iframe[^>]+src=[\'"](?:https?:)?//(?:www\.|)thisoldhouse(?:\.chorus\.build|\.com)/videos/zype/([0-9a-f]{24})', r'<iframe[^>]+src=[\'"](?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})',
webpage, 'video id') webpage, 'video id')
return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id) return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id)