[tagesschau] Restrict playlist entry regex

This commit is contained in:
Sergey M․ 2016-05-01 07:15:23 +06:00
parent 854cc54bc1
commit 68bb2fef95
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@@ -200,6 +200,10 @@ class TagesschauIE(InfoExtractor):
}, { }, {
'url': 'http://www.tagesschau.de/100sekunden/index.html', 'url': 'http://www.tagesschau.de/100sekunden/index.html',
'only_matching': True, 'only_matching': True,
}, {
# playlist article with collapsing sections
'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html',
'only_matching': True,
}] }]
@classmethod @classmethod
@@ -275,7 +279,7 @@ def _real_extract(self, url):
if webpage_type == 'website': # Article if webpage_type == 'website': # Article
entries = [] entries = []
for num, (entry_title, media_kind, download_text) in enumerate(re.findall( for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
r'(?s)<p[^>]+class="infotext"[^>]*>.*?<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX, r'(?s)<p[^>]+class="infotext"[^>]*>\s*(?:<a[^>]+>)?\s*<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
webpage), 1): webpage), 1):
entries.append({ entries.append({
'id': '%s-%d' % (display_id, num), 'id': '%s-%d' % (display_id, num),