[zdf] Add chapter extraction (#2198)

Authored by: iw0nderhow
This commit is contained in:
chris 2022-01-01 23:07:31 +01:00 committed by GitHub
parent 8896899216
commit f5c2c2c9b0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -136,18 +136,18 @@ class ZDFBaseIE(InfoExtractor):
class ZDFIE(ZDFBaseIE): class ZDFIE(ZDFBaseIE):
_VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html' _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
_TESTS = [{ _TESTS = [{
# Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html 'url': 'https://www.zdf.de/nachrichten/heute-journal/heute-journal-vom-30-12-2021-100.html',
'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
'info_dict': { 'info_dict': {
'id': '141007_ab18_10wochensommer_film', 'id': '211230_sendung_hjo',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ab 18! - 10 Wochen Sommer', 'description': 'md5:47dff85977bde9fb8cba9e9c9b929839',
'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26', 'duration': 1890.0,
'duration': 2660, 'upload_date': '20211230',
'timestamp': 1608604200, 'chapters': list,
'upload_date': '20201222', 'thumbnail': 'md5:e65f459f741be5455c952cd820eb188e',
}, 'title': 'heute journal vom 30.12.2021',
'timestamp': 1640897100,
}
}, { }, {
'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html', 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
'info_dict': { 'info_dict': {
@@ -158,6 +158,7 @@ class ZDFIE(ZDFBaseIE):
'duration': 2615, 'duration': 2615,
'timestamp': 1465021200, 'timestamp': 1465021200,
'upload_date': '20160604', 'upload_date': '20160604',
'thumbnail': 'https://www.zdf.de/assets/mauve-im-labor-100~768x432?cb=1464909117806',
}, },
}, { }, {
'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html', 'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
@@ -169,7 +170,8 @@ class ZDFIE(ZDFBaseIE):
'description': 'Die Neue an der Schule verdreht Ismail den Kopf.', 'description': 'Die Neue an der Schule verdreht Ismail den Kopf.',
'title': 'Alles ist verzaubert', 'title': 'Alles ist verzaubert',
'timestamp': 1635520560, 'timestamp': 1635520560,
'upload_date': '20211029' 'upload_date': '20211029',
'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-100~1920x1080?cb=1636466431799',
}, },
}, { }, {
# Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche # Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche
@@ -196,6 +198,10 @@ class ZDFIE(ZDFBaseIE):
# Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html # Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html
'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html', 'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html',
'only_matching': True 'only_matching': True
}, {
# Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
'only_matching': True
}] }]
def _extract_entry(self, url, player, content, video_id): def _extract_entry(self, url, player, content, video_id):
@@ -234,12 +240,21 @@ class ZDFIE(ZDFBaseIE):
}) })
thumbnails.append(thumbnail) thumbnails.append(thumbnail)
chapter_marks = t.get('streamAnchorTag') or []
chapter_marks.append({'anchorOffset': int_or_none(t.get('duration'))})
chapters = [{
'start_time': chap.get('anchorOffset'),
'end_time': next_chap.get('anchorOffset'),
'title': chap.get('anchorLabel')
} for chap, next_chap in zip(chapter_marks, chapter_marks[1:])]
return merge_dicts(info, { return merge_dicts(info, {
'title': title, 'title': title,
'description': content.get('leadParagraph') or content.get('teasertext'), 'description': content.get('leadParagraph') or content.get('teasertext'),
'duration': int_or_none(t.get('duration')), 'duration': int_or_none(t.get('duration')),
'timestamp': unified_timestamp(content.get('editorialDate')), 'timestamp': unified_timestamp(content.get('editorialDate')),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'chapters': chapters or None
}) })
def _extract_regular(self, url, player, video_id): def _extract_regular(self, url, player, video_id):