From f5c2c2c9b0fa68978777c8b3362c5091fd07bc20 Mon Sep 17 00:00:00 2001 From: chris <6024426+iw0nderhow@users.noreply.github.com> Date: Sat, 1 Jan 2022 23:07:31 +0100 Subject: [PATCH] [zdf] Add chapter extraction (#2198) Authored by: iw0nderhow --- yt_dlp/extractor/zdf.py | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py index 6f7f801e1..0aa5184f7 100644 --- a/yt_dlp/extractor/zdf.py +++ b/yt_dlp/extractor/zdf.py @@ -136,18 +136,18 @@ def _extract_player(self, webpage, video_id, fatal=True): class ZDFIE(ZDFBaseIE): _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html' _TESTS = [{ - # Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html - 'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html', - 'md5': '0aff3e7bc72c8813f5e0fae333316a1d', + 'url': 'https://www.zdf.de/nachrichten/heute-journal/heute-journal-vom-30-12-2021-100.html', 'info_dict': { - 'id': '141007_ab18_10wochensommer_film', + 'id': '211230_sendung_hjo', 'ext': 'mp4', - 'title': 'Ab 18!
- 10 Wochen Sommer', - 'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26', - 'duration': 2660, - 'timestamp': 1608604200, - 'upload_date': '20201222', - }, + 'description': 'md5:47dff85977bde9fb8cba9e9c9b929839', + 'duration': 1890.0, + 'upload_date': '20211230', + 'chapters': list, + 'thumbnail': 'md5:e65f459f741be5455c952cd820eb188e', + 'title': 'heute journal vom 30.12.2021', + 'timestamp': 1640897100, + } }, { 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html', 'info_dict': { @@ -158,6 +158,7 @@ class ZDFIE(ZDFBaseIE): 'duration': 2615, 'timestamp': 1465021200, 'upload_date': '20160604', + 'thumbnail': 'https://www.zdf.de/assets/mauve-im-labor-100~768x432?cb=1464909117806', }, }, { 'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html', @@ -169,7 +170,8 @@ class ZDFIE(ZDFBaseIE): 'description': 'Die Neue an der Schule verdreht Ismail den Kopf.', 'title': 'Alles ist verzaubert', 'timestamp': 1635520560, - 'upload_date': '20211029' + 'upload_date': '20211029', + 'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-100~1920x1080?cb=1636466431799', }, }, { # Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche @@ -196,6 +198,10 @@ class ZDFIE(ZDFBaseIE): # Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html 'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html', 'only_matching': True + }, { + # Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html + 'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html', + 'only_matching': True }] def _extract_entry(self, url, player, content, video_id): @@ -234,12 +240,21 @@ def _extract_entry(self, url, player, content, video_id): }) thumbnails.append(thumbnail) + chapter_marks = t.get('streamAnchorTag') or [] + 
chapter_marks.append({'anchorOffset': int_or_none(t.get('duration'))}) + chapters = [{ + 'start_time': chap.get('anchorOffset'), + 'end_time': next_chap.get('anchorOffset'), + 'title': chap.get('anchorLabel') + } for chap, next_chap in zip(chapter_marks, chapter_marks[1:])] + return merge_dicts(info, { 'title': title, 'description': content.get('leadParagraph') or content.get('teasertext'), 'duration': int_or_none(t.get('duration')), 'timestamp': unified_timestamp(content.get('editorialDate')), 'thumbnails': thumbnails, + 'chapters': chapters or None }) def _extract_regular(self, url, player, video_id):