# Reconstructed from a git patch:
#   Commit:  d380fc161487ef2e14b204f22e13e16e1a6ceb64
#   From:    HobbyistDev <105957301+HobbyistDev@users.noreply.github.com>
#   Date:    Fri, 5 Aug 2022 23:49:45 +0900
#   Subject: [PATCH] [extractor/kompas] Add extractor (#4562)
#   Authored by: HobbyistDev
#
# The patch creates yt_dlp/extractor/kompas.py (this module, 68 lines in the
# original patch) and registers it in yt_dlp/extractor/_extractors.py by adding
#     from .kompas import KompasVideoIE
# between the `.kinopoisk` and `.konserthusetplay` imports.

from .common import InfoExtractor
from ..utils import (
    clean_html,
    float_or_none,
    traverse_obj,
    try_call,
)

# Videos from www.kompas.tv and video.kompas.com seem to use the jixie player
# see [1] https://jixie.atlassian.net/servicedesk/customer/portal/2/article/1339654214?src=-1456335525,
#     [2] https://scripts.jixie.media/jxvideo.3.1.min.js for more info


class KompasVideoIE(InfoExtractor):
    # The named groups are required: _real_extract() reads them back with
    # .group('id', 'slug').
    _VALID_URL = r'https?://video\.kompas\.com/\w+/(?P<id>\d+)/(?P<slug>[\w-]+)'
    _TESTS = [{
        'url': 'https://video.kompas.com/watch/164474/kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel',
        'info_dict': {
            'id': '164474',
            'ext': 'mp4',
            'title': 'Kim Jong Un Siap Kirim Nuklir Lawan AS dan Korsel',
            'description': 'md5:262530c4fb7462398235f9a5dba92456',
            'uploader_id': '9262bf2590d558736cac4fff7978fcb1',
            'display_id': 'kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel',
            'duration': 85.066667,
            'categories': ['news'],
            'thumbnail': 'https://video.jixie.media/1001/164474/164474_1280x720.jpg',
            'tags': 'count:9',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a video.kompas.com watch page.

        The numeric id from the URL is used to query the jixie stream API for
        the stream list and metadata; the watch page itself is downloaded only
        to provide fallback values for the title and description.
        """
        video_id, display_id = self._match_valid_url(url).group('id', 'slug')
        webpage = self._download_webpage(url, display_id)

        json_data = self._download_json(
            'https://apidam.jixie.io/api/public/stream', display_id,
            query={'metadata': 'full', 'video_id': video_id})['data']

        formats, subtitles = [], {}
        for stream in json_data['streams']:
            stream_url = stream.get('url')
            if not stream_url:
                # Without a URL the entry would be handed to the m3u8
                # extractor as None or emitted as a format with a null url.
                continue
            if stream.get('type') == 'HLS':
                fmt, sub = self._extract_m3u8_formats_and_subtitles(stream_url, display_id, ext='mp4')
                formats.extend(fmt)
                self._merge_subtitles(sub, target=subtitles)
            else:
                # Any non-HLS stream is exposed as a single direct format.
                formats.append({
                    'url': stream_url,
                    'width': stream.get('width'),
                    'height': stream.get('height'),
                    'ext': 'mp4',
                })

        self._sort_formats(formats)
        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'subtitles': subtitles,
            # API values take precedence; the page's meta tags are the fallback.
            'title': json_data.get('title') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
            'description': (clean_html(traverse_obj(json_data, ('metadata', 'description')))
                            or self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage)),
            'thumbnails': traverse_obj(json_data, ('metadata', 'thumbnails')),
            'duration': float_or_none(traverse_obj(json_data, ('metadata', 'duration'))),
            # keywords/categories are split on commas; try_call yields None
            # when the field is missing or not a string.
            'tags': try_call(lambda: json_data['metadata']['keywords'].split(',')),
            'categories': try_call(lambda: json_data['metadata']['categories'].split(',')),
            'uploader_id': json_data.get('owner_id'),
        }