From 5a63454b3637b3603434026cddfeac509218b90e Mon Sep 17 00:00:00 2001 From: Martin Renold Date: Sun, 21 Jan 2024 03:45:38 +0100 Subject: [PATCH] [ie/mx3] Add extractors (#8736) Authored by: martinxyz --- yt_dlp/extractor/_extractors.py | 5 + yt_dlp/extractor/mx3.py | 171 ++++++++++++++++++++++++++++++++ 2 files changed, 176 insertions(+) create mode 100644 yt_dlp/extractor/mx3.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 31bef1eb5..c4f1ccb8e 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1137,6 +1137,11 @@ MusicdexArtistIE, MusicdexPlaylistIE, ) +from .mx3 import ( + Mx3IE, + Mx3NeoIE, + Mx3VolksmusikIE, +) from .mxplayer import ( MxplayerIE, MxplayerShowIE, diff --git a/yt_dlp/extractor/mx3.py b/yt_dlp/extractor/mx3.py new file mode 100644 index 000000000..cb9f50e0c --- /dev/null +++ b/yt_dlp/extractor/mx3.py @@ -0,0 +1,171 @@ +import re + +from .common import InfoExtractor +from ..networking import HEADRequest +from ..utils import ( + get_element_by_class, + int_or_none, + try_call, + url_or_none, + urlhandle_detect_ext, +) +from ..utils.traversal import traverse_obj + + +class Mx3BaseIE(InfoExtractor): + _VALID_URL_TMPL = r'https?://(?:www\.)?%s/t/(?P\w+)' + _FORMATS = [{ + 'url': 'player_asset', + 'format_id': 'default', + 'quality': 0, + }, { + 'url': 'player_asset?quality=hd', + 'format_id': 'hd', + 'quality': 1, + }, { + 'url': 'download', + 'format_id': 'download', + 'quality': 2, + }, { + 'url': 'player_asset?quality=source', + 'format_id': 'source', + 'quality': 2, + }] + + def _extract_formats(self, track_id): + formats = [] + for fmt in self._FORMATS: + format_url = f'https://{self._DOMAIN}/tracks/{track_id}/{fmt["url"]}' + urlh = self._request_webpage( + HEADRequest(format_url), track_id, fatal=False, expected_status=404, + note=f'Checking for format {fmt["format_id"]}') + if urlh and urlh.status == 200: + formats.append({ + **fmt, + 'url': format_url, + 'ext': urlhandle_detect_ext(urlh), + 'filesize': int_or_none(urlh.headers.get('Content-Length')), + }) + return formats + + def _real_extract(self, url): + track_id = self._match_id(url) + webpage = self._download_webpage(url, track_id) + more_info = get_element_by_class('single-more-info', webpage) + data = self._download_json(f'https://{self._DOMAIN}/t/{track_id}.json', track_id, fatal=False) + + def get_info_field(name): + return self._html_search_regex( + rf']*>\s*{name}\s*\s*]*>(.*?)', + more_info, name, default=None, flags=re.DOTALL) + + return { + 'id': track_id, + 'formats': self._extract_formats(track_id), + 'genre': self._html_search_regex( + r']+class="single-band-genre"[^>]*>([^<]+)', webpage, 'genre', default=None), + 'release_year': int_or_none(get_info_field('Year of creation')), + 'description': get_info_field('Description'), + 'tags': try_call(lambda: get_info_field('Tag').split(', '), list), + **traverse_obj(data, { + 'title': ('title', {str}), + 'artist': (('performer_name', 'artist'), {str}), + 'album_artist': ('artist', {str}), + 'composer': ('composer_name', {str}), + 'thumbnail': (('picture_url_xlarge', 'picture_url'), {url_or_none}), + }, get_all=False), + } + + +class Mx3IE(Mx3BaseIE): + _DOMAIN = 'mx3.ch' + _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN) + _TESTS = [{ + 'url': 'https://mx3.ch/t/1Cru', + 'md5': '7ba09e9826b4447d4e1ce9d69e0e295f', + 'info_dict': { + 'id': '1Cru', + 'ext': 'wav', + 'artist': 'Godina', + 'album_artist': 'Tortue Tortue', + 'composer': 'Olivier Godinat', + 'genre': 'Rock', + 'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/4643/square_xlarge/1-s-envoler-1.jpg?1630272813', + 'title': "S'envoler", + 'release_year': 2021, + 'tags': [], + } + }, { + 'url': 'https://mx3.ch/t/1LIY', + 'md5': '48293cb908342547827f963a5a2e9118', + 'info_dict': { + 'id': '1LIY', + 'ext': 'mov', + 'artist': 'Tania Kimfumu', + 'album_artist': 'The Broots', + 'composer': 'Emmanuel Diserens', + 'genre': 'Electro', + 'thumbnail': 'https://mx3.ch/pictures/mx3/file/0110/0003/video_xlarge/frame_0000.png?1686963670', + 'title': 'The Broots-Larytta remix "Begging For Help"', + 'release_year': 2023, + 'tags': ['the broots', 'cassata records', 'larytta'], + 'description': '"Begging for Help" Larytta Remix Official Video\nRealized By Kali Donkilie in 2023', + } + }, { + 'url': 'https://mx3.ch/t/1C6E', + 'md5': '1afcd578493ddb8e5008e94bb6d97e25', + 'info_dict': { + 'id': '1C6E', + 'ext': 'wav', + 'artist': 'Alien Bubblegum', + 'album_artist': 'Alien Bubblegum', + 'composer': 'Alien Bubblegum', + 'genre': 'Punk', + 'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/1551/square_xlarge/pandora-s-box-cover-with-title.png?1627054733', + 'title': 'Wide Awake', + 'release_year': 2021, + 'tags': ['alien bubblegum', 'bubblegum', 'alien', 'pop punk', 'poppunk'], + } + }] + + +class Mx3NeoIE(Mx3BaseIE): + _DOMAIN = 'neo.mx3.ch' + _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN) + _TESTS = [{ + 'url': 'https://neo.mx3.ch/t/1hpd', + 'md5': '6d9986bbae5cac3296ec8813bf965eb2', + 'info_dict': { + 'id': '1hpd', + 'ext': 'wav', + 'artist': 'Baptiste Lopez', + 'album_artist': 'Kammerorchester Basel', + 'composer': 'Jannik Giger', + 'genre': 'Composition, Orchestra', + 'title': 'Troisième œil. Für Kammerorchester (2023)', + 'thumbnail': 'https://neo.mx3.ch/pictures/neo/file/0000/0241/square_xlarge/kammerorchester-basel-group-photo-2_c_-lukasz-rajchert.jpg?1560341252', + 'release_year': 2023, + 'tags': [], + } + }] + + +class Mx3VolksmusikIE(Mx3BaseIE): + _DOMAIN = 'volksmusik.mx3.ch' + _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN) + _TESTS = [{ + 'url': 'https://volksmusik.mx3.ch/t/Zx', + 'md5': 'dd967a7b0c1ef898f3e072cf9c2eae3c', + 'info_dict': { + 'id': 'Zx', + 'ext': 'mp3', + 'artist': 'Ländlerkapelle GrischArt', + 'album_artist': 'Ländlerkapelle GrischArt', + 'composer': 'Urs Glauser', + 'genre': 'Instrumental, Graubünden', + 'title': 'Chämilouf', + 'thumbnail': 'https://volksmusik.mx3.ch/pictures/vxm/file/0000/3815/square_xlarge/grischart1.jpg?1450530120', + 'release_year': 2012, + 'tags': [], + } + }]