diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index 3eafd08e57..d116cd7c67 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -62,7 +62,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -70,8 +70,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2022.11.11, Current version: 2022.11.11 - yt-dlp is up to date (2022.11.11) + Latest version: 2023.01.06, Current version: 2023.01.06 + yt-dlp is up to date (2023.01.06) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 295a0f254b..2bbf93a939 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -74,7 +74,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -82,8 +82,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2022.11.11, Current version: 2022.11.11 - yt-dlp is up to date (2022.11.11) + Latest version: 2023.01.06, Current version: 2023.01.06 + yt-dlp is up to date (2023.01.06) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 6c4e970808..d1d3514f22 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -18,7 +18,7 @@ body: options: - label: I'm requesting a site-specific feature required: true - - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -70,7 +70,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -78,8 +78,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2022.11.11, Current version: 2022.11.11 - yt-dlp is up to date (2022.11.11) + Latest version: 2023.01.06, Current version: 2023.01.06 + yt-dlp is up to date (2023.01.06) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index b224f3d326..8c851a945b 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -55,7 +55,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -63,8 +63,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2022.11.11, Current version: 2022.11.11 - yt-dlp is up to date (2022.11.11) + Latest version: 2023.01.06, Current version: 2023.01.06 + yt-dlp is up to date (2023.01.06) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index d58dc2e940..444df3c321 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -20,7 +20,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates required: true @@ -51,7 +51,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -59,7 +59,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2022.11.11, Current version: 2022.11.11 - yt-dlp is up to date (2022.11.11) + Latest version: 2023.01.06, Current version: 2023.01.06 + yt-dlp is up to date (2023.01.06) render: shell diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index 213bf91566..997278f21f 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -26,7 +26,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates required: true @@ -57,7 +57,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -65,7 +65,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2022.11.11, Current version: 2022.11.11 - yt-dlp is up to date (2022.11.11) + Latest version: 2023.01.06, Current version: 2023.01.06 + yt-dlp is up to date (2023.01.06) render: shell diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 5abc6ce41e..7c271565ff 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,8 +2,6 @@ ### Description of your *pull request* and other information - - +### 2023.01.06 + +* Fix config locations by [Grub4k](https://github.com/Grub4k), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +* [downloader/aria2c] Disable native progress +* [utils] `mimetype2ext`: `weba` is not standard +* [utils] `windows_enable_vt_mode`: Better error handling +* [build] Add minimal `pyproject.toml` +* [update] Fix updater file removal on windows by [Grub4K](https://github.com/Grub4K) +* [cleanup] Misc fixes and cleanup +* [extractor/aitube] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/drtv] Add series extractors by [FrederikNS](https://github.com/FrederikNS) +* [extractor/volejtv] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/xanimu] Add extractor by [JChris246](https://github.com/JChris246) +* [extractor/youtube] Retry manifest refresh for live-from-start by [mzhou](https://github.com/mzhou) +* [extractor/biliintl] Add `/media` to `VALID_URL` by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/biliIntl] Add fallback to `video_data` by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/crunchyroll:show] Add `language` to entries by [Chrissi2812](https://github.com/Chrissi2812) +* [extractor/joj] Fix extractor by [OndrejBakan](https://github.com/OndrejBakan), [pukkandan](https://github.com/pukkandan) +* [extractor/nbc] Update graphql query by [jacobtruman](https://github.com/jacobtruman) +* [extractor/reddit] Add subreddit as `channel_id` by [gschizas](https://github.com/gschizas) +* [extractor/tiktok] Add `TikTokLive` extractor by [JC-Chung](https://github.com/JC-Chung) + +### 2023.01.02 + +* **Improve plugin architecture** by [Grub4K](https://github.com/Grub4K), [coletdjnz](https://github.com/coletdjnz), [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan) + * Plugins can be loaded in any distribution of yt-dlp (binary, pip, source, etc.) and can be distributed and installed as packages. See [the readme](https://github.com/yt-dlp/yt-dlp/tree/05997b6e98e638d97d409c65bb5eb86da68f3b64#plugins) for more information +* Add `--compat-options 2021,2022` + * This allows devs to change defaults and make other potentially breaking changes more easily. If you need everything to work exactly as-is, put Use `--compat 2022` in your config to guard against future compat changes. +* [downloader/aria2c] Native progress for aria2c via RPC by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan) +* Merge youtube-dl: Upto [commit/195f22f](https://github.com/ytdl-org/youtube-dl/commit/195f22f6) by [Grub4k](https://github.com/Grub4k), [pukkandan](https://github.com/pukkandan) +* Add pre-processor stage `video` +* Let `--parse/replace-in-metadata` run at any post-processing stage +* Add `--enable-file-urls` by [coletdjnz](https://github.com/coletdjnz) +* Add new field `aspect_ratio` +* Add `ac4` to known codecs +* Add `weba` to known extensions +* [FFmpegVideoConvertor] Add `gif` to `--recode-video` +* Add message when there are no subtitles/thumbnails +* Deprioritize HEVC-over-FLV formats by [Lesmiscore](https://github.com/Lesmiscore) +* Make early reject of `--match-filter` stricter +* Fix `--cookies-from-browser` CLI parsing +* Fix `original_url` in playlists +* Fix bug in writing playlist info-json +* Fix bugs in `PlaylistEntries` +* [downloader/ffmpeg] Fix headers for video+audio formats by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [extractor] Add a way to distinguish IEs that returns only videos +* [extractor] Implement universal format sorting and deprecate `_sort_formats` +* [extractor] Let `_extract_format` functions obey `--ignore-no-formats` +* [extractor/generic] Add `fragment_query` extractor arg for DASH and HLS by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/generic] Decode unicode-escaped embed URLs by [bashonly](https://github.com/bashonly) +* [extractor/generic] Don't report redirect to https +* [extractor/generic] Fix JSON LD manifest extraction by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/generic] Use `Accept-Encoding: identity` for initial request by [coletdjnz](https://github.com/coletdjnz) +* [FormatSort] Add `mov` to `vext` +* [jsinterp] Escape regex that looks like nested set +* [webvtt] Handle premature EOF by [flashdagger](https://github.com/flashdagger) +* [utils] `classproperty`: Add cache support +* [utils] `get_exe_version`: Detect broken executables by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan) +* [utils] `js_to_json`: Fix bug in [f55523c](https://github.com/yt-dlp/yt-dlp/commit/f55523c) by [ChillingPepper](https://github.com/ChillingPepper), [pukkandan](https://github.com/pukkandan) +* [utils] Make `ExtractorError` mutable +* [utils] Move `FileDownloader.parse_bytes` into utils +* [utils] Move format sorting code into `utils` +* [utils] `windows_enable_vt_mode`: Proper implementation by [Grub4K](https://github.com/Grub4K) +* [update] Workaround [#5632](https://github.com/yt-dlp/yt-dlp/issues/5632) +* [docs] Improvements +* [cleanup] Misc fixes and cleanup +* [cleanup] Use `random.choices` by [freezboltz](https://github.com/freezboltz) +* [extractor/airtv] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/amazonminitv] Add extractors by [GautamMKGarg](https://github.com/GautamMKGarg), [nyuszika7h](https://github.com/nyuszika7h) +* [extractor/beatbump] Add extractors by [Bobscorn](https://github.com/Bobscorn), [pukkandan](https://github.com/pukkandan) +* [extractor/europarl] Add EuroParlWebstream extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/kanal2] Add extractor by [bashonly](https://github.com/bashonly), [glensc](https://github.com/glensc), [pukkandan](https://github.com/pukkandan) +* [extractor/kankanews] Add extractor by [synthpop123](https://github.com/synthpop123) +* [extractor/kick] Add extractor by [bashonly](https://github.com/bashonly) +* [extractor/mediastream] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [elyse0](https://github.com/elyse0) +* [extractor/noice] Add NoicePodcast extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/oneplace] Add OnePlacePodcast extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/rumble] Add RumbleIE extractor by [flashdagger](https://github.com/flashdagger) +* [extractor/screencastify] Add extractor by [bashonly](https://github.com/bashonly) +* [extractor/trtcocuk] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/Veoh] Add user extractor by [tntmod54321](https://github.com/tntmod54321) +* [extractor/videoken] Add extractors by [bashonly](https://github.com/bashonly) +* [extractor/webcamerapl] Add extractor by [milkknife](https://github.com/milkknife) +* [extractor/amazon] Add `AmazonReviews` extractor by [bashonly](https://github.com/bashonly) +* [extractor/netverse] Add `NetverseSearch` extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/vimeo] Add `VimeoProIE` by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/xiami] Remove extractors by [synthpop123](https://github.com/synthpop123) +* [extractor/youtube] Add `piped.video` by [Bnyro](https://github.com/Bnyro) +* [extractor/youtube] Consider language in format de-duplication +* [extractor/youtube] Extract DRC formats +* [extractor/youtube] Fix `ytuser:` +* [extractor/youtube] Fix bug in handling of music URLs +* [extractor/youtube] Subtitles cannot be translated to `und` +* [extractor/youtube:tab] Extract metadata from channel items by [coletdjnz](https://github.com/coletdjnz) +* [extractor/ARD] Add vtt subtitles by [CapacitorSet](https://github.com/CapacitorSet) +* [extractor/ArteTV] Extract chapters by [bashonly](https://github.com/bashonly), [iw0nderhow](https://github.com/iw0nderhow) +* [extractor/bandcamp] Add `album_artist` by [stelcodes](https://github.com/stelcodes) +* [extractor/bilibili] Fix `--no-playlist` for anthology +* [extractor/bilibili] Improve `_VALID_URL` by [skbeh](https://github.com/skbeh) +* [extractor/biliintl:series] Make partial download of series faster +* [extractor/BiliLive] Fix extractor +* [extractor/brightcove] Add `BrightcoveNewBaseIE` and fix embed extraction +* [extractor/cda] Support premium and misc improvements by [selfisekai](https://github.com/selfisekai) +* [extractor/ciscowebex] Support password-protected videos by [damianoamatruda](https://github.com/damianoamatruda) +* [extractor/curiositystream] Fix auth by [mnn](https://github.com/mnn) +* [extractor/embedly] Handle vimeo embeds +* [extractor/fifa] Fix Preplay extraction by [dirkf](https://github.com/dirkf) +* [extractor/foxsports] Fix extractor by [bashonly](https://github.com/bashonly) +* [extractor/gronkh] Fix `_VALID_URL` by [muddi900](https://github.com/muddi900) +* [extractor/hotstar] Improve format metadata +* [extractor/iqiyi] Fix `Iq` JS regex by [bashonly](https://github.com/bashonly) +* [extractor/la7] Improve extractor by [nixxo](https://github.com/nixxo) +* [extractor/mediaset] Better embed detection and error messages by [nixxo](https://github.com/nixxo) +* [extractor/mixch] Support `--wait-for-video` +* [extractor/naver] Improve `_VALID_URL` for `NaverNowIE` by [bashonly](https://github.com/bashonly) +* [extractor/naver] Treat fan subtitles as separate language +* [extractor/netverse] Extract comments by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/nosnl] Add support for /video by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/odnoklassniki] Extract subtitles by [bashonly](https://github.com/bashonly) +* [extractor/pinterest] Fix extractor by [bashonly](https://github.com/bashonly) +* [extractor/plutotv] Fix videos with non-zero start by [digitall](https://github.com/digitall) +* [extractor/polskieradio] Adapt to next.js redesigns by [selfisekai](https://github.com/selfisekai) +* [extractor/reddit] Add vcodec to fallback format by [chengzhicn](https://github.com/chengzhicn) +* [extractor/reddit] Extract crossposted media by [bashonly](https://github.com/bashonly) +* [extractor/reddit] Extract video embeds in text posts by [bashonly](https://github.com/bashonly) +* [extractor/rutube] Support private videos by [mexus](https://github.com/mexus) +* [extractor/sibnet] Separate from VKIE +* [extractor/slideslive] Fix extractor by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [extractor/slideslive] Support embeds and slides by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/soundcloud] Support user permalink by [nosoop](https://github.com/nosoop) +* [extractor/spankbang] Fix extractor by [JChris246](https://github.com/JChris246) +* [extractor/stv] Detect DRM +* [extractor/swearnet] Fix description bug +* [extractor/tencent] Fix geo-restricted video by [elyse0](https://github.com/elyse0) +* [extractor/tiktok] Fix subs, `DouyinIE`, improve `_VALID_URL` by [bashonly](https://github.com/bashonly) +* [extractor/tiktok] Update `_VALID_URL`, add `api_hostname` arg by [bashonly](https://github.com/bashonly) +* [extractor/tiktok] Update API hostname by [redraskal](https://github.com/redraskal) +* [extractor/twitcasting] Fix videos with password by [Spicadox](https://github.com/Spicadox), [bashonly](https://github.com/bashonly) +* [extractor/twitter] Heed `--no-playlist` for multi-video tweets by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [extractor/twitter] Refresh guest token when expired by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [extractor/twitter:spaces] Add `Referer` to m3u8 by [nixxo](https://github.com/nixxo) +* [extractor/udemy] Fix lectures that have no URL and detect DRM +* [extractor/unsupported] Add more URLs +* [extractor/urplay] Support for audio-only formats by [barsnick](https://github.com/barsnick) +* [extractor/wistia] Improve extension detection by [Grub4k](https://github.com/Grub4k), [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/yle_areena] Support restricted videos by [docbender](https://github.com/docbender) +* [extractor/youku] Fix extractor by [KurtBestor](https://github.com/KurtBestor) +* [extractor/youporn] Fix metadata by [marieell](https://github.com/marieell) +* [extractor/redgifs] Fix bug in [8c188d5](https://github.com/yt-dlp/yt-dlp/commit/8c188d5d09177ed213a05c900d3523867c5897fd) + + ### 2022.11.11 * Merge youtube-dl: Upto [commit/de39d12](https://github.com/ytdl-org/youtube-dl/commit/de39d128) diff --git a/Collaborators.md b/Collaborators.md index 3f24d5c476..3bce437c9b 100644 --- a/Collaborators.md +++ b/Collaborators.md @@ -42,7 +42,7 @@ ## [Ashish0804](https://github.com/Ashish0804) [Inactive] * Improved/fixed support for HiDive, HotStar, Hungama, LBRY, LinkedInLearning, Mxplayer, SonyLiv, TV2, Vimeo, VLive etc -## [Lesmiscore](https://github.com/Lesmiscore) (nao20010128nao) +## [Lesmiscore](https://github.com/Lesmiscore) (nao20010128nao) **Bitcoin**: bc1qfd02r007cutfdjwjmyy9w23rjvtls6ncve7r3s **Monacoin**: mona1q3tf7dzvshrhfe3md379xtvt2n22duhglv5dskr @@ -50,3 +50,10 @@ ## [Lesmiscore](https://github.com/Lesmiscore) (nao20010128nao) * Download live from start to end for YouTube * Added support for new websites AbemaTV, mildom, PixivSketch, skeb, radiko, voicy, mirrativ, openrec, whowatch, damtomo, 17.live, mixch etc * Improved/fixed support for fc2, YahooJapanNews, tver, iwara etc + + +## [bashonly](https://github.com/bashonly) + +* `--cookies-from-browser` support for Firefox containers +* Added support for new websites Genius, Kick, NBCStations, Triller, VideoKen etc +* Improved/fixed support for Anvato, Brightcove, Instagram, ParamountPlus, Reddit, SlidesLive, TikTok, Twitter, Vimeo etc diff --git a/Makefile b/Makefile index 8f335927d0..ca7d641ab8 100644 --- a/Makefile +++ b/Makefile @@ -17,8 +17,8 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \ clean-test: rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \ *.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \ - *.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.jpeg *.jpg *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 *.mp4 \ - *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp + *.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 \ + *.mp4 *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp clean-dist: rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \ yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap diff --git a/README.md b/README.md index c0a2a420bc..07c74d6c32 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ [![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r "Discord") [![Supported Sites](https://img.shields.io/badge/-Supported_Sites-brightgreen.svg?style=for-the-badge)](supportedsites.md "Supported Sites") [![License: Unlicense](https://img.shields.io/badge/-Unlicense-blue.svg?style=for-the-badge)](LICENSE "License") -[![CI Status](https://img.shields.io/github/workflow/status/yt-dlp/yt-dlp/Core%20Tests/master?label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status") +[![CI Status](https://img.shields.io/github/actions/workflow/status/yt-dlp/yt-dlp/core.yml?branch=master&label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status") [![Commits](https://img.shields.io/github/commit-activity/m/yt-dlp/yt-dlp?label=commits&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") [![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge&display_timestamp=committer)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") @@ -61,6 +61,8 @@ * [Modifying metadata examples](#modifying-metadata-examples) * [EXTRACTOR ARGUMENTS](#extractor-arguments) * [PLUGINS](#plugins) + * [Installing Plugins](#installing-plugins) + * [Developing Plugins](#developing-plugins) * [EMBEDDING YT-DLP](#embedding-yt-dlp) * [Embedding examples](#embedding-examples) * [DEPRECATED OPTIONS](#deprecated-options) @@ -74,13 +76,13 @@ # NEW FEATURES -* Merged with **youtube-dl v2021.12.17+ [commit/de39d12](https://github.com/ytdl-org/youtube-dl/commit/de39d128)** and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl) +* Merged with **youtube-dl v2021.12.17+ [commit/195f22f](https://github.com/ytdl-org/youtube-dl/commit/195f22f)** and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl) * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API * **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples)) -* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that the NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details. +* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details. * **YouTube improvements**: * Supports Clips, Stories (`ytstories:`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, YouTube Music Albums/Channels ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723)), and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`) @@ -151,12 +153,15 @@ ### Differences in default behavior * When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi` * yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior +* yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [~~aria2c~~](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is For ease of use, a few more compat options are available: * `--compat-options all`: Use all compat options (Do NOT use) * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams` * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect` +* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date` +* `--compat-options 2022`: Same as `--compat-options no-external-downloader-progress`. Use this to enable all future compat options # INSTALLATION @@ -179,7 +184,7 @@ ## UPDATE If you [installed with PIP](https://github.com/yt-dlp/yt-dlp/wiki/Installation#with-pip), simply re-run the same command that was used to install the program -For other third-party package managers, see [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation) or refer their documentation +For other third-party package managers, see [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation#third-party-package-managers) or refer their documentation @@ -217,7 +222,7 @@ #### Misc -Note: The manpages, shell completion files etc. are available in the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) +**Note**: The manpages, shell completion files etc. are available in the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) ## DEPENDENCIES Python versions 3.7+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. @@ -233,8 +238,9 @@ ### Strongly recommended * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html) - - **Note**: There are some regressions in newer ffmpeg versions that causes various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds + There are bugs in ffmpeg that causes various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds + + **Important**: What you need is ffmpeg *binary*, **NOT** [the python package of the same name](https://pypi.org/project/ffmpeg) ### Networking * [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE) @@ -281,7 +287,7 @@ ### Standalone PyInstaller Builds `pyinst.py` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which is further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate). -Note that pyinstaller with versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment. +**Note**: Pyinstaller versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment. **Important**: Running `pyinstaller` directly **without** using `pyinst.py` is **not** officially supported. This may or may not work correctly. @@ -414,6 +420,8 @@ ## Network Options: --source-address IP Client-side IP address to bind to -4, --force-ipv4 Make all connections via IPv4 -6, --force-ipv6 Make all connections via IPv6 + --enable-file-urls Enable file:// URLs. This is disabled by + default for security reasons. ## Geo-restriction: --geo-verification-proxy URL Use this proxy to verify the IP address for @@ -448,7 +456,9 @@ ## Video Selection: --date DATE Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format [now|today|yesterday][-N[day|week|month|year]]. - E.g. --date today-2weeks + E.g. "--date today-2weeks" downloads + only videos uploaded on the same day two + weeks ago --datebefore DATE Download only videos uploaded on or before this date. The date formats accepted is the same as --date @@ -525,8 +535,8 @@ ## Download Options: linear=1::2 --retry-sleep fragment:exp=1:20 --skip-unavailable-fragments Skip unavailable fragments for DASH, hlsnative and ISM downloads (default) - (Alias: --no-abort-on-unavailable-fragment) - --abort-on-unavailable-fragment + (Alias: --no-abort-on-unavailable-fragments) + --abort-on-unavailable-fragments Abort download if a fragment is unavailable (Alias: --no-skip-unavailable-fragments) --keep-fragments Keep downloaded fragments on disk after @@ -725,7 +735,7 @@ ## Verbosity and Simulation Options: screen, optionally prefixed with when to print it, separated by a ":". Supported values of "WHEN" are the same as that of - --use-postprocessor, and "video" (default). + --use-postprocessor (default: video). Implies --quiet. Implies --simulate unless --no-simulate or later stages of WHEN are used. This option can be used multiple times @@ -893,11 +903,11 @@ ## Post-Processing Options: specific bitrate like 128K (default 5) --remux-video FORMAT Remux the video into another container if necessary (currently supported: avi, flv, - mkv, mov, mp4, webm, aac, aiff, alac, flac, - m4a, mka, mp3, ogg, opus, vorbis, wav). If - target container does not support the - video/audio codec, remuxing will fail. You - can specify multiple rules; e.g. + gif, mkv, mov, mp4, webm, aac, aiff, alac, + flac, m4a, mka, mp3, ogg, opus, vorbis, + wav). If target container does not support + the video/audio codec, remuxing will fail. + You can specify multiple rules; e.g. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 and anything else to mkv --recode-video FORMAT Re-encode the video into another format if @@ -952,13 +962,18 @@ ## Post-Processing Options: mkv/mka video files --no-embed-info-json Do not embed the infojson as an attachment to the video file - --parse-metadata FROM:TO Parse additional metadata like title/artist + --parse-metadata [WHEN:]FROM:TO + Parse additional metadata like title/artist from other fields; see "MODIFYING METADATA" - for details - --replace-in-metadata FIELDS REGEX REPLACE + for details. Supported values of "WHEN" are + the same as that of --use-postprocessor + (default: pre_process) + --replace-in-metadata [WHEN:]FIELDS REGEX REPLACE Replace text in a metadata field using the given regex. This option can be used - multiple times + multiple times. Supported values of "WHEN" + are the same as that of --use-postprocessor + (default: pre_process) --xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards) --concat-playlist POLICY Concatenate videos in a playlist. One of @@ -979,18 +994,18 @@ ## Post-Processing Options: --ffmpeg-location PATH Location of the ffmpeg binary; either the path to the binary or its containing directory --exec [WHEN:]CMD Execute a command, optionally prefixed with - when to execute it (after_move if - unspecified), separated by a ":". Supported - values of "WHEN" are the same as that of - --use-postprocessor. Same syntax as the - output template can be used to pass any - field as arguments to the command. After - download, an additional field "filepath" - that contains the final path of the - downloaded file is also available, and if no - fields are passed, %(filepath)q is appended - to the end of the command. This option can - be used multiple times + when to execute it, separated by a ":". + Supported values of "WHEN" are the same as + that of --use-postprocessor (default: + after_move). Same syntax as the output + template can be used to pass any field as + arguments to the command. After download, an + additional field "filepath" that contains + the final path of the downloaded file is + also available, and if no fields are passed, + %(filepath,_filename|)q is appended to the + end of the command. This option can be used + multiple times --no-exec Remove any previously defined --exec --convert-subs FORMAT Convert the subtitles to another format (currently supported: ass, lrc, srt, vtt) @@ -1028,14 +1043,16 @@ ## Post-Processing Options: postprocessor is invoked. It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), - "before_dl" (before each video download), - "post_process" (after each video download; - default), "after_move" (after moving video - file to it's final locations), "after_video" - (after downloading and processing all - formats of a video), or "playlist" (at end - of playlist). This option can be used - multiple times to add different postprocessors + "video" (after --format; before + --print/--output), "before_dl" (before each + video download), "post_process" (after each + video download; default), "after_move" + (after moving video file to it's final + locations), "after_video" (after downloading + and processing all formats of a video), or + "playlist" (at end of playlist). This option + can be used multiple times to add different + postprocessors ## SponsorBlock Options: Make chapter entries for, or remove various segments (sponsor, @@ -1102,16 +1119,22 @@ # CONFIGURATION * `yt-dlp.conf` in the home path given by `-P` * If `-P` is not given, the current directory is searched 1. **User Configuration**: - * `${XDG_CONFIG_HOME}/yt-dlp/config` (recommended on Linux/macOS) * `${XDG_CONFIG_HOME}/yt-dlp.conf` + * `${XDG_CONFIG_HOME}/yt-dlp/config` (recommended on Linux/macOS) + * `${XDG_CONFIG_HOME}/yt-dlp/config.txt` + * `${APPDATA}/yt-dlp.conf` * `${APPDATA}/yt-dlp/config` (recommended on Windows) * `${APPDATA}/yt-dlp/config.txt` * `~/yt-dlp.conf` * `~/yt-dlp.conf.txt` + * `~/.yt-dlp/config` + * `~/.yt-dlp/config.txt` See also: [Notes about environment variables](#notes-about-environment-variables) 1. **System Configuration**: * `/etc/yt-dlp.conf` + * `/etc/yt-dlp/config` + * `/etc/yt-dlp/config.txt` E.g. with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory: ``` @@ -1130,7 +1153,7 @@ # Save all videos under YouTube directory in your home directory -o ~/YouTube/%(title)s.%(ext)s ``` -Note that options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary as-if it were a UNIX shell. +**Note**: Options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary as-if it were a UNIX shell. You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded. @@ -1206,7 +1229,7 @@ # OUTPUT TEMPLATE -Note: Due to post-processing (i.e. merging etc.), the actual output filename might differ. Use `--print after_move:filepath` to get the name after all post-processing is complete. +**Note**: Due to post-processing (i.e. merging etc.), the actual output filename might differ. Use `--print after_move:filepath` to get the name after all post-processing is complete. The available fields are: @@ -1327,7 +1350,7 @@ # OUTPUT TEMPLATE Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. E.g. for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory. -Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default). +**Note**: Some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default). **Tip**: Look at the `-j` output to identify which fields are available for the particular URL @@ -1468,7 +1491,7 @@ ## Filtering Formats Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`. -Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering. +**Note**: None of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering. Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats. @@ -1721,7 +1744,7 @@ # EXTRACTOR ARGUMENTS The following extractors use this feature: #### youtube -* `lang`: Language code to prefer translated metadata of this language (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes +* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively * `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients. * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details @@ -1775,26 +1798,78 @@ #### rokfinchannel #### twitter * `force_graphql`: Force usage of the GraphQL API. By default it will only be used if login cookies are provided -NOTE: These options may be changed/removed in the future without concern for backward compatibility +**Note**: These options may be changed/removed in the future without concern for backward compatibility # PLUGINS -Plugins are loaded from `/ytdlp_plugins//__init__.py`; where `` is the directory of the binary (`/yt-dlp`), or the root directory of the module if you are running directly from source-code (`/yt_dlp/__main__.py`). Plugins are currently not supported for the `pip` version +Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. **Use plugins at your own risk and only if you trust the code!** -Plugins can be of ``s `extractor` or `postprocessor`. Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it. Postprocessor plugins can be invoked using `--use-postprocessor NAME`. +Plugins can be of ``s `extractor` or `postprocessor`. +- Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it. +- Extractor plugins take priority over builtin extractors. +- Postprocessor plugins can be invoked using `--use-postprocessor NAME`. -See [ytdlp_plugins](ytdlp_plugins) for example plugins. -Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. Use plugins at your own risk and only if you trust the code +Plugins are loaded from the namespace packages `yt_dlp_plugins.extractor` and `yt_dlp_plugins.postprocessor`. -If you are a plugin author, add [ytdlp-plugins](https://github.com/topics/ytdlp-plugins) as a topic to your repository for discoverability +In other words, the file structure on the disk looks something like: + + yt_dlp_plugins/ + extractor/ + myplugin.py + postprocessor/ + myplugin.py + +yt-dlp looks for these `yt_dlp_plugins` namespace folders in many locations (see below) and loads in plugins from **all** of them. See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins) +## Installing Plugins +Plugins can be installed using various methods and locations. + +1. **Configuration directories**: + Plugin packages (containing a `yt_dlp_plugins` namespace folder) can be dropped into the following standard [configuration locations](#configuration): + * **User Plugins** + * `${XDG_CONFIG_HOME}/yt-dlp/plugins//yt_dlp_plugins/` (recommended on Linux/macOS) + * `${XDG_CONFIG_HOME}/yt-dlp-plugins//yt_dlp_plugins/` + * `${APPDATA}/yt-dlp/plugins//yt_dlp_plugins/` (recommended on Windows) + * `${APPDATA}/yt-dlp-plugins//yt_dlp_plugins/` + * `~/.yt-dlp/plugins//yt_dlp_plugins/` + * `~/yt-dlp-plugins//yt_dlp_plugins/` + * **System Plugins** + * `/etc/yt-dlp/plugins//yt_dlp_plugins/` + * `/etc/yt-dlp-plugins//yt_dlp_plugins/` +2. **Executable location**: Plugin packages can similarly be installed in a `yt-dlp-plugins` directory under the executable location: + * Binary: where `/yt-dlp.exe`, `/yt-dlp-plugins//yt_dlp_plugins/` + * Source: where `/yt_dlp/__main__.py`, `/yt-dlp-plugins//yt_dlp_plugins/` + +3. **pip and other locations in `PYTHONPATH`** + * Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example. + * Note: plugin files between plugin packages installed with pip must have unique filenames. + * Any path in `PYTHONPATH` is searched in for the `yt_dlp_plugins` namespace folder. + * Note: This does not apply for Pyinstaller/py2exe builds. + + +`.zip`, `.egg` and `.whl` archives containing a `yt_dlp_plugins` namespace folder in their root are also supported as plugin packages. +* e.g. `${XDG_CONFIG_HOME}/yt-dlp/plugins/mypluginpkg.zip` where `mypluginpkg.zip` contains `yt_dlp_plugins//myplugin.py` + +Run yt-dlp with `--verbose` to check if the plugin has been loaded. + +## Developing Plugins + +See the [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) repo for a template plugin package and the [Plugin Development](https://github.com/yt-dlp/yt-dlp/wiki/Plugin-Development) section of the wiki for a plugin development guide. + +All public classes with a name ending in `IE`/`PP` are imported from each file for extractors and postprocessors repectively. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`). + +To replace an existing extractor with a subclass of one, set the `plugin_name` class keyword argument (e.g. `class MyPluginIE(ABuiltInIE, plugin_name='myplugin')` will replace `ABuiltInIE` with `MyPluginIE`). Since the extractor replaces the parent, you should exclude the subclass extractor from being imported separately by making it private using one of the methods described above. + +If you are a plugin author, add [yt-dlp-plugins](https://github.com/topics/yt-dlp-plugins) as a topic to your repository for discoverability. + +See the [Developer Instructions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) on how to write and test an extractor. # EMBEDDING YT-DLP diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index c502bdf896..d74ea202f0 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -40,8 +40,12 @@ def main(): _ALL_CLASSES = get_all_ies() # Must be before import + import yt_dlp.plugins from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor + # Filter out plugins + _ALL_CLASSES = [cls for cls in _ALL_CLASSES if not cls.__module__.startswith(f'{yt_dlp.plugins.PACKAGE_NAME}.')] + DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR}) module_src = '\n'.join(( MODULE_TEMPLATE, diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000..97718ec431 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,5 @@ +[build-system] +build-backend = 'setuptools.build_meta' +# https://github.com/yt-dlp/yt-dlp/issues/5941 +# https://github.com/pypa/distutils/issues/17 +requires = ['setuptools > 50'] diff --git a/setup.cfg b/setup.cfg index 2def390f51..6deaa79715 100644 --- a/setup.cfg +++ b/setup.cfg @@ -26,12 +26,12 @@ markers = [tox:tox] skipsdist = true -envlist = py{36,37,38,39,310},pypy{36,37,38,39} +envlist = py{36,37,38,39,310,311},pypy{36,37,38,39} skip_missing_interpreters = true [testenv] # tox deps = - pytest + pytest commands = pytest {posargs:"-m not download"} passenv = HOME # For test_compat_expanduser setenv = diff --git a/setup.py b/setup.py index 88716152a4..e2520ff6fc 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,12 @@ #!/usr/bin/env python3 -import os.path -import subprocess +# Allow execution from anywhere +import os import sys + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +import subprocess import warnings try: diff --git a/supportedsites.md b/supportedsites.md index fbada177e4..5cef7ac907 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -51,6 +51,8 @@ # Supported sites - **afreecatv:live**: [afreecatv] afreecatv.com - **afreecatv:user** - **AirMozilla** + - **AirTV** + - **AitubeKZVideo** - **AliExpressLive** - **AlJazeera** - **Allocine** @@ -60,6 +62,10 @@ # Supported sites - **Alura**: [alura] - **AluraCourse**: [aluracourse] - **Amara** + - **AmazonMiniTV** + - **amazonminitv:season**: Amazon MiniTV Series, "minitv:season:" prefix + - **amazonminitv:series** + - **AmazonReviews** - **AmazonStore** - **AMCNetworks** - **AmericasTestKitchen** @@ -130,6 +136,8 @@ # Supported sites - **BBVTV**: [bbvtv] - **BBVTVLive**: [bbvtv] - **BBVTVRecordings**: [bbvtv] + - **BeatBumpPlaylist** + - **BeatBumpVideo** - **Beatport** - **Beeg** - **BehindKink** @@ -157,7 +165,7 @@ # Supported sites - **BilibiliSpacePlaylist** - **BilibiliSpaceVideo** - **BiliIntl**: [biliintl] - - **BiliIntlSeries**: [biliintl] + - **biliIntl:series**: [biliintl] - **BiliLive** - **BioBioChileTV** - **Biography** @@ -345,6 +353,8 @@ # Supported sites - **DrTuber** - **drtv** - **drtv:live** + - **drtv:season** + - **drtv:series** - **DTube** - **duboku**: www.duboku.io - **duboku:list**: www.duboku.io entire series @@ -387,6 +397,7 @@ # Supported sites - **ESPNCricInfo** - **EsriVideo** - **Europa** + - **EuroParlWebstream** - **EuropeanTour** - **Eurosport** - **EUScreen** @@ -599,6 +610,8 @@ # Supported sites - **JWPlatform** - **Kakao** - **Kaltura** + - **Kanal2** + - **KankaNews** - **Karaoketv** - **KarriereVideos** - **Katsomo** @@ -607,8 +620,10 @@ # Supported sites - **Ketnet** - **khanacademy** - **khanacademy:unit** + - **Kick** - **Kicker** - **KickStarter** + - **KickVOD** - **KinjaEmbed** - **KinoPoisk** - **KompasVideo** @@ -709,6 +724,7 @@ # Supported sites - **Mediasite** - **MediasiteCatalog** - **MediasiteNamedCatalog** + - **MediaStream** - **MediaWorksNZVOD** - **Medici** - **megaphone.fm**: megaphone.fm embedded players @@ -845,6 +861,7 @@ # Supported sites - **NetPlusTVRecordings**: [netplus] - **Netverse** - **NetversePlaylist** + - **NetverseSearch**: "netsearch:" prefix - **Netzkino** - **Newgrounds** - **Newgrounds:playlist** @@ -887,6 +904,7 @@ # Supported sites - **njoy:embed** - **NJPWWorld**: [njpwworld] 新日本プロレスワールド - **NobelPrize** + - **NoicePodcast** - **NonkTube** - **NoodleMagazine** - **Noovo** @@ -933,6 +951,7 @@ # Supported sites - **on24**: ON24 - **OnDemandKorea** - **OneFootball** + - **OnePlacePodcast** - **onet.pl** - **onet.tv** - **onet.tv:channel** @@ -1022,11 +1041,13 @@ # Supported sites - **PokerGoCollection**: [pokergo] - **PolsatGo** - **PolskieRadio** + - **polskieradio:audition** + - **polskieradio:category** - **polskieradio:kierowcow** + - **polskieradio:legacy** - **polskieradio:player** - **polskieradio:podcast** - **polskieradio:​podcast:list** - - **PolskieRadioCategory** - **Popcorntimes** - **PopcornTV** - **PornCom** @@ -1155,6 +1176,7 @@ # Supported sites - **rtvslo.si** - **RUHD** - **Rule34Video** + - **Rumble** - **RumbleChannel** - **RumbleEmbed** - **Ruptly** @@ -1189,6 +1211,7 @@ # Supported sites - **screen.yahoo:search**: Yahoo screen search; "yvsearch:" prefix - **Screen9** - **Screencast** + - **Screencastify** - **ScreencastOMatic** - **ScrippsNetworks** - **scrippsnetworks:watch** @@ -1212,6 +1235,7 @@ # Supported sites - **ShugiinItvLive**: 衆議院インターネット審議中継 - **ShugiinItvLiveRoom**: 衆議院インターネット審議中継 (中継) - **ShugiinItvVod**: 衆議院インターネット審議中継 (ビデオライブラリ) + - **SibnetEmbed** - **simplecast** - **simplecast:episode** - **simplecast:podcast** @@ -1227,7 +1251,7 @@ # Supported sites - **skynewsarabia:video** - **SkyNewsAU** - **Slideshare** - - **SlidesLive**: (**Currently broken**) + - **SlidesLive** - **Slutload** - **Smotrim** - **Snotr** @@ -1241,6 +1265,7 @@ # Supported sites - **soundcloud:set**: [soundcloud] - **soundcloud:trackstation**: [soundcloud] - **soundcloud:user**: [soundcloud] + - **soundcloud:​user:permalink**: [soundcloud] - **SoundcloudEmbed** - **soundgasm** - **soundgasm:profile** @@ -1352,10 +1377,14 @@ # Supported sites - **ThisAmericanLife** - **ThisAV** - **ThisOldHouse** + - **ThisVid** + - **ThisVidMember** + - **ThisVidPlaylist** - **ThreeSpeak** - **ThreeSpeakUser** - **TikTok** - **tiktok:effect**: (**Currently broken**) + - **tiktok:live** - **tiktok:sound**: (**Currently broken**) - **tiktok:tag**: (**Currently broken**) - **tiktok:user**: (**Currently broken**) @@ -1383,6 +1412,7 @@ # Supported sites - **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix - **TrovoChannelVod**: All VODs of a trovo.live channel; "trovovod:" prefix - **TrovoVod** + - **TrtCocukVideo** - **TrueID** - **TruNews** - **Truth** @@ -1483,6 +1513,7 @@ # Supported sites - **VeeHD** - **Veo** - **Veoh** + - **veoh:user** - **Vesti**: Вести.Ru - **Vevo** - **VevoPlaylist** @@ -1502,6 +1533,11 @@ # Supported sites - **video.sky.it:live** - **VideoDetective** - **videofy.me** + - **VideoKen** + - **VideoKenCategory** + - **VideoKenPlayer** + - **VideoKenPlaylist** + - **VideoKenTopic** - **videomore** - **videomore:season** - **videomore:video** @@ -1521,6 +1557,7 @@ # Supported sites - **vimeo:group**: [vimeo] - **vimeo:likes**: [vimeo] Vimeo user likes - **vimeo:ondemand**: [vimeo] + - **vimeo:pro**: [vimeo] - **vimeo:review**: [vimeo] Review pages on vimeo - **vimeo:user**: [vimeo] - **vimeo:watchlater**: [vimeo] Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication) @@ -1549,6 +1586,7 @@ # Supported sites - **VoiceRepublic** - **voicy** - **voicy:channel** + - **VolejTV** - **Voot** - **VootSeries** - **VoxMedia** @@ -1591,6 +1629,7 @@ # Supported sites - **WDRElefant** - **WDRPage** - **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix + - **Webcamerapl** - **Webcaster** - **WebcasterFeed** - **WebOfStories** @@ -1604,6 +1643,7 @@ # Supported sites - **wikimedia.org** - **Willow** - **WimTV** + - **WinSportsVideo** - **Wistia** - **WistiaChannel** - **WistiaPlaylist** @@ -1618,6 +1658,7 @@ # Supported sites - **WWE** - **wyborcza:video** - **WyborczaPodcast** + - **Xanimu** - **XBef** - **XboxClips** - **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing diff --git a/test/test_config.py b/test/test_config.py new file mode 100644 index 0000000000..a393b65348 --- /dev/null +++ b/test/test_config.py @@ -0,0 +1,227 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest +import unittest.mock + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import contextlib +import itertools +from pathlib import Path + +from yt_dlp.compat import compat_expanduser +from yt_dlp.options import create_parser, parseOpts +from yt_dlp.utils import Config, get_executable_path + +ENVIRON_DEFAULTS = { + 'HOME': None, + 'XDG_CONFIG_HOME': '/_xdg_config_home/', + 'USERPROFILE': 'C:/Users/testing/', + 'APPDATA': 'C:/Users/testing/AppData/Roaming/', + 'HOMEDRIVE': 'C:/', + 'HOMEPATH': 'Users/testing/', +} + + +@contextlib.contextmanager +def set_environ(**kwargs): + saved_environ = os.environ.copy() + + for name, value in {**ENVIRON_DEFAULTS, **kwargs}.items(): + if value is None: + os.environ.pop(name, None) + else: + os.environ[name] = value + + yield + + os.environ.clear() + os.environ.update(saved_environ) + + +def _generate_expected_groups(): + xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config') + appdata_dir = os.getenv('appdata') + home_dir = compat_expanduser('~') + return { + 'Portable': [ + Path(get_executable_path(), 'yt-dlp.conf'), + ], + 'Home': [ + Path('yt-dlp.conf'), + ], + 'User': [ + Path(xdg_config_home, 'yt-dlp.conf'), + Path(xdg_config_home, 'yt-dlp', 'config'), + Path(xdg_config_home, 'yt-dlp', 'config.txt'), + *(( + Path(appdata_dir, 'yt-dlp.conf'), + Path(appdata_dir, 'yt-dlp', 'config'), + Path(appdata_dir, 'yt-dlp', 'config.txt'), + ) if appdata_dir else ()), + Path(home_dir, 'yt-dlp.conf'), + Path(home_dir, 'yt-dlp.conf.txt'), + Path(home_dir, '.yt-dlp', 'config'), + Path(home_dir, '.yt-dlp', 'config.txt'), + ], + 'System': [ + Path('/etc/yt-dlp.conf'), + Path('/etc/yt-dlp/config'), + Path('/etc/yt-dlp/config.txt'), + ] + } + + +class TestConfig(unittest.TestCase): + maxDiff = None + + @set_environ() + def test_config__ENVIRON_DEFAULTS_sanity(self): + expected = make_expected() + self.assertCountEqual( + set(expected), expected, + 'ENVIRON_DEFAULTS produces non unique names') + + def test_config_all_environ_values(self): + for name, value in ENVIRON_DEFAULTS.items(): + for new_value in (None, '', '.', value or '/some/dir'): + with set_environ(**{name: new_value}): + self._simple_grouping_test() + + def test_config_default_expected_locations(self): + files, _ = self._simple_config_test() + self.assertEqual( + files, make_expected(), + 'Not all expected locations have been checked') + + def test_config_default_grouping(self): + self._simple_grouping_test() + + def _simple_grouping_test(self): + expected_groups = make_expected_groups() + for name, group in expected_groups.items(): + for index, existing_path in enumerate(group): + result, opts = self._simple_config_test(existing_path) + expected = expected_from_expected_groups(expected_groups, existing_path) + self.assertEqual( + result, expected, + f'The checked locations do not match the expected ({name}, {index})') + self.assertEqual( + opts.outtmpl['default'], '1', + f'The used result value was incorrect ({name}, {index})') + + def _simple_config_test(self, *stop_paths): + encountered = 0 + paths = [] + + def read_file(filename, default=[]): + nonlocal encountered + path = Path(filename) + paths.append(path) + if path in stop_paths: + encountered += 1 + return ['-o', f'{encountered}'] + + with ConfigMock(read_file): + _, opts, _ = parseOpts([], False) + + return paths, opts + + @set_environ() + def test_config_early_exit_commandline(self): + self._early_exit_test(0, '--ignore-config') + + @set_environ() + def test_config_early_exit_files(self): + for index, _ in enumerate(make_expected(), 1): + self._early_exit_test(index) + + def _early_exit_test(self, allowed_reads, *args): + reads = 0 + + def read_file(filename, default=[]): + nonlocal reads + reads += 1 + + if reads > allowed_reads: + self.fail('The remaining config was not ignored') + elif reads == allowed_reads: + return ['--ignore-config'] + + with ConfigMock(read_file): + parseOpts(args, False) + + @set_environ() + def test_config_override_commandline(self): + self._override_test(0, '-o', 'pass') + + @set_environ() + def test_config_override_files(self): + for index, _ in enumerate(make_expected(), 1): + self._override_test(index) + + def _override_test(self, start_index, *args): + index = 0 + + def read_file(filename, default=[]): + nonlocal index + index += 1 + + if index > start_index: + return ['-o', 'fail'] + elif index == start_index: + return ['-o', 'pass'] + + with ConfigMock(read_file): + _, opts, _ = parseOpts(args, False) + + self.assertEqual( + opts.outtmpl['default'], 'pass', + 'The earlier group did not override the later ones') + + +@contextlib.contextmanager +def ConfigMock(read_file=None): + with unittest.mock.patch('yt_dlp.options.Config') as mock: + mock.return_value = Config(create_parser()) + if read_file is not None: + mock.read_file = read_file + + yield mock + + +def make_expected(*filepaths): + return expected_from_expected_groups(_generate_expected_groups(), *filepaths) + + +def make_expected_groups(*filepaths): + return _filter_expected_groups(_generate_expected_groups(), filepaths) + + +def expected_from_expected_groups(expected_groups, *filepaths): + return list(itertools.chain.from_iterable( + _filter_expected_groups(expected_groups, filepaths).values())) + + +def _filter_expected_groups(expected, filepaths): + if not filepaths: + return expected + + result = {} + for group, paths in expected.items(): + new_paths = [] + for path in paths: + new_paths.append(path) + if path in filepaths: + break + + result[group] = new_paths + + return result + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_plugins.py b/test/test_plugins.py new file mode 100644 index 0000000000..6cde579e1e --- /dev/null +++ b/test/test_plugins.py @@ -0,0 +1,73 @@ +import importlib +import os +import shutil +import sys +import unittest +from pathlib import Path + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +TEST_DATA_DIR = Path(os.path.dirname(os.path.abspath(__file__)), 'testdata') +sys.path.append(str(TEST_DATA_DIR)) +importlib.invalidate_caches() + +from yt_dlp.plugins import PACKAGE_NAME, directories, load_plugins + + +class TestPlugins(unittest.TestCase): + + TEST_PLUGIN_DIR = TEST_DATA_DIR / PACKAGE_NAME + + def test_directories_containing_plugins(self): + self.assertIn(self.TEST_PLUGIN_DIR, map(Path, directories())) + + def test_extractor_classes(self): + for module_name in tuple(sys.modules): + if module_name.startswith(f'{PACKAGE_NAME}.extractor'): + del sys.modules[module_name] + plugins_ie = load_plugins('extractor', 'IE') + + self.assertIn(f'{PACKAGE_NAME}.extractor.normal', sys.modules.keys()) + self.assertIn('NormalPluginIE', plugins_ie.keys()) + + # don't load modules with underscore prefix + self.assertFalse( + f'{PACKAGE_NAME}.extractor._ignore' in sys.modules.keys(), + 'loaded module beginning with underscore') + self.assertNotIn('IgnorePluginIE', plugins_ie.keys()) + + # Don't load extractors with underscore prefix + self.assertNotIn('_IgnoreUnderscorePluginIE', plugins_ie.keys()) + + # Don't load extractors not specified in __all__ (if supplied) + self.assertNotIn('IgnoreNotInAllPluginIE', plugins_ie.keys()) + self.assertIn('InAllPluginIE', plugins_ie.keys()) + + def test_postprocessor_classes(self): + plugins_pp = load_plugins('postprocessor', 'PP') + self.assertIn('NormalPluginPP', plugins_pp.keys()) + + def test_importing_zipped_module(self): + zip_path = TEST_DATA_DIR / 'zipped_plugins.zip' + shutil.make_archive(str(zip_path)[:-4], 'zip', str(zip_path)[:-4]) + sys.path.append(str(zip_path)) # add zip to search paths + importlib.invalidate_caches() # reset the import caches + + try: + for plugin_type in ('extractor', 'postprocessor'): + package = importlib.import_module(f'{PACKAGE_NAME}.{plugin_type}') + self.assertIn(zip_path / PACKAGE_NAME / plugin_type, map(Path, package.__path__)) + + plugins_ie = load_plugins('extractor', 'IE') + self.assertIn('ZippedPluginIE', plugins_ie.keys()) + + plugins_pp = load_plugins('postprocessor', 'PP') + self.assertIn('ZippedPluginPP', plugins_pp.keys()) + + finally: + sys.path.remove(str(zip_path)) + os.remove(zip_path) + importlib.invalidate_caches() # reset the import caches + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index 49ab3796b9..3d5a6ea6ba 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -954,6 +954,85 @@ def test_escape_url(self): ) self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') + def test_js_to_json_vars_strings(self): + self.assertDictEqual( + json.loads(js_to_json( + '''{ + 'null': a, + 'nullStr': b, + 'true': c, + 'trueStr': d, + 'false': e, + 'falseStr': f, + 'unresolvedVar': g, + }''', + { + 'a': 'null', + 'b': '"null"', + 'c': 'true', + 'd': '"true"', + 'e': 'false', + 'f': '"false"', + 'g': 'var', + } + )), + { + 'null': None, + 'nullStr': 'null', + 'true': True, + 'trueStr': 'true', + 'false': False, + 'falseStr': 'false', + 'unresolvedVar': 'var' + } + ) + + self.assertDictEqual( + json.loads(js_to_json( + '''{ + 'int': a, + 'intStr': b, + 'float': c, + 'floatStr': d, + }''', + { + 'a': '123', + 'b': '"123"', + 'c': '1.23', + 'd': '"1.23"', + } + )), + { + 'int': 123, + 'intStr': '123', + 'float': 1.23, + 'floatStr': '1.23', + } + ) + + self.assertDictEqual( + json.loads(js_to_json( + '''{ + 'object': a, + 'objectStr': b, + 'array': c, + 'arrayStr': d, + }''', + { + 'a': '{}', + 'b': '"{}"', + 'c': '[]', + 'd': '"[]"', + } + )), + { + 'object': {}, + 'objectStr': '{}', + 'array': [], + 'arrayStr': '[]', + } + ) + def test_js_to_json_realworld(self): inp = '''{ 'clip':{'provider':'pseudo'} @@ -1874,6 +1953,8 @@ def test_get_compatible_ext(self): vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['m4a']), 'mkv') self.assertEqual(get_compatible_ext( vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['webm']), 'webm') + self.assertEqual(get_compatible_ext( + vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['weba']), 'webm') self.assertEqual(get_compatible_ext( vcodecs=['h264'], acodecs=['mp4a'], vexts=['mov'], aexts=['m4a']), 'mp4') diff --git a/test/testdata/yt_dlp_plugins/extractor/_ignore.py b/test/testdata/yt_dlp_plugins/extractor/_ignore.py new file mode 100644 index 0000000000..57faf75bbc --- /dev/null +++ b/test/testdata/yt_dlp_plugins/extractor/_ignore.py @@ -0,0 +1,5 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class IgnorePluginIE(InfoExtractor): + pass diff --git a/test/testdata/yt_dlp_plugins/extractor/ignore.py b/test/testdata/yt_dlp_plugins/extractor/ignore.py new file mode 100644 index 0000000000..816a16aa20 --- /dev/null +++ b/test/testdata/yt_dlp_plugins/extractor/ignore.py @@ -0,0 +1,12 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class IgnoreNotInAllPluginIE(InfoExtractor): + pass + + +class InAllPluginIE(InfoExtractor): + pass + + +__all__ = ['InAllPluginIE'] diff --git a/test/testdata/yt_dlp_plugins/extractor/normal.py b/test/testdata/yt_dlp_plugins/extractor/normal.py new file mode 100644 index 0000000000..b09009bdc6 --- /dev/null +++ b/test/testdata/yt_dlp_plugins/extractor/normal.py @@ -0,0 +1,9 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class NormalPluginIE(InfoExtractor): + pass + + +class _IgnoreUnderscorePluginIE(InfoExtractor): + pass diff --git a/test/testdata/yt_dlp_plugins/postprocessor/normal.py b/test/testdata/yt_dlp_plugins/postprocessor/normal.py new file mode 100644 index 0000000000..315b85a488 --- /dev/null +++ b/test/testdata/yt_dlp_plugins/postprocessor/normal.py @@ -0,0 +1,5 @@ +from yt_dlp.postprocessor.common import PostProcessor + + +class NormalPluginPP(PostProcessor): + pass diff --git a/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py b/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py new file mode 100644 index 0000000000..01542e0d8d --- /dev/null +++ b/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py @@ -0,0 +1,5 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class ZippedPluginIE(InfoExtractor): + pass diff --git a/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py b/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py new file mode 100644 index 0000000000..223822bd6f --- /dev/null +++ b/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py @@ -0,0 +1,5 @@ +from yt_dlp.postprocessor.common import PostProcessor + + +class ZippedPluginPP(PostProcessor): + pass diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 8d28783d86..1fb44e7f9e 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -32,7 +32,8 @@ from .extractor.common import UnsupportedURLIE from .extractor.openload import PhantomJSwrapper from .minicurses import format_text -from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors +from .plugins import directories as plugin_directories +from .postprocessor import _PLUGIN_CLASSES as plugin_pps from .postprocessor import ( EmbedThumbnailPP, FFmpegFixupDuplicateMoovPP, @@ -317,6 +318,7 @@ class YoutubeDL: If not provided and the key is encrypted, yt-dlp will ask interactively prefer_insecure: Use HTTP instead of HTTPS to retrieve information. (Only supported by some extractors) + enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons. http_headers: A dictionary of custom headers to be used for all requests proxy: URL of the proxy server to use geo_verification_proxy: URL of the proxy to use for IP address verification @@ -584,7 +586,6 @@ def __init__(self, params=None, auto_init=True): self._playlist_urls = set() self.cache = Cache(self) - windows_enable_vt_mode() stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout self._out_files = Namespace( out=stdout, @@ -593,6 +594,12 @@ def __init__(self, params=None, auto_init=True): console=None if compat_os_name == 'nt' else next( filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None) ) + + try: + windows_enable_vt_mode() + except Exception as e: + self.write_debug(f'Failed to enable VT mode: {e}') + self._allow_colors = Namespace(**{ type_: not self.params.get('no_color') and supports_terminal_sequences(stream) for type_, stream in self._out_files.items_ if type_ != 'console' @@ -1068,7 +1075,7 @@ def _outtmpl_expandpath(outtmpl): # correspondingly that is not what we want since we need to keep # '%%' intact for template dict substitution step. Working around # with boundary-alike separator hack. - sep = ''.join([random.choice(ascii_letters) for _ in range(32)]) + sep = ''.join(random.choices(ascii_letters, k=32)) outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$') # outtmpl should be expand_path'ed before template dict substitution @@ -1626,8 +1633,8 @@ def process_ie_result(self, ie_result, download=True, extra_info=None): if result_type in ('url', 'url_transparent'): ie_result['url'] = sanitize_url( ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https') - if ie_result.get('original_url'): - extra_info.setdefault('original_url', ie_result['original_url']) + if ie_result.get('original_url') and not extra_info.get('original_url'): + extra_info = {'original_url': ie_result['original_url'], **extra_info} extract_flat = self.params.get('extract_flat', False) if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) @@ -1862,11 +1869,10 @@ def __process_playlist(self, ie_result, download): self.to_screen('[download] Downloading item %s of %s' % ( self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS))) - extra.update({ + entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({ 'playlist_index': playlist_index, 'playlist_autonumber': i + 1, - }) - entry_result = self.__process_iterable_entry(entry, download, extra) + }, extra)) if not entry_result: failures += 1 if failures >= max_failures: @@ -2977,6 +2983,16 @@ def process_info(self, info_dict): # Does nothing under normal operation - for backward compatibility of process_info self.post_extract(info_dict) + + def replace_info_dict(new_info): + nonlocal info_dict + if new_info == info_dict: + return + info_dict.clear() + info_dict.update(new_info) + + new_info, _ = self.pre_process(info_dict, 'video') + replace_info_dict(new_info) self._num_downloads += 1 # info_dict['_filename'] needs to be set for backward compatibility @@ -3090,13 +3106,6 @@ def _write_link_file(link_type): for link_type, should_write in write_links.items()): return - def replace_info_dict(new_info): - nonlocal info_dict - if new_info == info_dict: - return - info_dict.clear() - info_dict.update(new_info) - new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) replace_info_dict(new_info) @@ -3388,6 +3397,7 @@ def sanitize_info(info_dict, remove_private_keys=False): reject = lambda k, v: v is None or k.startswith('__') or k in { 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber', + '_format_sort_fields', } else: reject = lambda k, v: False @@ -3457,7 +3467,8 @@ def run_pp(self, pp, infodict): return infodict def run_all_pps(self, key, info, *, additional_pps=None): - self._forceprint(key, info) + if key != 'video': + self._forceprint(key, info) for pp in (additional_pps or []) + self._pps[key]: info = self.run_pp(pp, info) return info @@ -3726,7 +3737,10 @@ def print_debug_header(self): # These imports can be slow. So import them only as needed from .extractor.extractors import _LAZY_LOADER - from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors + from .extractor.extractors import ( + _PLUGIN_CLASSES as plugin_ies, + _PLUGIN_OVERRIDES as plugin_ie_overrides + ) def get_encoding(stream): ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) @@ -3771,10 +3785,6 @@ def get_encoding(stream): write_debug('Lazy loading extractors is forcibly disabled') else: write_debug('Lazy loading extractors is disabled') - if plugin_extractors or plugin_postprocessors: - write_debug('Plugins: %s' % [ - '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}') - for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())]) if self.params['compat_opts']: write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts'])) @@ -3808,6 +3818,21 @@ def get_encoding(stream): proxy_map.update(handler.proxies) write_debug(f'Proxy map: {proxy_map}') + for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items(): + display_list = ['%s%s' % ( + klass.__name__, '' if klass.__name__ == name else f' as {name}') + for name, klass in plugins.items()] + if plugin_type == 'Extractor': + display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})' + for parent, plugins in plugin_ie_overrides.items()) + if not display_list: + continue + write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}') + + plugin_dirs = plugin_directories() + if plugin_dirs: + write_debug(f'Plugin directories: {plugin_dirs}') + # Not implemented if False and self.params.get('call_home'): ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() @@ -3857,9 +3882,12 @@ def _setup_opener(self): # https://github.com/ytdl-org/youtube-dl/issues/8227) file_handler = urllib.request.FileHandler() - def file_open(*args, **kwargs): - raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons') - file_handler.file_open = file_open + if not self.params.get('enable_file_urls'): + def file_open(*args, **kwargs): + raise urllib.error.URLError( + 'file:// URLs are explicitly disabled in yt-dlp for security reasons. ' + 'Use --enable-file-urls to enable at your own risk.') + file_handler.file_open = file_open opener = urllib.request.build_opener( proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler) @@ -3921,7 +3949,7 @@ def _write_description(self, label, ie_result, descfn): elif not self.params.get('overwrites', True) and os.path.exists(descfn): self.to_screen(f'[info] {label.title()} description is already present') elif ie_result.get('description') is None: - self.report_warning(f'There\'s no {label} description to write') + self.to_screen(f'[info] There\'s no {label} description to write') return False else: try: @@ -3937,15 +3965,18 @@ def _write_subtitles(self, info_dict, filename): ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error''' ret = [] subtitles = info_dict.get('requested_subtitles') - if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): + if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE return ret - + elif not subtitles: + self.to_screen('[info] There\'s no subtitles for the requested languages') + return ret sub_filename_base = self.prepare_filename(info_dict, 'subtitle') if not sub_filename_base: self.to_screen('[info] Skipping writing video subtitles') return ret + for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) @@ -3992,6 +4023,9 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None thumbnails, ret = [], [] if write_all or self.params.get('writethumbnail', False): thumbnails = info_dict.get('thumbnails') or [] + if not thumbnails: + self.to_screen(f'[info] There\'s no {label} thumbnails to download') + return ret multiple = write_all and len(thumbnails) > 1 if thumb_filename_base is None: diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 202f102ba9..df1a54138d 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -332,7 +332,7 @@ def parse_chapters(name, value): mobj = range_ != '-' and re.fullmatch(r'([^-]+)?\s*-\s*([^-]+)?', range_) dur = mobj and (parse_timestamp(mobj.group(1) or '0'), parse_timestamp(mobj.group(2) or 'inf')) if None in (dur or [None]): - raise ValueError(f'invalid {name} time range "{regex}". Must be of the form *start-end') + raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "*start-end"') ranges.append(dur) continue try: @@ -386,10 +386,12 @@ def metadataparser_actions(f): raise ValueError(f'{cmd} is invalid; {err}') yield action - parse_metadata = opts.parse_metadata or [] if opts.metafromtitle is not None: - parse_metadata.append('title:%s' % opts.metafromtitle) - opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, parse_metadata))) + opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle) + opts.parse_metadata = { + k: list(itertools.chain(*map(metadataparser_actions, v))) + for k, v in opts.parse_metadata.items() + } # Other options if opts.playlist_items is not None: @@ -561,11 +563,11 @@ def report_deprecation(val, old, new=None): def get_postprocessors(opts): yield from opts.add_postprocessors - if opts.parse_metadata: + for when, actions in opts.parse_metadata.items(): yield { 'key': 'MetadataParser', - 'actions': opts.parse_metadata, - 'when': 'pre_process' + 'actions': actions, + 'when': when } sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove if sponsorblock_query: @@ -701,7 +703,7 @@ def parse_options(argv=None): postprocessors = list(get_postprocessors(opts)) - print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[2:]) + print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:]) any_getting = any(getattr(opts, k) for k in ( 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename', 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl' @@ -853,6 +855,7 @@ def parse_options(argv=None): 'legacyserverconnect': opts.legacy_server_connect, 'nocheckcertificate': opts.no_check_certificate, 'prefer_insecure': opts.prefer_insecure, + 'enable_file_urls': opts.enable_file_urls, 'http_headers': opts.headers, 'proxy': opts.proxy, 'socket_timeout': opts.socket_timeout, diff --git a/yt_dlp/cache.py b/yt_dlp/cache.py index 4f9fb78d37..7be91eae5d 100644 --- a/yt_dlp/cache.py +++ b/yt_dlp/cache.py @@ -5,6 +5,7 @@ import re import shutil import traceback +import urllib.parse from .utils import expand_path, traverse_obj, version_tuple, write_json_file from .version import __version__ @@ -22,11 +23,9 @@ def _get_root_dir(self): return expand_path(res) def _get_cache_fn(self, section, key, dtype): - assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ - 'invalid section %r' % section - assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key - return os.path.join( - self._get_root_dir(), section, f'{key}.{dtype}') + assert re.match(r'^[\w.-]+$', section), f'invalid section {section!r}' + key = urllib.parse.quote(key, safe='').replace('%', ',') # encode non-ascii characters + return os.path.join(self._get_root_dir(), section, f'{key}.{dtype}') @property def enabled(self): diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 5751383712..3917af448a 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -1,9 +1,11 @@ import enum +import json import os.path import re import subprocess import sys import time +import uuid from .fragment import FragmentFD from ..compat import functools @@ -20,8 +22,10 @@ determine_ext, encodeArgument, encodeFilename, + find_available_port, handle_youtubedl_headers, remove_end, + sanitized_Request, traverse_obj, ) @@ -60,7 +64,6 @@ def real_download(self, filename, info_dict): } if filename != '-': fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen(f'\r[{self.get_basename()}] Downloaded {fsize} bytes') self.try_rename(tmpfilename, filename) status.update({ 'downloaded_bytes': fsize, @@ -129,8 +132,7 @@ def _call_downloader(self, tmpfilename, info_dict): self._debug_cmd(cmd) if 'fragments' not in info_dict: - _, stderr, returncode = Popen.run( - cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None) + _, stderr, returncode = self._call_process(cmd, info_dict) if returncode and stderr: self.to_stderr(stderr) return returncode @@ -140,7 +142,7 @@ def _call_downloader(self, tmpfilename, info_dict): retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=None, fatal=not skip_unavailable_fragments) for retry in retry_manager: - _, stderr, returncode = Popen.run(cmd, text=True, stderr=subprocess.PIPE) + _, stderr, returncode = self._call_process(cmd, info_dict) if not returncode: break # TODO: Decide whether to retry based on error code @@ -172,6 +174,9 @@ def _call_downloader(self, tmpfilename, info_dict): self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename)) return 0 + def _call_process(self, cmd, info_dict): + return Popen.run(cmd, text=True, stderr=subprocess.PIPE) + class CurlFD(ExternalFD): AVAILABLE_OPT = '-V' @@ -256,6 +261,15 @@ def supports_manifest(manifest): def _aria2c_filename(fn): return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}' + def _call_downloader(self, tmpfilename, info_dict): + # FIXME: Disabled due to https://github.com/yt-dlp/yt-dlp/issues/5931 + if False and 'no-external-downloader-progress' not in self.params.get('compat_opts', []): + info_dict['__rpc'] = { + 'port': find_available_port() or 19190, + 'secret': str(uuid.uuid4()), + } + return super()._call_downloader(tmpfilename, info_dict) + def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-c', '--console-log-level=warn', '--summary-interval=0', '--download-result=hide', @@ -276,6 +290,12 @@ def _make_cmd(self, tmpfilename, info_dict): cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=') cmd += self._configuration_args() + if '__rpc' in info_dict: + cmd += [ + '--enable-rpc', + f'--rpc-listen-port={info_dict["__rpc"]["port"]}', + f'--rpc-secret={info_dict["__rpc"]["secret"]}'] + # aria2c strips out spaces from the beginning/end of filenames and paths. # We work around this issue by adding a "./" to the beginning of the # filename and relative path, and adding a "/" at the end of the path. @@ -304,6 +324,88 @@ def _make_cmd(self, tmpfilename, info_dict): cmd += ['--', info_dict['url']] return cmd + def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()): + # Does not actually need to be UUID, just unique + sanitycheck = str(uuid.uuid4()) + d = json.dumps({ + 'jsonrpc': '2.0', + 'id': sanitycheck, + 'method': method, + 'params': [f'token:{rpc_secret}', *params], + }).encode('utf-8') + request = sanitized_Request( + f'http://localhost:{rpc_port}/jsonrpc', + data=d, headers={ + 'Content-Type': 'application/json', + 'Content-Length': f'{len(d)}', + 'Ytdl-request-proxy': '__noproxy__', + }) + with self.ydl.urlopen(request) as r: + resp = json.load(r) + assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server' + return resp['result'] + + def _call_process(self, cmd, info_dict): + if '__rpc' not in info_dict: + return super()._call_process(cmd, info_dict) + + send_rpc = functools.partial(self.aria2c_rpc, info_dict['__rpc']['port'], info_dict['__rpc']['secret']) + started = time.time() + + fragmented = 'fragments' in info_dict + frag_count = len(info_dict['fragments']) if fragmented else 1 + status = { + 'filename': info_dict.get('_filename'), + 'status': 'downloading', + 'elapsed': 0, + 'downloaded_bytes': 0, + 'fragment_count': frag_count if fragmented else None, + 'fragment_index': 0 if fragmented else None, + } + self._hook_progress(status, info_dict) + + def get_stat(key, *obj, average=False): + val = tuple(filter(None, map(float, traverse_obj(obj, (..., ..., key))))) or [0] + return sum(val) / (len(val) if average else 1) + + with Popen(cmd, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) as p: + # Add a small sleep so that RPC client can receive response, + # or the connection stalls infinitely + time.sleep(0.2) + retval = p.poll() + while retval is None: + # We don't use tellStatus as we won't know the GID without reading stdout + # Ref: https://aria2.github.io/manual/en/html/aria2c.html#aria2.tellActive + active = send_rpc('aria2.tellActive') + completed = send_rpc('aria2.tellStopped', [0, frag_count]) + + downloaded = get_stat('totalLength', completed) + get_stat('completedLength', active) + speed = get_stat('downloadSpeed', active) + total = frag_count * get_stat('totalLength', active, completed, average=True) + if total < downloaded: + total = None + + status.update({ + 'downloaded_bytes': int(downloaded), + 'speed': speed, + 'total_bytes': None if fragmented else total, + 'total_bytes_estimate': total, + 'eta': (total - downloaded) / (speed or 1), + 'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None, + 'elapsed': time.time() - started + }) + self._hook_progress(status, info_dict) + + if not active and len(completed) >= frag_count: + send_rpc('aria2.shutdown') + retval = p.wait() + break + + time.sleep(0.1) + retval = p.poll() + + return '', p.stderr.read(), retval + class HttpieFD(ExternalFD): AVAILABLE_OPT = '--version' diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 1677d38e25..d9d59c234d 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -79,6 +79,7 @@ ) from .airmozilla import AirMozillaIE from .airtv import AirTVIE +from .aitube import AitubeKZVideoIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE from .amara import AmaraIE @@ -87,7 +88,10 @@ AluraCourseIE ) from .amcnetworks import AMCNetworksIE -from .amazon import AmazonStoreIE +from .amazon import ( + AmazonStoreIE, + AmazonReviewsIE, +) from .amazonminitv import ( AmazonMiniTVIE, AmazonMiniTVSeasonIE, @@ -184,6 +188,10 @@ from .beeg import BeegIE from .behindkink import BehindKinkIE from .bellmedia import BellMediaIE +from .beatbump import ( + BeatBumpVideoIE, + BeatBumpPlaylistIE, +) from .beatport import BeatportIE from .berufetv import BerufeTVIE from .bet import BetIE @@ -467,6 +475,8 @@ from .drtv import ( DRTVIE, DRTVLiveIE, + DRTVSeasonIE, + DRTVSeriesIE, ) from .dtube import DTubeIE from .dvtv import DVTVIE @@ -827,6 +837,7 @@ from .kakao import KakaoIE from .kaltura import KalturaIE from .kanal2 import Kanal2IE +from .kankanews import KankaNewsIE from .karaoketv import KaraoketvIE from .karrierevideos import KarriereVideosIE from .keezmovies import KeezMoviesIE @@ -836,6 +847,10 @@ KhanAcademyIE, KhanAcademyUnitIE, ) +from .kick import ( + KickIE, + KickVODIE, +) from .kicker import KickerIE from .kickstarter import KickStarterIE from .kinja import KinjaEmbedIE @@ -1155,6 +1170,7 @@ from .netverse import ( NetverseIE, NetversePlaylistIE, + NetverseSearchIE, ) from .newgrounds import ( NewgroundsIE, @@ -1405,6 +1421,8 @@ from .polsatgo import PolsatGoIE from .polskieradio import ( PolskieRadioIE, + PolskieRadioLegacyIE, + PolskieRadioAuditionIE, PolskieRadioCategoryIE, PolskieRadioPlayerIE, PolskieRadioPodcastIE, @@ -1535,7 +1553,10 @@ ) from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE from .rottentomatoes import RottenTomatoesIE -from .rozhlas import RozhlasIE +from .rozhlas import ( + RozhlasIE, + RozhlasVltavaIE, +) from .rte import RteIE, RteRadioIE from .rtlnl import ( RtlNlIE, @@ -1693,6 +1714,7 @@ SoundcloudSetIE, SoundcloudRelatedIE, SoundcloudUserIE, + SoundcloudUserPermalinkIE, SoundcloudTrackStationIE, SoundcloudPlaylistIE, SoundcloudSearchIE, @@ -1854,6 +1876,11 @@ from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE from .thisoldhouse import ThisOldHouseIE +from .thisvid import ( + ThisVidIE, + ThisVidMemberIE, + ThisVidPlaylistIE, +) from .threespeak import ( ThreeSpeakIE, ThreeSpeakUserIE, @@ -1866,6 +1893,7 @@ TikTokEffectIE, TikTokTagIE, TikTokVMIE, + TikTokLiveIE, DouyinIE, ) from .tinypic import TinyPicIE @@ -1903,6 +1931,7 @@ TrovoChannelVodIE, TrovoChannelClipIE, ) +from .trtcocuk import TrtCocukVideoIE from .trueid import TrueIDIE from .trunews import TruNewsIE from .truth import TruthIE @@ -2086,6 +2115,13 @@ ) from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE +from .videoken import ( + VideoKenIE, + VideoKenPlayerIE, + VideoKenPlaylistIE, + VideoKenCategoryIE, + VideoKenTopicIE, +) from .videomore import ( VideomoreIE, VideomoreVideoIE, @@ -2153,6 +2189,7 @@ VoicyIE, VoicyChannelIE, ) +from .volejtv import VolejTVIE from .voot import ( VootIE, VootSeriesIE, @@ -2235,6 +2272,7 @@ WSJArticleIE, ) from .wwe import WWEIE +from .xanimu import XanimuIE from .xbef import XBefIE from .xboxclips import XboxClipsIE from .xfileshare import XFileShareIE diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index e0c18c8773..f1f55e87fc 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -168,7 +168,7 @@ def _real_extract(self, url): }, data=b'')['token'] links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link') - self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)]) + self._K = ''.join(random.choices('0123456789abcdef', k=16)) message = bytes_to_intlist(json.dumps({ 'k': self._K, 't': token, diff --git a/yt_dlp/extractor/aitube.py b/yt_dlp/extractor/aitube.py new file mode 100644 index 0000000000..89a64503fb --- /dev/null +++ b/yt_dlp/extractor/aitube.py @@ -0,0 +1,60 @@ +from .common import InfoExtractor +from ..utils import int_or_none, merge_dicts + + +class AitubeKZVideoIE(InfoExtractor): + _VALID_URL = r'https?://aitube\.kz/(?:video|embed/)\?(?:[^\?]+)?id=(?P[\w-]+)' + _TESTS = [{ + # id paramater as first parameter + 'url': 'https://aitube.kz/video?id=9291d29b-c038-49a1-ad42-3da2051d353c&playlistId=d55b1f5f-ef2a-4f23-b646-2a86275b86b7&season=1', + 'info_dict': { + 'id': '9291d29b-c038-49a1-ad42-3da2051d353c', + 'ext': 'mp4', + 'duration': 2174.0, + 'channel_id': '94962f73-013b-432c-8853-1bd78ca860fe', + 'like_count': int, + 'channel': 'ASTANA TV', + 'comment_count': int, + 'view_count': int, + 'description': 'Смотреть любимые сериалы и видео, поделиться видео и сериалами с друзьями и близкими', + 'thumbnail': 'https://cdn.static02.aitube.kz/kz.aitudala.aitube.staticaccess/files/ddf2a2ff-bee3-409b-b5f2-2a8202bba75b', + 'upload_date': '20221102', + 'timestamp': 1667370519, + 'title': 'Ангел хранитель 1 серия', + 'channel_follower_count': int, + } + }, { + # embed url + 'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c', + 'only_matching': True, + }, { + # id parameter is not as first paramater + 'url': 'https://aitube.kz/video?season=1&id=9291d29b-c038-49a1-ad42-3da2051d353c&playlistId=d55b1f5f-ef2a-4f23-b646-2a86275b86b7', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + nextjs_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['videoInfo'] + json_ld_data = self._search_json_ld(webpage, video_id) + + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + f'https://api-http.aitube.kz/kz.aitudala.aitube.staticaccess/video/{video_id}/video', video_id) + + return merge_dicts({ + 'id': video_id, + 'title': nextjs_data.get('title') or self._html_search_meta(['name', 'og:title'], webpage), + 'description': nextjs_data.get('description'), + 'formats': formats, + 'subtitles': subtitles, + 'view_count': (nextjs_data.get('viewCount') + or int_or_none(self._html_search_meta('ya:ovs:views_total', webpage))), + 'like_count': nextjs_data.get('likeCount'), + 'channel': nextjs_data.get('channelTitle'), + 'channel_id': nextjs_data.get('channelId'), + 'thumbnail': nextjs_data.get('coverUrl'), + 'comment_count': nextjs_data.get('commentCount'), + 'channel_follower_count': int_or_none(nextjs_data.get('channelSubscriberCount')), + }, json_ld_data) diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py index 4d3170683a..a03f983e0e 100644 --- a/yt_dlp/extractor/amazon.py +++ b/yt_dlp/extractor/amazon.py @@ -1,5 +1,17 @@ +import re + from .common import InfoExtractor -from ..utils import ExtractorError, int_or_none +from ..utils import ( + ExtractorError, + clean_html, + float_or_none, + get_element_by_attribute, + get_element_by_class, + int_or_none, + js_to_json, + traverse_obj, + url_or_none, +) class AmazonStoreIE(InfoExtractor): @@ -9,7 +21,7 @@ class AmazonStoreIE(InfoExtractor): 'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/', 'info_dict': { 'id': 'B098XNCHLD', - 'title': 'md5:dae240564cbb2642170c02f7f0d7e472', + 'title': str, }, 'playlist_mincount': 1, 'playlist': [{ @@ -20,28 +32,32 @@ class AmazonStoreIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 34, }, - }] + }], + 'expected_warnings': ['Unable to extract data'], }, { 'url': 'https://www.amazon.in/Sony-WH-1000XM4-Cancelling-Headphones-Bluetooth/dp/B0863TXGM3', 'info_dict': { 'id': 'B0863TXGM3', - 'title': 'md5:d1d3352428f8f015706c84b31e132169', + 'title': str, }, 'playlist_mincount': 4, + 'expected_warnings': ['Unable to extract data'], }, { 'url': 'https://www.amazon.com/dp/B0845NXCXF/', 'info_dict': { 'id': 'B0845NXCXF', - 'title': 'md5:f3fa12779bf62ddb6a6ec86a360a858e', + 'title': str, }, 'playlist-mincount': 1, + 'expected_warnings': ['Unable to extract data'], }, { 'url': 'https://www.amazon.es/Samsung-Smartphone-s-AMOLED-Quad-c%C3%A1mara-espa%C3%B1ola/dp/B08WX337PQ', 'info_dict': { 'id': 'B08WX337PQ', - 'title': 'md5:f3fa12779bf62ddb6a6ec86a360a858e', + 'title': str, }, 'playlist_mincount': 1, + 'expected_warnings': ['Unable to extract data'], }] def _real_extract(self, url): @@ -52,7 +68,7 @@ def _real_extract(self, url): try: data_json = self._search_json( r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id, - transform_source=lambda x: x.replace(R'\\u', R'\u')) + transform_source=js_to_json) except ExtractorError as e: retry.error = e @@ -66,3 +82,89 @@ def _real_extract(self, url): 'width': int_or_none(video.get('videoWidth')), } for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')] return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title')) + + +class AmazonReviewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/gp/customer-reviews/(?P[^/&#$?]+)' + _TESTS = [{ + 'url': 'https://www.amazon.com/gp/customer-reviews/R10VE9VUSY19L3/ref=cm_cr_arp_d_rvw_ttl', + 'info_dict': { + 'id': 'R10VE9VUSY19L3', + 'ext': 'mp4', + 'title': 'Get squad #Suspicious', + 'description': 'md5:7012695052f440a1e064e402d87e0afb', + 'uploader': 'Kimberly Cronkright', + 'average_rating': 1.0, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'expected_warnings': ['Review body was not found in webpage'], + }, { + 'url': 'https://www.amazon.com/gp/customer-reviews/R10VE9VUSY19L3/ref=cm_cr_arp_d_rvw_ttl?language=es_US', + 'info_dict': { + 'id': 'R10VE9VUSY19L3', + 'ext': 'mp4', + 'title': 'Get squad #Suspicious', + 'description': 'md5:7012695052f440a1e064e402d87e0afb', + 'uploader': 'Kimberly Cronkright', + 'average_rating': 1.0, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'expected_warnings': ['Review body was not found in webpage'], + }, { + 'url': 'https://www.amazon.in/gp/customer-reviews/RV1CO8JN5VGXV/', + 'info_dict': { + 'id': 'RV1CO8JN5VGXV', + 'ext': 'mp4', + 'title': 'Not sure about its durability', + 'description': 'md5:1a252c106357f0a3109ebf37d2e87494', + 'uploader': 'Shoaib Gulzar', + 'average_rating': 2.0, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'expected_warnings': ['Review body was not found in webpage'], + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + for retry in self.RetryManager(): + webpage = self._download_webpage(url, video_id) + review_body = get_element_by_attribute('data-hook', 'review-body', webpage) + if not review_body: + retry.error = ExtractorError('Review body was not found in webpage', expected=True) + + formats, subtitles = [], {} + + manifest_url = self._search_regex( + r'data-video-url="([^"]+)"', review_body, 'm3u8 url', default=None) + if url_or_none(manifest_url): + fmts, subtitles = self._extract_m3u8_formats_and_subtitles( + manifest_url, video_id, 'mp4', fatal=False) + formats.extend(fmts) + + video_url = self._search_regex( + r']+\bvalue="([^"]+)"[^>]+\bclass="video-url"', review_body, 'mp4 url', default=None) + if url_or_none(video_url): + formats.append({ + 'url': video_url, + 'ext': 'mp4', + 'format_id': 'http-mp4', + }) + + if not formats: + self.raise_no_formats('No video found for this customer review', expected=True) + + return { + 'id': video_id, + 'title': (clean_html(get_element_by_attribute('data-hook', 'review-title', webpage)) + or self._html_extract_title(webpage)), + 'description': clean_html(traverse_obj(re.findall( + r'(.+?)', review_body), -1)), + 'uploader': clean_html(get_element_by_class('a-profile-name', webpage)), + 'average_rating': float_or_none(clean_html(get_element_by_attribute( + 'data-hook', 'review-star-rating', webpage) or '').partition(' ')[0]), + 'thumbnail': self._search_regex( + r'data-thumbnail-url="([^"]+)"', review_body, 'thumbnail', default=None), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py index 0a8a8746ab..8660741ce4 100644 --- a/yt_dlp/extractor/ard.py +++ b/yt_dlp/extractor/ard.py @@ -46,6 +46,9 @@ def _parse_media_info(self, media_info, video_id, fsk): subtitles['de'] = [{ 'ext': 'ttml', 'url': subtitle_url, + }, { + 'ext': 'vtt', + 'url': subtitle_url.replace('/ebutt/', '/webvtt/') + '.vtt', }] return { @@ -286,16 +289,16 @@ def _real_extract(self, url): class ARDIE(InfoExtractor): _VALID_URL = r'(?Phttps?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P[^/?#&]+))\.html' _TESTS = [{ - # available till 7.01.2022 - 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html', - 'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1', + # available till 7.12.2023 + 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html', + 'md5': 'a438f671e87a7eba04000336a119ccc4', 'info_dict': { - 'id': 'maischberger-die-woche-video100', - 'display_id': 'maischberger-die-woche-video100', + 'id': 'maischberger-video-424', + 'display_id': 'maischberger-video-424', 'ext': 'mp4', - 'duration': 3687.0, - 'title': 'maischberger. die woche vom 7. Januar 2021', - 'upload_date': '20210107', + 'duration': 4452.0, + 'title': 'maischberger am 07.12.2022', + 'upload_date': '20221207', 'thumbnail': r're:^https?://.*\.jpg$', }, }, { diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 54e4d2d0ce..e3cc5afb05 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -65,6 +65,21 @@ class ArteTVIE(ArteTVBaseIE): }, { 'url': 'https://api.arte.tv/api/player/v2/config/de/LIVE', 'only_matching': True, + }, { + 'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/', + 'info_dict': { + 'id': '110203-006-A', + 'chapters': 'count:16', + 'description': 'md5:cf592f1df52fe52007e3f8eac813c084', + 'alt_title': 'Zaz', + 'title': 'Baloise Session 2022', + 'timestamp': 1668445200, + 'duration': 4054, + 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/ubQjmVCGyRx3hmBuZEK9QZ/940x530', + 'upload_date': '20221114', + 'ext': 'mp4', + }, + 'expected_warnings': ['geo restricted'] }] _GEO_BYPASS = True @@ -180,10 +195,6 @@ def _real_extract(self, url): else: self.report_warning(f'Skipping stream with unknown protocol {stream["protocol"]}') - # TODO: chapters from stream['segments']? - # The JS also looks for chapters in config['data']['attributes']['chapters'], - # but I am yet to find a video having those - formats.extend(secondary_formats) self._remove_duplicate_formats(formats) @@ -205,6 +216,11 @@ def _real_extract(self, url): {'url': image['url'], 'id': image.get('caption')} for image in metadata.get('images') or [] if url_or_none(image.get('url')) ], + # TODO: chapters may also be in stream['segments']? + 'chapters': traverse_obj(config, ('data', 'attributes', 'chapters', 'elements', ..., { + 'start_time': 'startTime', + 'title': 'title', + })) or None, } diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index de81e0de7b..e89b3a69b3 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -29,11 +29,18 @@ class BandcampIE(InfoExtractor): 'info_dict': { 'id': '1812978515', 'ext': 'mp3', - 'title': "youtube-dl \"'/\\ä↭ - youtube-dl \"'/\\ä↭ - youtube-dl test song \"'/\\ä↭", + 'title': 'youtube-dl "\'/\\ä↭ - youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭', 'duration': 9.8485, - 'uploader': 'youtube-dl "\'/\\ä↭', + 'uploader': 'youtube-dl "\'/\\ä↭', 'upload_date': '20121129', 'timestamp': 1354224127, + 'track': 'youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭', + 'album_artist': 'youtube-dl "\'/\\ä↭', + 'track_id': '1812978515', + 'artist': 'youtube-dl "\'/\\ä↭', + 'uploader_url': 'https://youtube-dl.bandcamp.com', + 'uploader_id': 'youtube-dl', + 'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg', }, '_skip': 'There is a limit of 200 free downloads / month for the test song' }, { @@ -41,7 +48,8 @@ class BandcampIE(InfoExtractor): 'url': 'http://benprunty.bandcamp.com/track/lanius-battle', 'info_dict': { 'id': '2650410135', - 'ext': 'aiff', + 'ext': 'm4a', + 'acodec': r're:[fa]lac', 'title': 'Ben Prunty - Lanius (Battle)', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Ben Prunty', @@ -54,7 +62,10 @@ class BandcampIE(InfoExtractor): 'track_number': 1, 'track_id': '2650410135', 'artist': 'Ben Prunty', + 'album_artist': 'Ben Prunty', 'album': 'FTL: Advanced Edition Soundtrack', + 'uploader_url': 'https://benprunty.bandcamp.com', + 'uploader_id': 'benprunty', }, }, { # no free download, mp3 128 @@ -75,7 +86,34 @@ class BandcampIE(InfoExtractor): 'track_number': 5, 'track_id': '2584466013', 'artist': 'Mastodon', + 'album_artist': 'Mastodon', 'album': 'Call of the Mastodon', + 'uploader_url': 'https://relapsealumni.bandcamp.com', + 'uploader_id': 'relapsealumni', + }, + }, { + # track from compilation album (artist/album_artist difference) + 'url': 'https://diskotopia.bandcamp.com/track/safehouse', + 'md5': '19c5337bca1428afa54129f86a2f6a69', + 'info_dict': { + 'id': '1978174799', + 'ext': 'mp3', + 'title': 'submerse - submerse - Safehouse', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'submerse', + 'timestamp': 1480779297, + 'upload_date': '20161203', + 'release_timestamp': 1481068800, + 'release_date': '20161207', + 'duration': 154.066, + 'track': 'submerse - Safehouse', + 'track_number': 3, + 'track_id': '1978174799', + 'artist': 'submerse', + 'album_artist': 'Diskotopia', + 'album': 'DSK F/W 2016-2017 Free Compilation', + 'uploader_url': 'https://diskotopia.bandcamp.com', + 'uploader_id': 'diskotopia', }, }] @@ -121,6 +159,9 @@ def _real_extract(self, url): embed = self._extract_data_attr(webpage, title, 'embed', False) current = tralbum.get('current') or {} artist = embed.get('artist') or current.get('artist') or tralbum.get('artist') + album_artist = self._html_search_regex( + r'

[\S\s]*?by\s*\s*\s*([^>]+?)\s*', + webpage, 'album artist', fatal=False) timestamp = unified_timestamp( current.get('publish_date') or tralbum.get('album_publish_date')) @@ -205,6 +246,7 @@ def _real_extract(self, url): 'track_id': track_id, 'artist': artist, 'album': embed.get('album_title'), + 'album_artist': album_artist, 'formats': formats, } diff --git a/yt_dlp/extractor/beatbump.py b/yt_dlp/extractor/beatbump.py new file mode 100644 index 0000000000..0f40ebe7ac --- /dev/null +++ b/yt_dlp/extractor/beatbump.py @@ -0,0 +1,101 @@ +from .common import InfoExtractor +from .youtube import YoutubeIE, YoutubeTabIE + + +class BeatBumpVideoIE(InfoExtractor): + _VALID_URL = r'https://beatbump\.ml/listen\?id=(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://beatbump.ml/listen?id=MgNrAu2pzNs', + 'md5': '5ff3fff41d3935b9810a9731e485fe66', + 'info_dict': { + 'id': 'MgNrAu2pzNs', + 'ext': 'mp4', + 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA', + 'artist': 'Stephen', + 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp', + 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA', + 'upload_date': '20190312', + 'categories': ['Music'], + 'playable_in_embed': True, + 'duration': 169, + 'like_count': int, + 'alt_title': 'Voyeur Girl', + 'view_count': int, + 'track': 'Voyeur Girl', + 'uploader': 'Stephen - Topic', + 'title': 'Voyeur Girl', + 'channel_follower_count': int, + 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA', + 'age_limit': 0, + 'availability': 'public', + 'live_status': 'not_live', + 'album': 'it\'s too much love to know my dear', + 'channel': 'Stephen', + 'comment_count': int, + 'description': 'md5:7ae382a65843d6df2685993e90a8628f', + 'tags': 'count:11', + 'creator': 'Stephen', + 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA', + } + }] + + def _real_extract(self, url): + id_ = self._match_id(url) + return self.url_result(f'https://music.youtube.com/watch?v={id_}', YoutubeIE, id_) + + +class BeatBumpPlaylistIE(InfoExtractor): + _VALID_URL = r'https://beatbump\.ml/(?:release\?id=|artist/|playlist/)(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://beatbump.ml/release?id=MPREb_gTAcphH99wE', + 'playlist_count': 50, + 'info_dict': { + 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0', + 'availability': 'unlisted', + 'view_count': int, + 'title': 'Album - Royalty Free Music Library V2 (50 Songs)', + 'description': '', + 'tags': [], + 'modified_date': '20221223', + } + }, { + 'url': 'https://beatbump.ml/artist/UC_aEa8K-EOJ3D6gOs7HcyNg', + 'playlist_mincount': 1, + 'params': {'flatplaylist': True}, + 'info_dict': { + 'id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + 'uploader_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg', + 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg', + 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + 'channel_follower_count': int, + 'title': 'NoCopyrightSounds - Videos', + 'uploader': 'NoCopyrightSounds', + 'description': 'md5:cd4fd53d81d363d05eee6c1b478b491a', + 'channel': 'NoCopyrightSounds', + 'tags': 'count:12', + 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + }, + }, { + 'url': 'https://beatbump.ml/playlist/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', + 'playlist_mincount': 1, + 'params': {'flatplaylist': True}, + 'info_dict': { + 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', + 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds', + 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!', + 'view_count': int, + 'channel_url': 'https://www.youtube.com/@NoCopyrightSounds', + 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + 'title': 'NCS : All Releases 💿', + 'uploader': 'NoCopyrightSounds', + 'availability': 'public', + 'channel': 'NoCopyrightSounds', + 'tags': [], + 'modified_date': '20221225', + 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + } + }] + + def _real_extract(self, url): + id_ = self._match_id(url) + return self.url_result(f'https://music.youtube.com/browse/{id_}', YoutubeTabIE, id_) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index bc0424194f..d4b05248f3 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -16,13 +16,16 @@ format_field, int_or_none, make_archive_id, + merge_dicts, mimetype2ext, parse_count, parse_qs, qualities, + smuggle_url, srt_subtitles_timecode, str_or_none, traverse_obj, + unsmuggle_url, url_or_none, urlencode_postdata, ) @@ -303,7 +306,8 @@ def _real_extract(self, url): getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}') if is_anthology: - title += f' p{part_id:02d} {traverse_obj(page_list_json, ((part_id or 1) - 1, "part")) or ""}' + part_id = part_id or 1 + title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}' aid = video_data.get('aid') old_video_id = format_field(aid, None, f'%s_part{part_id or 1}') @@ -880,16 +884,12 @@ def _get_formats(self, *, ep_id=None, aid=None): return formats - def _extract_video_info(self, video_data, *, ep_id=None, aid=None): + def _parse_video_metadata(self, video_data): return { - 'id': ep_id or aid, 'title': video_data.get('title_display') or video_data.get('title'), 'thumbnail': video_data.get('cover'), 'episode_number': int_or_none(self._search_regex( r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)), - 'formats': self._get_formats(ep_id=ep_id, aid=aid), - 'subtitles': self._get_subtitles(ep_id=ep_id, aid=aid), - 'extractor_key': BiliIntlIE.ie_key(), } def _perform_login(self, username, password): @@ -935,6 +935,10 @@ class BiliIntlIE(BiliIntlBaseIE): 'title': 'E2 - The First Night', 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$', 'episode_number': 2, + 'upload_date': '20201009', + 'episode': 'Episode 2', + 'timestamp': 1602259500, + 'description': 'md5:297b5a17155eb645e14a14b385ab547e', } }, { # Non-Bstation page @@ -945,6 +949,10 @@ class BiliIntlIE(BiliIntlBaseIE): 'title': 'E3 - Who?', 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$', 'episode_number': 3, + 'description': 'md5:e1a775e71a35c43f141484715470ad09', + 'episode': 'Episode 3', + 'upload_date': '20211219', + 'timestamp': 1639928700, } }, { # Subtitle with empty content @@ -957,6 +965,17 @@ class BiliIntlIE(BiliIntlBaseIE): 'episode_number': 140, }, 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.' + }, { + 'url': 'https://www.bilibili.tv/en/video/2041863208', + 'info_dict': { + 'id': '2041863208', + 'ext': 'mp4', + 'timestamp': 1670874843, + 'description': 'Scheduled for April 2023.\nStudio: ufotable', + 'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$', + 'upload_date': '20221212', + 'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation', + } }, { 'url': 'https://www.biliintl.com/en/play/34613/341736', 'only_matching': True, @@ -974,42 +993,78 @@ class BiliIntlIE(BiliIntlBaseIE): 'only_matching': True, }] - def _real_extract(self, url): - season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid') - video_id = ep_id or aid + def _make_url(video_id, series_id=None): + if series_id: + return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}' + return f'https://www.bilibili.tv/en/video/{video_id}' + + def _extract_video_metadata(self, url, video_id, season_id): + url, smuggled_data = unsmuggle_url(url, {}) + if smuggled_data.get('title'): + return smuggled_data + webpage = self._download_webpage(url, video_id) # Bstation layout initial_data = ( self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={}) or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None)) video_data = traverse_obj( - initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) + initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {} if season_id and not video_data: # Non-Bstation layout, read through episode list season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id) - video_data = traverse_obj(season_json, - ('sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == ep_id), - expected_type=dict, get_all=False) - return self._extract_video_info(video_data or {}, ep_id=ep_id, aid=aid) + video_data = traverse_obj(season_json, ( + 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id + ), expected_type=dict, get_all=False) + + # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found + return merge_dicts( + self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id), { + 'title': self._html_search_meta('og:title', webpage), + 'description': self._html_search_meta('og:description', webpage) + }) + + def _real_extract(self, url): + season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid') + video_id = ep_id or aid + + return { + 'id': video_id, + **self._extract_video_metadata(url, video_id, season_id), + 'formats': self._get_formats(ep_id=ep_id, aid=aid), + 'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid), + } class BiliIntlSeriesIE(BiliIntlBaseIE): - _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?play/(?P\d+)/?(?:[?#]|$)' + IE_NAME = 'biliIntl:series' + _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P\d+)/?(?:[?#]|$)' _TESTS = [{ 'url': 'https://www.bilibili.tv/en/play/34613', 'playlist_mincount': 15, 'info_dict': { 'id': '34613', - 'title': 'Fly Me to the Moon', - 'description': 'md5:a861ee1c4dc0acfad85f557cc42ac627', - 'categories': ['Romance', 'Comedy', 'Slice of life'], + 'title': 'TONIKAWA: Over the Moon For You', + 'description': 'md5:297b5a17155eb645e14a14b385ab547e', + 'categories': ['Slice of life', 'Comedy', 'Romance'], 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$', 'view_count': int, }, 'params': { 'skip_download': True, }, + }, { + 'url': 'https://www.bilibili.tv/en/media/1048837', + 'info_dict': { + 'id': '1048837', + 'title': 'SPY×FAMILY', + 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17', + 'categories': ['Adventure', 'Action', 'Comedy'], + 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$', + 'view_count': int, + }, + 'playlist_mincount': 25, }, { 'url': 'https://www.biliintl.com/en/play/34613', 'only_matching': True, @@ -1020,9 +1075,12 @@ class BiliIntlSeriesIE(BiliIntlBaseIE): def _entries(self, series_id): series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id) - for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict, default=[]): - episode_id = str(episode.get('episode_id')) - yield self._extract_video_info(episode, ep_id=episode_id) + for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict): + episode_id = str(episode['episode_id']) + yield self.url_result(smuggle_url( + BiliIntlIE._make_url(episode_id, series_id), + self._parse_video_metadata(episode) + ), BiliIntlIE, episode_id) def _real_extract(self, url): series_id = self._match_id(url) @@ -1034,7 +1092,7 @@ def _real_extract(self, url): class BiliLiveIE(InfoExtractor): - _VALID_URL = r'https?://live.bilibili.com/(?P\d+)' + _VALID_URL = r'https?://live.bilibili.com/(?:blanc/)?(?P\d+)' _TESTS = [{ 'url': 'https://live.bilibili.com/196', @@ -1050,6 +1108,9 @@ class BiliLiveIE(InfoExtractor): }, { 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click', 'only_matching': True + }, { + 'url': 'https://live.bilibili.com/blanc/196', + 'only_matching': True }] _FORMATS = { @@ -1111,6 +1172,7 @@ def _real_extract(self, url): 'thumbnail': room_data.get('user_cover'), 'timestamp': stream_data.get('live_time'), 'formats': formats, + 'is_live': True, 'http_headers': { 'Referer': url, }, diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py index d1212e686a..1157114b2a 100644 --- a/yt_dlp/extractor/cda.py +++ b/yt_dlp/extractor/cda.py @@ -4,6 +4,7 @@ import hashlib import hmac import json +import random import re from .common import InfoExtractor @@ -27,11 +28,10 @@ class CDAIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P[0-9a-z]+)' _NETRC_MACHINE = 'cdapl' - _BASE_URL = 'http://www.cda.pl/' + _BASE_URL = 'https://www.cda.pl' _BASE_API_URL = 'https://api.cda.pl' _API_HEADERS = { 'Accept': 'application/vnd.cda.public+json', - 'User-Agent': 'pl.cda 1.0 (version 1.2.88 build 15306; Android 9; Xiaomi Redmi 3S)', } # hardcoded in the app _LOGIN_REQUEST_AUTH = 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q' @@ -101,6 +101,38 @@ def _download_age_confirm_page(self, url, video_id, *args, **kwargs): }, **kwargs) def _perform_login(self, username, password): + app_version = random.choice(( + '1.2.88 build 15306', + '1.2.174 build 18469', + )) + android_version = random.randrange(8, 14) + phone_model = random.choice(( + # x-kom.pl top selling Android smartphones, as of 2022-12-26 + # https://www.x-kom.pl/g-4/c/1590-smartfony-i-telefony.html?f201-system-operacyjny=61322-android + 'ASUS ZenFone 8', + 'Motorola edge 20 5G', + 'Motorola edge 30 neo 5G', + 'Motorola moto g22', + 'OnePlus Nord 2T 5G', + 'Samsung Galaxy A32 SM‑A325F', + 'Samsung Galaxy M13', + 'Samsung Galaxy S20 FE 5G', + 'Xiaomi 11T', + 'Xiaomi POCO M4 Pro', + 'Xiaomi Redmi 10', + 'Xiaomi Redmi 10C', + 'Xiaomi Redmi 9C NFC', + 'Xiaomi Redmi Note 10 Pro', + 'Xiaomi Redmi Note 11 Pro', + 'Xiaomi Redmi Note 11', + 'Xiaomi Redmi Note 11S 5G', + 'Xiaomi Redmi Note 11S', + 'realme 10', + 'realme 9 Pro+', + 'vivo Y33s', + )) + self._API_HEADERS['User-Agent'] = f'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})' + cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {} if cached_bearer.get('valid_until', 0) > datetime.datetime.now().timestamp() + 5: self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}' @@ -138,9 +170,6 @@ def _api_extract(self, video_id): meta = self._download_json( f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video'] - if meta.get('premium') and not meta.get('premium_free'): - self.report_drm(video_id) - uploader = traverse_obj(meta, 'author', 'login') formats = [{ @@ -151,6 +180,10 @@ def _api_extract(self, video_id): 'filesize': quality.get('length'), } for quality in meta['qualities'] if quality.get('file')] + if meta.get('premium') and not meta.get('premium_free') and not formats: + raise ExtractorError( + 'Video requires CDA Premium - subscription needed', expected=True) + return { 'id': video_id, 'title': meta.get('title'), @@ -167,10 +200,10 @@ def _api_extract(self, video_id): def _web_extract(self, video_id, url): self._set_cookie('cda.pl', 'cda.player', 'html5') webpage = self._download_webpage( - self._BASE_URL + '/video/' + video_id, video_id) + f'{self._BASE_URL}/video/{video_id}/vfilm', video_id) if 'Ten film jest dostępny dla użytkowników premium' in webpage: - raise ExtractorError('This video is only available for premium users.', expected=True) + self.raise_login_required('This video is only available for premium users') if re.search(r'niedostępn[ey] w(?: |\s+)Twoim kraju\s*<', webpage): self.raise_geo_restricted() diff --git a/yt_dlp/extractor/ciscowebex.py b/yt_dlp/extractor/ciscowebex.py index 44595d854c..0fcf022820 100644 --- a/yt_dlp/extractor/ciscowebex.py +++ b/yt_dlp/extractor/ciscowebex.py @@ -1,5 +1,6 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, try_get, unified_timestamp, @@ -38,11 +39,30 @@ def _real_extract(self, url): siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2') video_id = mobj.group('id') - stream = self._download_json( + password = self.get_param('videopassword') + + headers = {'Accept': 'application/json'} + if password: + headers['accessPwd'] = password + + stream, urlh = self._download_json_handle( 'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id), - video_id, fatal=False, query={'siteurl': siteurl}) - if not stream: - self.raise_login_required(method='cookies') + video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429)) + + if urlh.status == 403: + if stream['code'] == 53004: + self.raise_login_required() + if stream['code'] == 53005: + if password: + raise ExtractorError('Wrong password', expected=True) + raise ExtractorError( + 'This video is protected by a password, use the --video-password option', expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {stream["code"]} - {stream["message"]}', expected=True) + + if urlh.status == 429: + self.raise_login_required( + f'{self.IE_NAME} asks you to solve a CAPTCHA. Solve CAPTCHA in browser and', + method='cookies') video_id = stream.get('recordUUID') or video_id @@ -78,7 +98,7 @@ def _real_extract(self, url): 'title': stream['recordName'], 'description': stream.get('description'), 'uploader': stream.get('ownerDisplayName'), - 'uploader_id': stream.get('ownerUserName') or stream.get('ownerId'), # mail or id + 'uploader_id': stream.get('ownerUserName') or stream.get('ownerId'), 'timestamp': unified_timestamp(stream.get('createTime')), 'duration': int_or_none(stream.get('duration'), 1000), 'webpage_url': 'https://%s.webex.com/recordingservice/sites/%s/recording/playback/%s' % (subdomain, siteurl, video_id), diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index a4483576e7..857c01908f 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1267,10 +1267,9 @@ def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=Tr Like _search_regex, but strips HTML tags and unescapes entities. """ res = self._search_regex(pattern, string, name, default, fatal, flags, group) - if res: - return clean_html(res).strip() - else: - return res + if isinstance(res, tuple): + return tuple(map(clean_html, res)) + return clean_html(res) def _get_netrc_login_info(self, netrc_machine=None): username = None @@ -1401,10 +1400,16 @@ def _rta_search(html): # And then there are the jokers who advertise that they use RTA, but actually don't. AGE_LIMIT_MARKERS = [ r'Proudly Labeled RTA', + r'>[^<]*you acknowledge you are at least (\d+) years old', + r'>\s*(?:18\s+U(?:\.S\.C\.|SC)\s+)?(?:§+\s*)?2257\b', ] - if any(re.search(marker, html) for marker in AGE_LIMIT_MARKERS): - return 18 - return 0 + + age_limit = 0 + for marker in AGE_LIMIT_MARKERS: + mobj = re.search(marker, html) + if mobj: + age_limit = max(age_limit, int(traverse_obj(mobj, 1, default=18))) + return age_limit def _media_rating_search(self, html): # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/ @@ -1764,6 +1769,9 @@ def _sleep(self, timeout, video_id, msg_template=None): def _extract_f4m_formats(self, manifest_url, video_id, preference=None, quality=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True, m3u8_id=None, data=None, headers={}, query={}): + if self.get_param('ignore_no_formats_error'): + fatal = False + res = self._download_xml_handle( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', @@ -1913,6 +1921,9 @@ def _extract_m3u8_formats_and_subtitles( errnote=None, fatal=True, live=False, data=None, headers={}, query={}): + if self.get_param('ignore_no_formats_error'): + fatal = False + if not m3u8_url: if errnote is not False: errnote = errnote or 'Failed to obtain m3u8 URL' @@ -2192,6 +2203,9 @@ def _xpath_ns(path, namespace=None): return '/'.join(out) def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None): + if self.get_param('ignore_no_formats_error'): + fatal = False + res = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source) if res is False: assert not fatal @@ -2467,6 +2481,10 @@ def _extract_mpd_formats(self, *args, **kwargs): def _extract_mpd_formats_and_subtitles( self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): + + if self.get_param('ignore_no_formats_error'): + fatal = False + res = self._download_xml_handle( mpd_url, video_id, note='Downloading MPD manifest' if note is None else note, @@ -2836,6 +2854,9 @@ def _extract_ism_formats(self, *args, **kwargs): return fmts def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): + if self.get_param('ignore_no_formats_error'): + fatal = False + res = self._download_xml_handle( ism_url, video_id, note='Downloading ISM manifest' if note is None else note, @@ -3205,7 +3226,7 @@ def manifest_url(manifest): def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json): mobj = re.search( - r'(?s)jwplayer\((?P[\'"])[^\'" ]+(?P=quote)\)(?!).*?\.setup\s*\((?P[^)]+)\)', + r'''(?s)jwplayer\s*\(\s*(?P'|")(?!(?P=q)).+(?P=q)\s*\)(?!).*?\.\s*setup\s*\(\s*(?P(?:\([^)]*\)|[^)])+)\s*\)''', webpage) if mobj: try: @@ -3226,19 +3247,20 @@ def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): - # JWPlayer backward compatibility: flattened playlists - # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 - if 'playlist' not in jwplayer_data: - jwplayer_data = {'playlist': [jwplayer_data]} - entries = [] + if not isinstance(jwplayer_data, dict): + return entries - # JWPlayer backward compatibility: single playlist item + playlist_items = jwplayer_data.get('playlist') + # JWPlayer backward compatibility: single playlist item/flattened playlists # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 - if not isinstance(jwplayer_data['playlist'], list): - jwplayer_data['playlist'] = [jwplayer_data['playlist']] + # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 + if not isinstance(playlist_items, list): + playlist_items = (playlist_items or jwplayer_data, ) - for video_data in jwplayer_data['playlist']: + for video_data in playlist_items: + if not isinstance(video_data, dict): + continue # JWPlayer backward compatibility: flattened sources # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 if 'sources' not in video_data: @@ -3276,6 +3298,13 @@ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, 'timestamp': int_or_none(video_data.get('pubdate')), 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), 'subtitles': subtitles, + 'alt_title': clean_html(video_data.get('subtitle')), # attributes used e.g. by Tele5 ... + 'genre': clean_html(video_data.get('genre')), + 'channel': clean_html(dict_get(video_data, ('category', 'channel'))), + 'season_number': int_or_none(video_data.get('season')), + 'episode_number': int_or_none(video_data.get('episode')), + 'release_year': int_or_none(video_data.get('releasedate')), + 'age_limit': int_or_none(video_data.get('age_restriction')), } # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32 if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']): @@ -3293,7 +3322,7 @@ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): - urls = [] + urls = set() formats = [] for source in jwplayer_sources_data: if not isinstance(source, dict): @@ -3302,14 +3331,14 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, base_url, self._proto_relative_url(source.get('file'))) if not source_url or source_url in urls: continue - urls.append(source_url) + urls.add(source_url) source_type = source.get('type') or '' ext = mimetype2ext(source_type) or determine_ext(source_url) - if source_type == 'hls' or ext == 'm3u8': + if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url: formats.extend(self._extract_m3u8_formats( source_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=m3u8_id, fatal=False)) - elif source_type == 'dash' or ext == 'mpd': + elif source_type == 'dash' or ext == 'mpd' or 'format=mpd-time-csf' in source_url: formats.extend(self._extract_mpd_formats( source_url, video_id, mpd_id=mpd_id, fatal=False)) elif ext == 'smil': @@ -3324,13 +3353,12 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, 'ext': ext, }) else: + format_id = str_or_none(source.get('label')) height = int_or_none(source.get('height')) - if height is None: + if height is None and format_id: # Often no height is provided but there is a label in # format like "1080p", "720p SD", or 1080. - height = int_or_none(self._search_regex( - r'^(\d{3,4})[pP]?(?:\b|$)', str(source.get('label') or ''), - 'height', default=None)) + height = parse_resolution(format_id).get('height') a_format = { 'url': source_url, 'width': int_or_none(source.get('width')), @@ -3338,6 +3366,7 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, 'tbr': int_or_none(source.get('bitrate'), scale=1000), 'filesize': int_or_none(source.get('filesize')), 'ext': ext, + 'format_id': format_id } if source_url.startswith('rtmp'): a_format['ext'] = 'flv' @@ -3431,13 +3460,17 @@ def get_testcases(cls, include_onlymatching=False): continue t['name'] = cls.ie_key() yield t + if getattr(cls, '__wrapped__', None): + yield from cls.__wrapped__.get_testcases(include_onlymatching) @classmethod def get_webpage_testcases(cls): tests = vars(cls).get('_WEBPAGE_TESTS', []) for t in tests: t['name'] = cls.ie_key() - return tests + yield t + if getattr(cls, '__wrapped__', None): + yield from cls.__wrapped__.get_webpage_testcases() @classproperty(cache=True) def age_limit(cls): @@ -3483,7 +3516,7 @@ def description(cls, *, markdown=True, search_examples=None): elif cls.IE_DESC: desc += f' {cls.IE_DESC}' if cls.SEARCH_KEY: - desc += f'; "{cls.SEARCH_KEY}:" prefix' + desc += f'{";" if cls.IE_DESC else ""} "{cls.SEARCH_KEY}:" prefix' if search_examples: _COUNTS = ('', '5', '10', 'all') desc += f' (e.g. "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")' @@ -3699,10 +3732,12 @@ def __init_subclass__(cls, *, plugin_name=None, **kwargs): if plugin_name: mro = inspect.getmro(cls) super_class = cls.__wrapped__ = mro[mro.index(cls) + 1] - cls.IE_NAME, cls.ie_key = f'{super_class.IE_NAME}+{plugin_name}', super_class.ie_key + cls.PLUGIN_NAME, cls.ie_key = plugin_name, super_class.ie_key + cls.IE_NAME = f'{super_class.IE_NAME}+{plugin_name}' while getattr(super_class, '__wrapped__', None): super_class = super_class.__wrapped__ setattr(sys.modules[super_class.__module__], super_class.__name__, cls) + _PLUGIN_OVERRIDES[super_class].append(cls) return super().__init_subclass__(**kwargs) @@ -3759,3 +3794,6 @@ class UnsupportedURLIE(InfoExtractor): def _real_extract(self, url): raise UnsupportedError(url) + + +_PLUGIN_OVERRIDES = collections.defaultdict(list) diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index ee344ce8be..836bcb622c 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -182,7 +182,7 @@ def _real_extract(self, url): self.to_screen( 'To get all formats of a hardsub language, use ' '"--extractor-args crunchyrollbeta:hardsub=". ' - 'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta for more info', + 'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta-crunchyroll for more info', only_once=True) else: full_format_langs = set(map(str.lower, available_formats)) @@ -291,7 +291,8 @@ def entries(): 'season_id': episode.get('season_id'), 'season_number': episode.get('season_number'), 'episode': episode.get('title'), - 'episode_number': episode.get('sequence_number') + 'episode_number': episode.get('sequence_number'), + 'language': episode.get('audio_locale'), } return self.playlist_result(entries(), internal_id, series_response.get('title')) diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py index 26cf24fbbd..941cf4e79c 100644 --- a/yt_dlp/extractor/curiositystream.py +++ b/yt_dlp/extractor/curiositystream.py @@ -1,4 +1,5 @@ import re +import urllib.parse from .common import InfoExtractor from ..compat import compat_str @@ -23,7 +24,7 @@ def _call_api(self, path, video_id, query=None): auth_cookie = self._get_cookies('https://curiositystream.com').get('auth_token') if auth_cookie: self.write_debug('Obtained auth_token cookie') - self._auth_token = auth_cookie.value + self._auth_token = urllib.parse.unquote(auth_cookie.value) if self._auth_token: headers['X-Auth-Token'] = self._auth_token result = self._download_json( @@ -54,8 +55,11 @@ class CuriosityStreamIE(CuriosityStreamBaseIE): 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.', 'channel': 'Curiosity Stream', 'categories': ['Technology', 'Interview'], - 'average_rating': 96.79, + 'average_rating': float, 'series_id': '2', + 'thumbnail': r're:https://img.curiositystream.com/.+\.jpg', + 'tags': [], + 'duration': 158 }, 'params': { # m3u8 download diff --git a/yt_dlp/extractor/discovery.py b/yt_dlp/extractor/discovery.py index fd3fc8fb0f..e6e109d5c5 100644 --- a/yt_dlp/extractor/discovery.py +++ b/yt_dlp/extractor/discovery.py @@ -78,7 +78,7 @@ def _real_extract(self, url): 'Downloading token JSON metadata', query={ 'authRel': 'authorization', 'client_id': '3020a40c2356a645b4b4', - 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), + 'nonce': ''.join(random.choices(string.ascii_letters, k=32)), 'redirectUri': 'https://www.discovery.com/', })['access_token'] diff --git a/yt_dlp/extractor/drtv.py b/yt_dlp/extractor/drtv.py index 128f439145..d3e197551d 100644 --- a/yt_dlp/extractor/drtv.py +++ b/yt_dlp/extractor/drtv.py @@ -2,22 +2,24 @@ import hashlib import re - from .common import InfoExtractor from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 from ..compat import compat_urllib_parse_unquote from ..utils import ( ExtractorError, - int_or_none, float_or_none, + int_or_none, mimetype2ext, str_or_none, + traverse_obj, try_get, unified_timestamp, update_url_query, url_or_none, ) +SERIES_API = 'https://production-cdn.dr-massive.com/api/page?device=web_browser&item_detail_expand=all&lang=da&max_list_prefetch=3&path=%s' + class DRTVIE(InfoExtractor): _VALID_URL = r'''(?x) @@ -141,13 +143,13 @@ class DRTVIE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) + raw_video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, raw_video_id) if '>Programmet er ikke længere tilgængeligt' in webpage: raise ExtractorError( - 'Video %s is not available' % video_id, expected=True) + 'Video %s is not available' % raw_video_id, expected=True) video_id = self._search_regex( (r'data-(?:material-identifier|episode-slug)="([^"]+)"', @@ -182,6 +184,10 @@ def _real_extract(self, url): data = self._download_json( programcard_url, video_id, 'Downloading video JSON', query=query) + supplementary_data = self._download_json( + SERIES_API % f'/episode/{raw_video_id}', raw_video_id, + default={}) if re.search(r'_\d+$', raw_video_id) else {} + title = str_or_none(data.get('Title')) or re.sub( r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '', self._og_search_title(webpage)) @@ -313,8 +319,8 @@ def decrypt_uri(e): 'season': str_or_none(data.get('SeasonTitle')), 'season_number': int_or_none(data.get('SeasonNumber')), 'season_id': str_or_none(data.get('SeasonUrn')), - 'episode': str_or_none(data.get('EpisodeTitle')), - 'episode_number': int_or_none(data.get('EpisodeNumber')), + 'episode': traverse_obj(supplementary_data, ('entries', 0, 'item', 'contextualTitle')) or str_or_none(data.get('EpisodeTitle')), + 'episode_number': traverse_obj(supplementary_data, ('entries', 0, 'item', 'episodeNumber')) or int_or_none(data.get('EpisodeNumber')), 'release_year': int_or_none(data.get('ProductionYear')), } @@ -372,3 +378,92 @@ def _real_extract(self, url): 'formats': formats, 'is_live': True, } + + +class DRTVSeasonIE(InfoExtractor): + IE_NAME = 'drtv:season' + _VALID_URL = r'https?://(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/saeson/(?P[\w-]+)_(?P\d+)' + _GEO_COUNTRIES = ['DK'] + _TESTS = [{ + 'url': 'https://www.dr.dk/drtv/saeson/frank-and-kastaniegaarden_9008', + 'info_dict': { + 'id': '9008', + 'display_id': 'frank-and-kastaniegaarden', + 'title': 'Frank & Kastaniegaarden', + 'series': 'Frank & Kastaniegaarden', + }, + 'playlist_mincount': 8 + }, { + 'url': 'https://www.dr.dk/drtv/saeson/frank-and-kastaniegaarden_8761', + 'info_dict': { + 'id': '8761', + 'display_id': 'frank-and-kastaniegaarden', + 'title': 'Frank & Kastaniegaarden', + 'series': 'Frank & Kastaniegaarden', + }, + 'playlist_mincount': 19 + }] + + def _real_extract(self, url): + display_id, season_id = self._match_valid_url(url).group('display_id', 'id') + data = self._download_json(SERIES_API % f'/saeson/{display_id}_{season_id}', display_id) + + entries = [{ + '_type': 'url', + 'url': f'https://www.dr.dk/drtv{episode["path"]}', + 'ie_key': DRTVIE.ie_key(), + 'title': episode.get('title'), + 'episode': episode.get('episodeName'), + 'description': episode.get('shortDescription'), + 'series': traverse_obj(data, ('entries', 0, 'item', 'title')), + 'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber')), + 'episode_number': episode.get('episodeNumber'), + } for episode in traverse_obj(data, ('entries', 0, 'item', 'episodes', 'items'))] + + return { + '_type': 'playlist', + 'id': season_id, + 'display_id': display_id, + 'title': traverse_obj(data, ('entries', 0, 'item', 'title')), + 'series': traverse_obj(data, ('entries', 0, 'item', 'title')), + 'entries': entries, + 'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber')) + } + + +class DRTVSeriesIE(InfoExtractor): + IE_NAME = 'drtv:series' + _VALID_URL = r'https?://(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/serie/(?P[\w-]+)_(?P\d+)' + _GEO_COUNTRIES = ['DK'] + _TESTS = [{ + 'url': 'https://www.dr.dk/drtv/serie/frank-and-kastaniegaarden_6954', + 'info_dict': { + 'id': '6954', + 'display_id': 'frank-and-kastaniegaarden', + 'title': 'Frank & Kastaniegaarden', + 'series': 'Frank & Kastaniegaarden', + }, + 'playlist_mincount': 15 + }] + + def _real_extract(self, url): + display_id, series_id = self._match_valid_url(url).group('display_id', 'id') + data = self._download_json(SERIES_API % f'/serie/{display_id}_{series_id}', display_id) + + entries = [{ + '_type': 'url', + 'url': f'https://www.dr.dk/drtv{season.get("path")}', + 'ie_key': DRTVSeasonIE.ie_key(), + 'title': season.get('title'), + 'series': traverse_obj(data, ('entries', 0, 'item', 'title')), + 'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber')) + } for season in traverse_obj(data, ('entries', 0, 'item', 'show', 'seasons', 'items'))] + + return { + '_type': 'playlist', + 'id': series_id, + 'display_id': display_id, + 'title': traverse_obj(data, ('entries', 0, 'item', 'title')), + 'series': traverse_obj(data, ('entries', 0, 'item', 'title')), + 'entries': entries + } diff --git a/yt_dlp/extractor/embedly.py b/yt_dlp/extractor/embedly.py index 483d018bb4..1b58fca60f 100644 --- a/yt_dlp/extractor/embedly.py +++ b/yt_dlp/extractor/embedly.py @@ -1,24 +1,80 @@ import re import urllib.parse + from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote +from .youtube import YoutubeTabIE +from ..utils import parse_qs, smuggle_url, traverse_obj class EmbedlyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P[^#&]+)' + _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?(?:src|url)=(?:[^#&]+)' _TESTS = [{ 'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1', + 'info_dict': { + 'id': 'UUGLim4T2loE5rwCMdpCIPVg', + 'modified_date': '20221225', + 'view_count': int, + 'uploader_url': 'https://www.youtube.com/@TraciHinesMusic', + 'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg', + 'uploader': 'TraciJHines', + 'channel_url': 'https://www.youtube.com/@TraciHinesMusic', + 'channel': 'TraciJHines', + 'availability': 'public', + 'uploader_id': 'UCGLim4T2loE5rwCMdpCIPVg', + 'description': '', + 'tags': [], + 'title': 'Uploads from TraciJHines', + }, + 'playlist_mincount': 10, + }, { + 'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1', + 'params': {'noplaylist': True}, + 'info_dict': { + 'id': 'SU4fj_aEMVw', + 'ext': 'mp4', + 'title': 'I\'m on Patreon!', + 'age_limit': 0, + 'categories': ['Entertainment'], + 'thumbnail': 'https://i.ytimg.com/vi_webp/SU4fj_aEMVw/maxresdefault.webp', + 'live_status': 'not_live', + 'playable_in_embed': True, + 'channel': 'TraciJHines', + 'uploader_id': 'TraciJHines', + 'channel_url': 'https://www.youtube.com/channel/UCGLim4T2loE5rwCMdpCIPVg', + 'uploader_url': 'http://www.youtube.com/user/TraciJHines', + 'upload_date': '20150211', + 'duration': 282, + 'availability': 'public', + 'channel_follower_count': int, + 'tags': 'count:39', + 'view_count': int, + 'comment_count': int, + 'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg', + 'like_count': int, + 'uploader': 'TraciJHines', + 'description': 'md5:8af6425f50bd46fbf29f3db0fc3a8364', + 'chapters': list, + + }, + }, { + 'url': 'https://cdn.embedly.com/widgets/media.html?src=https://player.vimeo.com/video/1234567?h=abcdefgh', 'only_matching': True, }] @classmethod - def _extract_embed_urls(cls, url, webpage): - # Bypass suitable check + def _extract_from_webpage(cls, url, webpage): + # Bypass "ie=cls" and suitable check for mobj in re.finditer(r'class=["\']embedly-card["\'][^>]href=["\'](?P[^"\']+)', webpage): - yield mobj.group('url') + yield cls.url_result(mobj.group('url')) for mobj in re.finditer(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P[^&]+)', webpage): - yield urllib.parse.unquote(mobj.group('url')) + yield cls.url_result(urllib.parse.unquote(mobj.group('url'))) def _real_extract(self, url): - return self.url_result(compat_urllib_parse_unquote(self._match_id(url))) + qs = parse_qs(url) + src = urllib.parse.unquote(traverse_obj(qs, ('url', 0)) or '') + if src and YoutubeTabIE.suitable(src): + return self.url_result(src, YoutubeTabIE) + return self.url_result(smuggle_url( + urllib.parse.unquote(traverse_obj(qs, ('src', 0), ('url', 0))), + {'http_headers': {'Referer': url}})) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 610e02f906..baa69d2421 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1,10 +1,10 @@ import contextlib import os -from ..utils import load_plugins +from ..plugins import load_plugins # NB: Must be before other imports so that plugins can be correctly injected -_PLUGIN_CLASSES = load_plugins('extractor', 'IE', {}) +_PLUGIN_CLASSES = load_plugins('extractor', 'IE') _LAZY_LOADER = False if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): @@ -24,3 +24,5 @@ globals().update(_PLUGIN_CLASSES) _ALL_CLASSES[:0] = _PLUGIN_CLASSES.values() + +from .common import _PLUGIN_OVERRIDES # noqa: F401 diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py index dc00edcb31..8b4db3a8ae 100644 --- a/yt_dlp/extractor/fifa.py +++ b/yt_dlp/extractor/fifa.py @@ -17,8 +17,10 @@ class FifaIE(InfoExtractor): 'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b', 'ext': 'mp4', 'categories': ['FIFA Tournaments'], - 'thumbnail': 'https://digitalhub.fifa.com/transform/fa6f0b3e-a2e9-4cf7-9f32-53c57bcb7360/2006_Final_ITA_FRA', + 'thumbnail': 'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero', 'duration': 8165, + 'release_timestamp': 1152403200, + 'release_date': '20060709', }, 'params': {'skip_download': 'm3u8'}, }, { @@ -54,7 +56,7 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) preconnect_link = self._search_regex( - r']+rel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link') + r']+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link') video_details = self._download_json( f'{preconnect_link}/sections/videoDetails/{video_id}', video_id, 'Downloading Video Details', fatal=False) @@ -62,22 +64,9 @@ def _real_extract(self, url): preplay_parameters = self._download_json( f'{preconnect_link}/videoPlayerData/{video_id}', video_id, 'Downloading Preplay Parameters')['preplayParameters'] - cid = preplay_parameters['contentId'] content_data = self._download_json( - f'https://content.uplynk.com/preplay/{cid}/multiple.json', video_id, 'Downloading Content Data', query={ - 'v': preplay_parameters['preplayAPIVersion'], - 'tc': preplay_parameters['tokenCheckAlgorithmVersion'], - 'rn': preplay_parameters['randomNumber'], - 'exp': preplay_parameters['tokenExpirationDate'], - 'ct': preplay_parameters['contentType'], - 'cid': cid, - 'mbtracks': preplay_parameters['tracksAssetNumber'], - 'ad': preplay_parameters['adConfiguration'], - 'ad.preroll': int(preplay_parameters['adPreroll']), - 'ad.cmsid': preplay_parameters['adCMSSourceId'], - 'ad.vid': preplay_parameters['adSourceVideoID'], - 'sig': preplay_parameters['signature'], - }) + 'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters), + video_id, 'Downloading Content Data') formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id) diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index 18363c1b91..47c316664a 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -210,7 +210,7 @@ def _real_extract(self, url): page = self._download_json( 'https://www.funimation.com/api/showexperience/%s/' % experience_id, display_id, headers=headers, expected_status=403, query={ - 'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]), + 'pinst_id': ''.join(random.choices(string.digits + string.ascii_letters, k=8)), }, note=f'Downloading {format_name} JSON') sources = page.get('items') or [] if not sources: diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 2281c71f3d..04677b23f1 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -32,6 +32,7 @@ unified_timestamp, unsmuggle_url, url_or_none, + urljoin, variadic, xpath_attr, xpath_text, @@ -1867,11 +1868,13 @@ class GenericIE(InfoExtractor): 'display_id': 'kelis-4th-of-july', 'ext': 'mp4', 'title': 'Kelis - 4th Of July', - 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', + 'description': 'Kelis - 4th Of July', + 'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', }, 'params': { 'skip_download': True, }, + 'expected_warnings': ['Untested major version'], }, { # KVS Player 'url': 'https://www.kvs-demo.com/embed/105/', @@ -1880,35 +1883,12 @@ class GenericIE(InfoExtractor): 'display_id': 'kelis-4th-of-july', 'ext': 'mp4', 'title': 'Kelis - 4th Of July / Embed Player', - 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', + 'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', }, 'params': { 'skip_download': True, }, }, { - # KVS Player - 'url': 'https://thisvid.com/videos/french-boy-pantsed/', - 'md5': '3397979512c682f6b85b3b04989df224', - 'info_dict': { - 'id': '2400174', - 'display_id': 'french-boy-pantsed', - 'ext': 'mp4', - 'title': 'French Boy Pantsed - ThisVid.com', - 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg', - } - }, { - # KVS Player - 'url': 'https://thisvid.com/embed/2400174/', - 'md5': '3397979512c682f6b85b3b04989df224', - 'info_dict': { - 'id': '2400174', - 'display_id': 'french-boy-pantsed', - 'ext': 'mp4', - 'title': 'French Boy Pantsed - ThisVid.com', - 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg', - } - }, { - # KVS Player 'url': 'https://youix.com/video/leningrad-zoj/', 'md5': '94f96ba95706dc3880812b27b7d8a2b8', 'info_dict': { @@ -1916,8 +1896,8 @@ class GenericIE(InfoExtractor): 'display_id': 'leningrad-zoj', 'ext': 'mp4', 'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com', - 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg', - } + 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg', + }, }, { # KVS Player 'url': 'https://youix.com/embed/18485', @@ -1927,19 +1907,20 @@ class GenericIE(InfoExtractor): 'display_id': 'leningrad-zoj', 'ext': 'mp4', 'title': 'Ленинград - ЗОЖ', - 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg', - } + 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg', + }, }, { # KVS Player 'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/', 'md5': '94166bdb26b4cb1fb9214319a629fc51', 'info_dict': { 'id': '21217', - 'display_id': '40-nochey-40-nights-2016', + 'display_id': '40-nochey-2016', 'ext': 'mp4', 'title': '40 ночей (2016) - BogMedia.org', + 'description': 'md5:4e6d7d622636eb7948275432eb256dc3', 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg', - } + }, }, { # KVS Player (for sites that serve kt_player.js via non-https urls) @@ -1949,9 +1930,9 @@ class GenericIE(InfoExtractor): 'id': '389508', 'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source', 'ext': 'mp4', - 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер', - 'thumbnail': 'http://www.camhub.world/contents/videos_screenshots/389000/389508/preview.mp4.jpg', - } + 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер', + 'thumbnail': r're:https?://www\.camhub\.world/contents/videos_screenshots/389000/389508/preview\.mp4\.jpg', + }, }, { # Reddit-hosted video that will redirect and be processed by RedditIE @@ -2154,7 +2135,52 @@ class GenericIE(InfoExtractor): 'age_limit': 0, 'direct': True, } - } + }, + { + 'note': 'server returns data in brotli compression by default if `accept-encoding: *` is specified.', + 'url': 'https://www.extra.cz/cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867', + 'info_dict': { + 'id': 'cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867', + 'ext': 'mp4', + 'title': 'čauky lidi 70 finall', + 'description': 'čauky lidi 70 finall', + 'thumbnail': 'h', + 'upload_date': '20220606', + 'timestamp': 1654513791, + 'duration': 318.0, + 'direct': True, + 'age_limit': 0, + }, + }, + { + 'note': 'JW Player embed with unicode-escape sequences in URL', + 'url': 'https://www.medici.tv/en/concerts/lahav-shani-mozart-mahler-israel-philharmonic-abu-dhabi-classics', + 'info_dict': { + 'id': 'm', + 'ext': 'mp4', + 'title': 'Lahav Shani conducts the Israel Philharmonic\'s first-ever concert in Abu Dhabi', + 'description': 'Mahler\'s ', + 'uploader': 'www.medici.tv', + 'age_limit': 0, + 'thumbnail': r're:^https?://.+\.jpg', + }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/', + 'md5': 'e2f0a4c329f7986280b7328e24036d60', + 'info_dict': { + 'id': '284002', + 'display_id': 'just-out-of-the-shower-joi', + 'ext': 'mp4', + 'title': 'Just Out Of The Shower JOI - Shooshtime', + 'thumbnail': 'https://i.shoosh.co/contents/videos_screenshots/284000/284002/preview.mp4.jpg', + 'height': 720, + 'age_limit': 18, + }, + }, ] def report_following_redirect(self, new_url): @@ -2220,43 +2246,87 @@ def itunes(key): 'entries': entries, } - def _kvs_getrealurl(self, video_url, license_code): + @classmethod + def _kvs_get_real_url(cls, video_url, license_code): if not video_url.startswith('function/0/'): return video_url # not obfuscated - url_path, _, url_query = video_url.partition('?') - urlparts = url_path.split('/')[2:] - license = self._kvs_getlicensetoken(license_code) - newmagic = urlparts[5][:32] + parsed = urllib.parse.urlparse(video_url[len('function/0/'):]) + license = cls._kvs_get_license_token(license_code) + urlparts = parsed.path.split('/') - for o in range(len(newmagic) - 1, -1, -1): - new = '' - l = (o + sum(int(n) for n in license[o:])) % 32 + HASH_LENGTH = 32 + hash = urlparts[3][:HASH_LENGTH] + indices = list(range(HASH_LENGTH)) - for i in range(0, len(newmagic)): - if i == o: - new += newmagic[l] - elif i == l: - new += newmagic[o] - else: - new += newmagic[i] - newmagic = new + # Swap indices of hash according to the destination calculated from the license token + accum = 0 + for src in reversed(range(HASH_LENGTH)): + accum += license[src] + dest = (src + accum) % HASH_LENGTH + indices[src], indices[dest] = indices[dest], indices[src] - urlparts[5] = newmagic + urlparts[5][32:] - return '/'.join(urlparts) + '?' + url_query + urlparts[3] = ''.join(hash[index] for index in indices) + urlparts[3][HASH_LENGTH:] + return urllib.parse.urlunparse(parsed._replace(path='/'.join(urlparts))) - def _kvs_getlicensetoken(self, license): - modlicense = license.replace('$', '').replace('0', '1') - center = int(len(modlicense) / 2) + @staticmethod + def _kvs_get_license_token(license): + license = license.replace('$', '') + license_values = [int(char) for char in license] + + modlicense = license.replace('0', '1') + center = len(modlicense) // 2 fronthalf = int(modlicense[:center + 1]) backhalf = int(modlicense[center:]) + modlicense = str(4 * abs(fronthalf - backhalf))[:center + 1] - modlicense = str(4 * abs(fronthalf - backhalf)) - retval = '' - for o in range(0, center + 1): - for i in range(1, 5): - retval += str((int(license[o + i]) + int(modlicense[o])) % 10) - return retval + return [ + (license_values[index + offset] + current) % 10 + for index, current in enumerate(map(int, modlicense)) + for offset in range(4) + ] + + def _extract_kvs(self, url, webpage, video_id): + flashvars = self._search_json( + r'(?s:]*>.*?var\s+flashvars\s*=)', + webpage, 'flashvars', video_id, transform_source=js_to_json) + + # extract the part after the last / as the display_id from the + # canonical URL. + display_id = self._search_regex( + r'(?:' + r'|)', + webpage, 'display_id', fatal=False) + title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)', webpage, 'title') + + thumbnail = flashvars['preview_url'] + if thumbnail.startswith('//'): + protocol, _, _ = url.partition('/') + thumbnail = protocol + thumbnail + + url_keys = list(filter(re.compile(r'^video_(?:url|alt_url\d*)$').match, flashvars.keys())) + formats = [] + for key in url_keys: + if '/get_file/' not in flashvars[key]: + continue + format_id = flashvars.get(f'{key}_text', key) + formats.append({ + 'url': urljoin(url, self._kvs_get_real_url(flashvars[key], flashvars['license_code'])), + 'format_id': format_id, + 'ext': 'mp4', + **(parse_resolution(format_id) or parse_resolution(flashvars[key])), + 'http_headers': {'Referer': url}, + }) + if not formats[-1].get('height'): + formats[-1]['quality'] = 1 + + return { + 'id': flashvars['video_id'], + 'display_id': display_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } def _real_extract(self, url): if url.startswith('//'): @@ -2312,7 +2382,7 @@ def _real_extract(self, url): # It may probably better to solve this by checking Content-Type for application/octet-stream # after a HEAD request, but not sure if we can rely on this. full_response = self._request_webpage(url, video_id, headers={ - 'Accept-Encoding': '*', + 'Accept-Encoding': 'identity', **smuggled_data.get('http_headers', {}) }) new_url = full_response.geturl() @@ -2565,6 +2635,17 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}): self.report_detected('video.js embed') return [{'formats': formats, 'subtitles': subtitles}] + # Look for generic KVS player (before json-ld bc of some urls that break otherwise) + found = self._search_regex(( + r']+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P\d+(?:\.\d+)+)\1[^>]*>', + r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P\d+(?:\.\d+)+)\2\s*,', + ), webpage, 'KVS player', group='ver', default=False) + if found: + self.report_detected('KWS Player') + if found.split('.')[0] not in ('4', '5', '6'): + self.report_warning(f'Untested major version ({found}) in player engine - download may fail.') + return [self._extract_kvs(url, webpage, video_id)] + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld(webpage, video_id, default={}) if json_ld.get('url') not in (url, None): @@ -2607,52 +2688,6 @@ def filter_video(urls): ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage)) if found: self.report_detected('JW Player embed') - if not found: - # Look for generic KVS player - found = re.search(r'', webpage) - flashvars = self._parse_json(flashvars.group(1), video_id, transform_source=js_to_json) - - # extract the part after the last / as the display_id from the - # canonical URL. - display_id = self._search_regex( - r'(?:' - r'|)', - webpage, 'display_id', fatal=False - ) - title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)', webpage, 'title') - - thumbnail = flashvars['preview_url'] - if thumbnail.startswith('//'): - protocol, _, _ = url.partition('/') - thumbnail = protocol + thumbnail - - url_keys = list(filter(re.compile(r'video_url|video_alt_url\d*').fullmatch, flashvars.keys())) - formats = [] - for key in url_keys: - if '/get_file/' not in flashvars[key]: - continue - format_id = flashvars.get(f'{key}_text', key) - formats.append({ - 'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']), - 'format_id': format_id, - 'ext': 'mp4', - **(parse_resolution(format_id) or parse_resolution(flashvars[key])) - }) - if not formats[-1].get('height'): - formats[-1]['quality'] = 1 - - return [{ - 'id': flashvars['video_id'], - 'display_id': display_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - }] if not found: # Broaden the search a little bit found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)) @@ -2733,6 +2768,7 @@ def filter_video(urls): entries = [] for video_url in orderedSet(found): + video_url = video_url.encode().decode('unicode-escape') video_url = unescapeHTML(video_url) video_url = video_url.replace('\\/', '/') video_url = urllib.parse.urljoin(url, video_url) diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index dbc688fb92..eba89f787e 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -527,11 +527,14 @@ def _extract_vms_player_js(self, webpage, video_id): webpack_js_url = self._proto_relative_url(self._search_regex( r'