mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-06-01 19:08:14 +02:00
Compare commits
14 Commits
b76f33fdb5
...
cf88d16293
Author | SHA1 | Date | |
---|---|---|---|
|
cf88d16293 | ||
|
12d8ea8246 | ||
|
454bf0baf8 | ||
|
41cef9410d | ||
|
b01f22cdbf | ||
|
97c4c86043 | ||
|
4d72297e54 | ||
|
da483f2c52 | ||
|
c91b90520f | ||
|
adb148358e | ||
|
ed44e23245 | ||
|
b9aeb3e574 | ||
|
c36cac6b54 | ||
|
d0ef06593d |
4
Makefile
4
Makefile
|
@ -73,11 +73,11 @@ codetest:
|
|||
flake8 .
|
||||
|
||||
test:
|
||||
$(PYTHON) -m pytest
|
||||
$(PYTHON) -m pytest -Werror
|
||||
$(MAKE) codetest
|
||||
|
||||
offlinetest: codetest
|
||||
$(PYTHON) -m pytest -k "not download"
|
||||
$(PYTHON) -m pytest -Werror -m "not download"
|
||||
|
||||
CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort
|
||||
CODE_FOLDERS != $(CODE_FOLDERS_CMD)
|
||||
|
|
|
@ -108,7 +108,6 @@ #### Alternatives
|
|||
[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win7 SP1+) standalone x86 (32-bit) binary
|
||||
[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`<br/> ([Not recommended](#standalone-py2exe-builds-windows))
|
||||
[yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary
|
||||
[yt-dlp_linux.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux.zip)|Unpackaged Linux executable (no auto-update)
|
||||
[yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary
|
||||
[yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary
|
||||
[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update)
|
||||
|
@ -170,7 +169,7 @@ # To update to nightly from stable executable/binary:
|
|||
yt-dlp --update-to nightly
|
||||
|
||||
# To install nightly with pip:
|
||||
python3 -m pip install -U --pre yt-dlp[default]
|
||||
python3 -m pip install -U --pre "yt-dlp[default]"
|
||||
```
|
||||
|
||||
## DEPENDENCIES
|
||||
|
@ -202,7 +201,7 @@ #### Impersonation
|
|||
The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting.
|
||||
|
||||
* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
|
||||
* Can be installed with the `curl-cffi` group, e.g. `pip install yt-dlp[default,curl-cffi]`
|
||||
* Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"`
|
||||
* Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds
|
||||
|
||||
|
||||
|
@ -1760,7 +1759,7 @@ # EXTRACTOR ARGUMENTS
|
|||
#### youtube
|
||||
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen`, `mediaconnect` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
|
||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. The `android` clients will always be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients.
|
||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
|
||||
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
||||
|
|
|
@ -1,4 +0,0 @@
|
|||
@echo off
|
||||
|
||||
>&2 echo run_tests.bat is deprecated. Please use `devscripts/run_tests.py` instead
|
||||
python %~dp0run_tests.py %~1
|
|
@ -1,4 +0,0 @@
|
|||
#!/usr/bin/env sh
|
||||
|
||||
>&2 echo 'run_tests.sh is deprecated. Please use `devscripts/run_tests.py` instead'
|
||||
python3 devscripts/run_tests.py "$1"
|
17
pyinst.py
17
pyinst.py
|
@ -1,17 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Allow execution from anywhere
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
import warnings
|
||||
|
||||
from bundle.pyinstaller import main
|
||||
|
||||
warnings.warn(DeprecationWarning('`pyinst.py` is deprecated and will be removed in a future version. '
|
||||
'Use `bundle.pyinstaller` instead'))
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
36
setup.py
36
setup.py
|
@ -1,36 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Allow execution from anywhere
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
import warnings
|
||||
|
||||
|
||||
if sys.argv[1:2] == ['py2exe']:
|
||||
warnings.warn(DeprecationWarning('`setup.py py2exe` is deprecated and will be removed in a future version. '
|
||||
'Use `bundle.py2exe` instead'))
|
||||
|
||||
import bundle.py2exe
|
||||
|
||||
bundle.py2exe.main()
|
||||
|
||||
elif 'build_lazy_extractors' in sys.argv:
|
||||
warnings.warn(DeprecationWarning('`setup.py build_lazy_extractors` is deprecated and will be removed in a future version. '
|
||||
'Use `devscripts.make_lazy_extractors` instead'))
|
||||
|
||||
import subprocess
|
||||
|
||||
os.chdir(sys.path[0])
|
||||
print('running build_lazy_extractors')
|
||||
subprocess.run([sys.executable, 'devscripts/make_lazy_extractors.py'])
|
||||
|
||||
else:
|
||||
|
||||
print(
|
||||
'ERROR: Building by calling `setup.py` is deprecated. '
|
||||
'Use a build frontend like `build` instead. ',
|
||||
'Refer to https://build.pypa.io for more info', file=sys.stderr)
|
||||
sys.exit(1)
|
|
@ -1912,7 +1912,7 @@ def test_search_nextjs_data(self):
|
|||
self.assertEqual(self.ie._search_nextjs_data('', None, fatal=False), {})
|
||||
self.assertEqual(self.ie._search_nextjs_data('', None, default=None), None)
|
||||
self.assertEqual(self.ie._search_nextjs_data('', None, default={}), {})
|
||||
with self.assertRaises(DeprecationWarning):
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})
|
||||
|
||||
|
||||
|
|
|
@ -101,7 +101,7 @@ def _real_extract(self, url):
|
|||
site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
|
||||
playlist_title = self._og_search_title(webpage, default=None)
|
||||
if site_name and playlist_title:
|
||||
playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0]
|
||||
playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, maxsplit=1)[0]
|
||||
playlist_description = self._og_search_description(webpage, default=None)
|
||||
if playlist_description:
|
||||
playlist_description = playlist_description.replace('\xa0', ' ')
|
||||
|
|
|
@ -3531,7 +3531,7 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
|
|||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
# of jwplayer.flash.swf
|
||||
rtmp_url_parts = re.split(
|
||||
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||
r'((?:mp4|mp3|flv):)', source_url, maxsplit=1)
|
||||
if len(rtmp_url_parts) == 3:
|
||||
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||
a_format.update({
|
||||
|
|
|
@ -134,7 +134,7 @@ def _make_playlist_result(self, url):
|
|||
title = re.split(
|
||||
r'(?i)\s*\|\s*ThisVid\.com\s*$',
|
||||
self._og_search_title(webpage, default=None)
|
||||
or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False) or '', 1)[0] or None
|
||||
or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False) or '', maxsplit=1)[0] or None
|
||||
|
||||
return self.playlist_from_matches(
|
||||
self._generate_playlist_entries(url, playlist_id, webpage),
|
||||
|
|
|
@ -467,13 +467,13 @@ def _real_extract(self, url):
|
|||
'source_preference': 1,
|
||||
'height': height,
|
||||
})
|
||||
elif format_id == 'hls':
|
||||
elif format_id.startswith('hls') and format_id != 'hls_live_playback':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
format_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id, fatal=False, live=is_live)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
elif format_id.startswith('dash_'):
|
||||
elif format_id.startswith('dash') and format_id not in ('dash_live_playback', 'dash_uni'):
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
format_url, video_id, mpd_id=format_id, fatal=False)
|
||||
formats.extend(fmts)
|
||||
|
|
|
@ -2353,6 +2353,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
'format': '17', # 3gp format available on android
|
||||
'extractor_args': {'youtube': {'player_client': ['android']}},
|
||||
},
|
||||
'skip': 'android client broken',
|
||||
},
|
||||
{
|
||||
# Skip download of additional client configs (remix client config in this case)
|
||||
|
@ -2730,7 +2731,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
'heatmap': 'count:100',
|
||||
},
|
||||
'params': {
|
||||
'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
|
||||
'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
|
||||
},
|
||||
},
|
||||
]
|
||||
|
@ -3662,8 +3663,6 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
|
|||
yt_query = {
|
||||
'videoId': video_id,
|
||||
}
|
||||
if _split_innertube_client(client)[0] in ('android', 'android_embedscreen'):
|
||||
yt_query['params'] = 'CgIIAQ=='
|
||||
|
||||
pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
|
||||
if pp_arg:
|
||||
|
@ -3679,19 +3678,24 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
|
|||
|
||||
def _get_requested_clients(self, url, smuggled_data):
|
||||
requested_clients = []
|
||||
default = ['ios', 'android', 'web']
|
||||
android_clients = []
|
||||
default = ['ios', 'web']
|
||||
allowed_clients = sorted(
|
||||
(client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
|
||||
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
|
||||
for client in self._configuration_arg('player_client'):
|
||||
if client in allowed_clients:
|
||||
requested_clients.append(client)
|
||||
elif client == 'default':
|
||||
if client == 'default':
|
||||
requested_clients.extend(default)
|
||||
elif client == 'all':
|
||||
requested_clients.extend(allowed_clients)
|
||||
else:
|
||||
elif client not in allowed_clients:
|
||||
self.report_warning(f'Skipping unsupported client {client}')
|
||||
elif client.startswith('android'):
|
||||
android_clients.append(client)
|
||||
else:
|
||||
requested_clients.append(client)
|
||||
# Force deprioritization of broken Android clients for format de-duplication
|
||||
requested_clients.extend(android_clients)
|
||||
if not requested_clients:
|
||||
requested_clients = default
|
||||
|
||||
|
@ -3910,6 +3914,14 @@ def build_fragments(f):
|
|||
f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
|
||||
|
||||
client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
|
||||
# Android client formats are broken due to integrity check enforcement
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
|
||||
is_broken = client_name and client_name.startswith(short_client_name('android'))
|
||||
if is_broken:
|
||||
self.report_warning(
|
||||
f'{video_id}: Android client formats are broken and may yield HTTP Error 403. '
|
||||
'They will be deprioritized', only_once=True)
|
||||
|
||||
name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
|
||||
fps = int_or_none(fmt.get('fps')) or 0
|
||||
dct = {
|
||||
|
@ -3922,7 +3934,7 @@ def build_fragments(f):
|
|||
name, fmt.get('isDrc') and 'DRC',
|
||||
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
||||
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
||||
throttled and 'THROTTLED', is_damaged and 'DAMAGED',
|
||||
throttled and 'THROTTLED', is_damaged and 'DAMAGED', is_broken and 'BROKEN',
|
||||
(self.get_param('verbose') or all_formats) and client_name,
|
||||
delim=', '),
|
||||
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
|
||||
|
@ -3940,8 +3952,8 @@ def build_fragments(f):
|
|||
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
|
||||
'desc' if language_preference < -1 else '') or None,
|
||||
'language_preference': language_preference,
|
||||
# Strictly de-prioritize damaged and 3gp formats
|
||||
'preference': -10 if is_damaged else -2 if itag == '17' else None,
|
||||
# Strictly de-prioritize broken, damaged and 3gp formats
|
||||
'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
|
||||
}
|
||||
mime_mobj = re.match(
|
||||
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
|
||||
|
|
|
@ -2522,7 +2522,7 @@ def fixup(url):
|
|||
return False
|
||||
# "#" cannot be stripped out since it is part of the URI
|
||||
# However, it can be safely stripped out if following a whitespace
|
||||
return re.split(r'\s#', url, 1)[0].rstrip()
|
||||
return re.split(r'\s#', url, maxsplit=1)[0].rstrip()
|
||||
|
||||
with contextlib.closing(batch_fd) as fd:
|
||||
return [url for url in map(fixup, fd) if url]
|
||||
|
|
Loading…
Reference in New Issue
Block a user