mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-06-01 19:08:14 +02:00
Compare commits
19 Commits
43361bad89
...
c48b325a43
Author | SHA1 | Date | |
---|---|---|---|
|
c48b325a43 | ||
|
12d8ea8246 | ||
|
8e15177b41 | ||
|
dd9ad97b1f | ||
|
61b17437dc | ||
|
7975ddf245 | ||
|
6d8a53d870 | ||
|
4813173e45 | ||
|
41ba4a808b | ||
|
351dc0bc33 | ||
|
518c1afc15 | ||
|
85ec2a337a | ||
|
b207d26f83 | ||
|
01395a3434 | ||
|
cf212d0a33 | ||
|
34bf80c62d | ||
|
0ed18251d2 | ||
|
773b554bf5 | ||
|
5982686f86 |
|
@ -666,7 +666,7 @@ ## Filesystem Options:
|
|||
The name of the browser to load cookies
|
||||
from. Currently supported browsers are:
|
||||
brave, chrome, chromium, edge, firefox,
|
||||
opera, safari, vivaldi. Optionally, the
|
||||
opera, safari, vivaldi, whale. Optionally, the
|
||||
KEYRING used for decrypting Chromium cookies
|
||||
on Linux, the name/path of the PROFILE to
|
||||
load cookies from, and the CONTAINER name
|
||||
|
@ -1760,7 +1760,7 @@ # EXTRACTOR ARGUMENTS
|
|||
#### youtube
|
||||
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
|
||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. The `android` clients will always be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients.
|
||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
|
||||
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
||||
|
@ -1813,8 +1813,8 @@ #### tiktok
|
|||
* `app_name`: Default app name to use with mobile API calls, e.g. `trill`
|
||||
* `app_version`: Default app version to use with mobile API calls - should be set along with `manifest_app_version`, e.g. `34.1.2`
|
||||
* `manifest_app_version`: Default numeric app version to use with mobile API calls, e.g. `2023401020`
|
||||
* `aid`: Default app ID to use with API calls, e.g. `1180`
|
||||
* `app_info`: One or more app info strings in the format of `<iid>/[app_name]/[app_version]/[manifest_app_version]/[aid]`, where `iid` is the unique app install ID. `iid` is the only required value; all other values and their `/` separators can be omitted, e.g. `tiktok:app_info=1234567890123456789` or `tiktok:app_info=123,456/trill///1180,789//34.0.1/340001`
|
||||
* `aid`: Default app ID to use with mobile API calls, e.g. `1180`
|
||||
* `app_info`: Enable mobile API extraction with one or more app info strings in the format of `<iid>/[app_name]/[app_version]/[manifest_app_version]/[aid]`, where `iid` is the unique app install ID. `iid` is the only required value; all other values and their `/` separators can be omitted, e.g. `tiktok:app_info=1234567890123456789` or `tiktok:app_info=123,456/trill///1180,789//34.0.1/340001`
|
||||
|
||||
#### rokfinchannel
|
||||
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
|
||||
|
|
|
@ -156,6 +156,11 @@ def test_sanitize_filename(self):
|
|||
self.assertEqual('yes no', sanitize_filename('yes? no', is_id=False))
|
||||
self.assertEqual('this - that', sanitize_filename('this: that', is_id=False))
|
||||
|
||||
self.assertEqual('abc_<>\\*|de', sanitize_filename('abc/<>\\*|de', keep_bad_win_chars=True, is_id=False))
|
||||
self.assertEqual('xxx_<>\\*|', sanitize_filename('xxx/<>\\*|', keep_bad_win_chars=True, is_id=False))
|
||||
self.assertEqual('yes? no', sanitize_filename('yes? no', keep_bad_win_chars=True, is_id=False))
|
||||
self.assertEqual('this: that', sanitize_filename('this: that', keep_bad_win_chars=True, is_id=False))
|
||||
|
||||
self.assertEqual(sanitize_filename('AT&T'), 'AT&T')
|
||||
aumlaut = 'ä'
|
||||
self.assertEqual(sanitize_filename(aumlaut), aumlaut)
|
||||
|
@ -166,6 +171,10 @@ def test_sanitize_filename(self):
|
|||
sanitize_filename('New World record at 0:12:34'),
|
||||
'New World record at 0_12_34')
|
||||
|
||||
self.assertEqual(
|
||||
sanitize_filename('New World record at 0:12:34', keep_bad_win_chars=True),
|
||||
'New World record at 0:12:34')
|
||||
|
||||
self.assertEqual(sanitize_filename('--gasdgf'), '--gasdgf')
|
||||
self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
|
||||
self.assertEqual(sanitize_filename('--gasdgf', is_id=False), '_-gasdgf')
|
||||
|
@ -217,6 +226,10 @@ def test_sanitize_ids(self):
|
|||
self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
|
||||
self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
|
||||
|
||||
self.assertEqual(sanitize_filename('_n_cd26wFpw', keep_bad_win_chars=True, is_id=True), '_n_cd26wFpw')
|
||||
self.assertEqual(sanitize_filename('_BD_eEpuzXw', keep_bad_win_chars=True, is_id=True), '_BD_eEpuzXw')
|
||||
self.assertEqual(sanitize_filename('N0Y__7-UOdI', keep_bad_win_chars=True, is_id=True), 'N0Y__7-UOdI')
|
||||
|
||||
def test_sanitize_path(self):
|
||||
if sys.platform != 'win32':
|
||||
return
|
||||
|
|
|
@ -1293,10 +1293,12 @@ def get_value(mdict):
|
|||
na = self.params.get('outtmpl_na_placeholder', 'NA')
|
||||
|
||||
def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
|
||||
return sanitize_filename(str(value), restricted=restricted, is_id=(
|
||||
bool(re.search(r'(^|[_.])id(\.|$)', key))
|
||||
if 'filename-sanitization' in self.params['compat_opts']
|
||||
else NO_DEFAULT))
|
||||
return sanitize_filename(
|
||||
str(value), self.params.get('keep_bad_win_chars', False), restricted=restricted,
|
||||
is_id=(
|
||||
bool(re.search(r'(^|[_.])id(\.|$)', key))
|
||||
if 'filename-sanitization' in self.params['compat_opts']
|
||||
else NO_DEFAULT))
|
||||
|
||||
sanitizer = sanitize if callable(sanitize) else filename_sanitizer
|
||||
sanitize = bool(sanitize)
|
||||
|
|
|
@ -815,6 +815,7 @@ def parse_options(argv=None):
|
|||
'autonumber_start': opts.autonumber_start,
|
||||
'restrictfilenames': opts.restrictfilenames,
|
||||
'windowsfilenames': opts.windowsfilenames,
|
||||
'keep_bad_win_chars': opts.keep_bad_win_chars,
|
||||
'ignoreerrors': opts.ignoreerrors,
|
||||
'force_generic_extractor': opts.force_generic_extractor,
|
||||
'allowed_extractors': opts.allowed_extractors or ['default'],
|
||||
|
|
|
@ -46,7 +46,7 @@
|
|||
from .utils._utils import _YDLLogger
|
||||
from .utils.networking import normalize_url
|
||||
|
||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
|
||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'}
|
||||
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
|
||||
|
||||
|
||||
|
@ -219,6 +219,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
|||
'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
|
||||
'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
|
||||
'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
|
||||
'whale': os.path.join(appdata_local, R'Naver\Naver Whale\User Data'),
|
||||
}[browser_name]
|
||||
|
||||
elif sys.platform == 'darwin':
|
||||
|
@ -230,6 +231,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
|||
'edge': os.path.join(appdata, 'Microsoft Edge'),
|
||||
'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
|
||||
'vivaldi': os.path.join(appdata, 'Vivaldi'),
|
||||
'whale': os.path.join(appdata, 'Naver/Whale'),
|
||||
}[browser_name]
|
||||
|
||||
else:
|
||||
|
@ -241,6 +243,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
|||
'edge': os.path.join(config, 'microsoft-edge'),
|
||||
'opera': os.path.join(config, 'opera'),
|
||||
'vivaldi': os.path.join(config, 'vivaldi'),
|
||||
'whale': os.path.join(config, 'naver-whale'),
|
||||
}[browser_name]
|
||||
|
||||
# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
|
||||
|
@ -252,6 +255,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
|||
'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
|
||||
'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
|
||||
'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
|
||||
'whale': 'Whale',
|
||||
}[browser_name]
|
||||
|
||||
browsers_without_profiles = {'opera'}
|
||||
|
|
|
@ -288,7 +288,6 @@
|
|||
from .buzzfeed import BuzzFeedIE
|
||||
from .byutv import BYUtvIE
|
||||
from .c56 import C56IE
|
||||
from .cableav import CableAVIE
|
||||
from .callin import CallinIE
|
||||
from .caltrans import CaltransIE
|
||||
from .cam4 import CAM4IE
|
||||
|
@ -548,7 +547,6 @@
|
|||
EggheadLessonIE,
|
||||
)
|
||||
from .eighttracks import EightTracksIE
|
||||
from .einthusan import EinthusanIE
|
||||
from .eitb import EitbIE
|
||||
from .elementorembed import ElementorEmbedIE
|
||||
from .elonet import ElonetIE
|
||||
|
@ -861,10 +859,6 @@
|
|||
)
|
||||
from .ixigua import IxiguaIE
|
||||
from .izlesene import IzleseneIE
|
||||
from .jable import (
|
||||
JableIE,
|
||||
JablePlaylistIE,
|
||||
)
|
||||
from .jamendo import (
|
||||
JamendoIE,
|
||||
JamendoAlbumIE,
|
||||
|
@ -1499,7 +1493,6 @@
|
|||
)
|
||||
from .popcorntimes import PopcorntimesIE
|
||||
from .popcorntv import PopcornTVIE
|
||||
from .porn91 import Porn91IE
|
||||
from .pornbox import PornboxIE
|
||||
from .pornflip import PornFlipIE
|
||||
from .pornhub import (
|
||||
|
@ -2377,7 +2370,6 @@
|
|||
)
|
||||
from .xanimu import XanimuIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xfileshare import XFileShareIE
|
||||
from .xhamster import (
|
||||
XHamsterIE,
|
||||
XHamsterEmbedIE,
|
||||
|
@ -2432,8 +2424,6 @@
|
|||
YouNowMomentIE,
|
||||
)
|
||||
from .youporn import YouPornIE
|
||||
from .yourporn import YourPornIE
|
||||
from .yourupload import YourUploadIE
|
||||
from .zaiko import (
|
||||
ZaikoIE,
|
||||
ZaikoETicketIE,
|
||||
|
|
|
@ -602,7 +602,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'url': 'http://www.bbc.com/news/world-europe-32668511',
|
||||
'info_dict': {
|
||||
'id': 'world-europe-32668511',
|
||||
'title': 'Russia stages massive WW2 parade',
|
||||
'title': 'Russia stages massive WW2 parade despite Western boycott',
|
||||
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
|
@ -623,6 +623,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'info_dict': {
|
||||
'id': '3662a707-0af9-3149-963f-47bea720b460',
|
||||
'title': 'BUGGER',
|
||||
'description': r're:BUGGER The recent revelations by the whistleblower Edward Snowden were fascinating. .{211}\.{3}$',
|
||||
},
|
||||
'playlist_count': 18,
|
||||
}, {
|
||||
|
@ -631,14 +632,14 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'info_dict': {
|
||||
'id': 'p02mprgb',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
|
||||
'description': 'md5:2868290467291b37feda7863f7a83f54',
|
||||
'title': 'Germanwings crash site aerial video',
|
||||
'description': r're:(?s)Aerial video showed the site where the Germanwings flight 4U 9525, .{156} BFM TV\.$',
|
||||
'duration': 47,
|
||||
'timestamp': 1427219242,
|
||||
'upload_date': '20150324',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/news/1024/media/images/81879000/jpg/_81879090_81879089.jpg',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
|
@ -656,21 +657,24 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'now SIMORGH_DATA with no video',
|
||||
}, {
|
||||
# single video embedded with data-playable containing XML playlists (regional section)
|
||||
'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'info_dict': {
|
||||
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'id': '39275083',
|
||||
'display_id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
|
||||
'description': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'timestamp': 1434713142,
|
||||
'upload_date': '20150619',
|
||||
'thumbnail': 'https://a.files.bbci.co.uk/worldservice/live/assets/images/2015/06/19/150619132146_honduras_hsopitales_militares_640x360_aptn_nocredit.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# single video from video playlist embedded with vxp-playlist-data JSON
|
||||
'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
|
||||
|
@ -683,22 +687,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
# single video story with digitalData
|
||||
# single video story with __PWA_PRELOADED_STATE__
|
||||
'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
|
||||
'info_dict': {
|
||||
'id': 'p02q6gc4',
|
||||
'ext': 'flv',
|
||||
'title': 'Sri Lanka’s spicy secret',
|
||||
'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
|
||||
'timestamp': 1437674293,
|
||||
'upload_date': '20150723',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tasting the spice of life in Jaffna',
|
||||
'description': r're:(?s)BBC Travel Show’s Henry Golding explores the city of Jaffna .{151} aftertaste\.$',
|
||||
'timestamp': 1646058397,
|
||||
'upload_date': '20220228',
|
||||
'duration': 255,
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1920xn/p02vxvkn.jpg',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video story without digitalData
|
||||
'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
|
||||
|
@ -710,12 +713,10 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'timestamp': 1415867444,
|
||||
'upload_date': '20141113',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
'skip': 'redirects to TopGear home page',
|
||||
}, {
|
||||
# single video embedded with Morph
|
||||
# TODO: replacement test page
|
||||
'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
|
||||
'info_dict': {
|
||||
'id': 'p041vhd0',
|
||||
|
@ -726,27 +727,22 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'uploader': 'BBC Sport',
|
||||
'uploader_id': 'bbc_sport',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Georestricted to UK',
|
||||
'skip': 'Video no longer in page',
|
||||
}, {
|
||||
# single video with playlist.sxml URL in playlist param
|
||||
# single video in __INITIAL_DATA__
|
||||
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
||||
'info_dict': {
|
||||
'id': 'p02xycnp',
|
||||
'ext': 'mp4',
|
||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
|
||||
'title': 'Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': r're:(?s)BBC Sport\'s David Ornstein rounds up the latest transfer reports, .{359} here\.$',
|
||||
'timestamp': 1437750175,
|
||||
'upload_date': '20150724',
|
||||
'thumbnail': r're:https?://.+/.+media/images/69320000/png/_69320754_mmgossipcolumnextraaugust18.png',
|
||||
'duration': 140,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# article with multiple videos embedded with playlist.sxml in playlist param
|
||||
# article with multiple videos embedded with Morph.setPayload
|
||||
'url': 'http://www.bbc.com/sport/0/football/34475836',
|
||||
'info_dict': {
|
||||
'id': '34475836',
|
||||
|
@ -754,6 +750,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
# Testing noplaylist
|
||||
'url': 'http://www.bbc.com/sport/0/football/34475836',
|
||||
'info_dict': {
|
||||
'id': 'p034ppnv',
|
||||
'ext': 'mp4',
|
||||
'title': 'All you need to know about Jurgen Klopp',
|
||||
'timestamp': 1444335081,
|
||||
'upload_date': '20151008',
|
||||
'duration': 122.0,
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/onesport/cps/976/cpsprodpb/7542/production/_85981003_klopp.jpg',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
},
|
||||
}, {
|
||||
# school report article with single video
|
||||
'url': 'http://www.bbc.co.uk/schoolreport/35744779',
|
||||
|
@ -762,6 +773,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'title': 'School which breaks down barriers in Jerusalem',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'skip': 'redirects to Young Reporter home page https://www.bbc.co.uk/news/topics/cg41ylwv43pt',
|
||||
}, {
|
||||
# single video with playlist URL from weather section
|
||||
'url': 'http://www.bbc.com/weather/features/33601775',
|
||||
|
@ -778,18 +790,33 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1437785037,
|
||||
'upload_date': '20150725',
|
||||
'duration': 105,
|
||||
},
|
||||
}, {
|
||||
# video with window.__INITIAL_DATA__ and value as JSON string
|
||||
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
|
||||
'info_dict': {
|
||||
'id': 'p0b71qth',
|
||||
'id': 'p0b779gc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Why France is making this woman a national hero',
|
||||
'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
|
||||
'description': r're:(?s)France is honouring the US-born 20th Century singer and activist Josephine .{208} Second World War.',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1638230731,
|
||||
'upload_date': '20211130',
|
||||
'timestamp': 1638215626,
|
||||
'upload_date': '20211129',
|
||||
'duration': 125,
|
||||
},
|
||||
}, {
|
||||
# video with script id __NEXT_DATA__ and value as JSON string
|
||||
'url': 'https://www.bbc.com/news/uk-68546268',
|
||||
'info_dict': {
|
||||
'id': 'p0hj0lq7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nasser Hospital doctor describes his treatment by IDF',
|
||||
'description': r're:(?s)Doctor Abu Sabha said he was detained by Israeli forces after .{276} hostages\."$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1710188248,
|
||||
'upload_date': '20240311',
|
||||
'duration': 104,
|
||||
},
|
||||
}, {
|
||||
# single video article embedded with data-media-vpid
|
||||
|
@ -817,6 +844,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'uploader': 'Radio 3',
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
|
||||
'info_dict': {
|
||||
|
@ -824,6 +852,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'ext': 'mp4',
|
||||
'title': 'md5:2fabf12a726603193a2879a055f72514',
|
||||
'description': 'Learn English words and phrases from this story',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1200x675/p06pq9gk.jpg',
|
||||
},
|
||||
'add_ie': [BBCCoUkIE.ie_key()],
|
||||
}, {
|
||||
|
@ -832,28 +861,30 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'info_dict': {
|
||||
'id': 'p07c6sb9',
|
||||
'ext': 'mp4',
|
||||
'title': 'How positive thinking is harming your happiness',
|
||||
'alt_title': 'The downsides of positive thinking',
|
||||
'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
|
||||
'title': 'The downsides of positive thinking',
|
||||
'description': 'The downsides of positive thinking',
|
||||
'duration': 235,
|
||||
'thumbnail': r're:https?://.+/p07c9dsr.jpg',
|
||||
'upload_date': '20190604',
|
||||
'categories': ['Psychology'],
|
||||
'thumbnail': r're:https?://.+/p07c9dsr\.(?:jpg|webp|png)',
|
||||
'upload_date': '20220223',
|
||||
'timestamp': 1645632746,
|
||||
},
|
||||
}, {
|
||||
# BBC Sounds
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/w3ct5rgx',
|
||||
'info_dict': {
|
||||
'id': 'm001q789',
|
||||
'id': 'p0hrw4nr',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Night Tracks Mix - Music for the darkling hour',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
|
||||
'chapters': 'count:8',
|
||||
'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
|
||||
'uploader': 'Radio 3',
|
||||
'duration': 1800,
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
'title': 'Are our coastlines being washed away?',
|
||||
'description': r're:(?s)Around the world, coastlines are constantly changing .{2000,} Images\)$',
|
||||
'timestamp': 1713556800,
|
||||
'upload_date': '20240419',
|
||||
'duration': 1588,
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0hrnxbl.jpg',
|
||||
'uploader': 'World Service',
|
||||
'uploader_id': 'bbc_world_service',
|
||||
'series': 'CrowdScience',
|
||||
'chapters': [],
|
||||
}
|
||||
}, { # onion routes
|
||||
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
|
||||
'only_matching': True,
|
||||
|
@ -1008,8 +1039,7 @@ def _real_extract(self, url):
|
|||
webpage, 'group id', default=None)
|
||||
if group_id:
|
||||
return self.url_result(
|
||||
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
||||
ie=BBCCoUkIE.ie_key())
|
||||
f'https://www.bbc.co.uk/programmes/{group_id}', BBCCoUkIE)
|
||||
|
||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
programme_id = self._search_regex(
|
||||
|
@ -1069,83 +1099,133 @@ def _real_extract(self, url):
|
|||
}
|
||||
|
||||
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
||||
# There are several setPayload calls may be present but the video
|
||||
# seems to be always related to the first one
|
||||
morph_payload = self._parse_json(
|
||||
self._search_regex(
|
||||
r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
|
||||
webpage, 'morph payload', default='{}'),
|
||||
playlist_id, fatal=False)
|
||||
# Several setPayload calls may be present but the video(s)
|
||||
# should be in one that mentions leadMedia or videoData
|
||||
morph_payload = self._search_json(
|
||||
r'\bMorph\s*\.\s*setPayload\s*\([^,]+,', webpage, 'morph payload', playlist_id,
|
||||
contains_pattern=r'{(?s:(?:(?!</script>).)+(?:"leadMedia"|\\"videoData\\")\s*:.+)}',
|
||||
default={})
|
||||
if morph_payload:
|
||||
components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
|
||||
for component in components:
|
||||
if not isinstance(component, dict):
|
||||
continue
|
||||
lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
|
||||
if not lead_media:
|
||||
continue
|
||||
identifiers = lead_media.get('identifiers')
|
||||
if not identifiers or not isinstance(identifiers, dict):
|
||||
continue
|
||||
programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
|
||||
for lead_media in traverse_obj(morph_payload, (
|
||||
'body', 'components', ..., 'props', 'leadMedia', {dict})):
|
||||
programme_id = traverse_obj(lead_media, ('identifiers', ('vpid', 'playablePid'), {str}, any))
|
||||
if not programme_id:
|
||||
continue
|
||||
title = lead_media.get('title') or self._og_search_title(webpage)
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
description = lead_media.get('summary')
|
||||
uploader = lead_media.get('masterBrand')
|
||||
uploader_id = lead_media.get('mid')
|
||||
duration = None
|
||||
duration_d = lead_media.get('duration')
|
||||
if isinstance(duration_d, dict):
|
||||
duration = parse_duration(dict_get(
|
||||
duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'title': lead_media.get('title') or self._og_search_title(webpage),
|
||||
**traverse_obj(lead_media, {
|
||||
'description': ('summary', {str}),
|
||||
'duration': ('duration', ('rawDuration', 'formattedDuration', 'spokenDuration'), {parse_duration}),
|
||||
'uploader': ('masterBrand', {str}),
|
||||
'uploader_id': ('mid', {str}),
|
||||
}),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
body = self._parse_json(traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'body')), playlist_id, fatal=False)
|
||||
for video_data in traverse_obj(body, (lambda _, v: v['videoData']['pid'], 'videoData')):
|
||||
if video_data.get('vpid'):
|
||||
video_id = video_data['vpid']
|
||||
formats, subtitles = self._download_media_selector(video_id)
|
||||
entry = {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
else:
|
||||
video_id = video_data['pid']
|
||||
entry = self.url_result(
|
||||
f'https://www.bbc.co.uk/programmes/{video_id}', BBCCoUkIE,
|
||||
video_id, url_transparent=True)
|
||||
entry.update({
|
||||
'timestamp': traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601})
|
||||
),
|
||||
**traverse_obj(video_data, {
|
||||
'thumbnail': (('iChefImage', 'image'), {url_or_none}, any),
|
||||
'title': (('title', 'caption'), {str}, any),
|
||||
'duration': ('duration', {parse_duration}),
|
||||
}),
|
||||
})
|
||||
if video_data.get('isLead') and not self._yes_playlist(playlist_id, video_id):
|
||||
return entry
|
||||
entries.append(entry)
|
||||
if entries:
|
||||
playlist_title = traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'headline', {str})) or playlist_title
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
preload_state = self._parse_json(self._search_regex(
|
||||
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||
'preload state', default='{}'), playlist_id, fatal=False)
|
||||
if preload_state:
|
||||
current_programme = preload_state.get('programmes', {}).get('current') or {}
|
||||
programme_id = current_programme.get('id')
|
||||
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
synopses = current_programme.get('synopses') or {}
|
||||
network = current_programme.get('network') or {}
|
||||
duration = int_or_none(
|
||||
current_programme.get('duration', {}).get('value'))
|
||||
thumbnail = None
|
||||
image_url = current_programme.get('image_url')
|
||||
if image_url:
|
||||
thumbnail = image_url.replace('{recipe}', 'raw')
|
||||
# various PRELOADED_STATE JSON
|
||||
preload_state = self._search_json(
|
||||
r'window\.__(?:PWA_)?PRELOADED_STATE__\s*=', webpage,
|
||||
'preload state', playlist_id, transform_source=js_to_json, default={})
|
||||
# PRELOADED_STATE with current programmme
|
||||
current_programme = traverse_obj(preload_state, ('programmes', 'current', {dict}))
|
||||
programme_id = traverse_obj(current_programme, ('id', {str}))
|
||||
if programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = traverse_obj(current_programme, ('titles', ('tertiary', 'secondary'), {str}, any)) or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
**traverse_obj(current_programme, {
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'thumbnail': ('image_url', {lambda u: url_or_none(u.replace('{recipe}', 'raw'))}),
|
||||
'duration': ('duration', 'value', {int_or_none}),
|
||||
'uploader': ('network', 'short_title', {str}),
|
||||
'uploader_id': ('network', 'id', {str}),
|
||||
'timestamp': ((('availability', 'from'), ('release', 'date')), {parse_iso8601}, any),
|
||||
'series': ('titles', 'primary', {str}),
|
||||
}),
|
||||
'subtitles': subtitles,
|
||||
'chapters': traverse_obj(preload_state, (
|
||||
'tracklist', 'tracks', lambda _, v: float(v['offset']['start']), {
|
||||
'title': ('titles', {lambda x: join_nonempty(
|
||||
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||
'start_time': ('offset', 'start', {float_or_none}),
|
||||
'end_time': ('offset', 'end', {float_or_none}),
|
||||
})
|
||||
),
|
||||
}
|
||||
|
||||
# PWA_PRELOADED_STATE with article video asset
|
||||
asset_id = traverse_obj(preload_state, (
|
||||
'entities', 'articles', lambda k, _: k.rsplit('/', 1)[-1] == playlist_id,
|
||||
'assetVideo', 0, {str}, any))
|
||||
if asset_id:
|
||||
video_id = traverse_obj(preload_state, ('entities', 'videos', asset_id, 'vpid', {str}))
|
||||
if video_id:
|
||||
article = traverse_obj(preload_state, (
|
||||
'entities', 'articles', lambda _, v: v['assetVideo'][0] == asset_id, any))
|
||||
|
||||
def image_url(image_id):
|
||||
return traverse_obj(preload_state, (
|
||||
'entities', 'images', image_id, 'url',
|
||||
{lambda u: url_or_none(u.replace('$recipe', 'raw'))}))
|
||||
|
||||
formats, subtitles = self._download_media_selector(video_id)
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': dict_get(synopses, ('long', 'medium', 'short')),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'uploader': network.get('short_title'),
|
||||
'uploader_id': network.get('id'),
|
||||
'id': video_id,
|
||||
**traverse_obj(preload_state, ('entities', 'videos', asset_id, {
|
||||
'title': ('title', {str}),
|
||||
'description': (('synopsisLong', 'synopsisMedium', 'synopsisShort'), {str}, any),
|
||||
'thumbnail': (0, {image_url}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
})),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'chapters': traverse_obj(preload_state, (
|
||||
'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
|
||||
'title': ('titles', {lambda x: join_nonempty(
|
||||
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||
'start_time': ('offset', 'start', {float_or_none}),
|
||||
'end_time': ('offset', 'end', {float_or_none}),
|
||||
})) or None,
|
||||
'timestamp': traverse_obj(article, ('displayDate', {parse_iso8601})),
|
||||
}
|
||||
else:
|
||||
return self.url_result(
|
||||
f'https://www.bbc.co.uk/programmes/{asset_id}', BBCCoUkIE,
|
||||
asset_id, playlist_title, display_id=playlist_id,
|
||||
description=playlist_description)
|
||||
|
||||
bbc3_config = self._parse_json(
|
||||
self._search_regex(
|
||||
|
@ -1191,6 +1271,28 @@ def _real_extract(self, url):
|
|||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def parse_model(model):
|
||||
"""Extract single video from model structure"""
|
||||
item_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
|
||||
if not item_id:
|
||||
return
|
||||
formats, subtitles = self._download_media_selector(item_id)
|
||||
return {
|
||||
'id': item_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(model, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
|
||||
'duration': ('versions', 0, 'duration', {int}),
|
||||
'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
|
||||
})
|
||||
}
|
||||
|
||||
def is_type(*types):
|
||||
return lambda _, v: v['type'] in types
|
||||
|
||||
initial_data = self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
|
||||
'quoted preload state', default=None)
|
||||
|
@ -1202,6 +1304,19 @@ def _real_extract(self, url):
|
|||
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
||||
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
||||
if initial_data:
|
||||
for video_data in traverse_obj(initial_data, (
|
||||
'stores', 'article', 'articleBodyContent', is_type('video'))):
|
||||
model = traverse_obj(video_data, (
|
||||
'model', 'blocks', is_type('aresMedia'),
|
||||
'model', 'blocks', is_type('aresMediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
entry = parse_model(model)
|
||||
if entry:
|
||||
entries.append(entry)
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def parse_media(media):
|
||||
if not media:
|
||||
return
|
||||
|
@ -1234,27 +1349,90 @@ def parse_media(media):
|
|||
'subtitles': subtitles,
|
||||
'timestamp': item_time,
|
||||
'description': strip_or_none(item_desc),
|
||||
'duration': int_or_none(item.get('duration')),
|
||||
})
|
||||
for resp in (initial_data.get('data') or {}).values():
|
||||
name = resp.get('name')
|
||||
|
||||
for resp in traverse_obj(initial_data, ('data', lambda _, v: v['name'])):
|
||||
name = resp['name']
|
||||
if name == 'media-experience':
|
||||
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||
elif name == 'article':
|
||||
for block in (try_get(resp,
|
||||
(lambda x: x['data']['blocks'],
|
||||
lambda x: x['data']['content']['model']['blocks'],),
|
||||
list) or []):
|
||||
if block.get('type') not in ['media', 'video']:
|
||||
continue
|
||||
parse_media(block.get('model'))
|
||||
for block in traverse_obj(resp, (
|
||||
'data', (None, ('content', 'model')), 'blocks',
|
||||
is_type('media', 'video'), 'model', {dict})):
|
||||
parse_media(block)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# extract from SIMORGH_DATA hydration JSON
|
||||
simorgh_data = self._search_json(
|
||||
r'window\s*\.\s*SIMORGH_DATA\s*=', webpage,
|
||||
'simorgh data', playlist_id, default={})
|
||||
if simorgh_data:
|
||||
done = False
|
||||
for video_data in traverse_obj(simorgh_data, (
|
||||
'pageData', 'content', 'model', 'blocks', is_type('video', 'legacyMedia'))):
|
||||
model = traverse_obj(video_data, (
|
||||
'model', 'blocks', is_type('aresMedia'),
|
||||
'model', 'blocks', is_type('aresMediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
if video_data['type'] == 'video':
|
||||
entry = parse_model(model)
|
||||
else: # legacyMedia: no duration, subtitles
|
||||
block_id, entry = traverse_obj(model, ('blockId', {str})), None
|
||||
media_data = traverse_obj(simorgh_data, (
|
||||
'pageData', 'promo', 'media',
|
||||
{lambda x: x if x['id'] == block_id else None}))
|
||||
formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
|
||||
'url': ('url', {url_or_none}),
|
||||
'ext': ('format', {str}),
|
||||
'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
|
||||
}))
|
||||
if formats:
|
||||
entry = {
|
||||
'id': block_id,
|
||||
'display_id': playlist_id,
|
||||
'formats': formats,
|
||||
'description': traverse_obj(simorgh_data, ('pageData', 'promo', 'summary', {str})),
|
||||
**traverse_obj(model, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
|
||||
}),
|
||||
}
|
||||
done = True
|
||||
if entry:
|
||||
entries.append(entry)
|
||||
if done:
|
||||
break
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def extract_all(pattern):
|
||||
return list(filter(None, map(
|
||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||
re.findall(pattern, webpage))))
|
||||
|
||||
# US accessed article with single embedded video (e.g.
|
||||
# https://www.bbc.com/news/uk-68546268)
|
||||
next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}),
|
||||
('props', 'pageProps', 'page'))
|
||||
model = traverse_obj(next_data, (
|
||||
..., 'contents', is_type('video'),
|
||||
'model', 'blocks', is_type('media'),
|
||||
'model', 'blocks', is_type('mediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
if model and (entry := parse_model(model)):
|
||||
if not entry.get('timestamp'):
|
||||
entry['timestamp'] = traverse_obj(next_data, (
|
||||
..., 'contents', is_type('timestamp'), 'model',
|
||||
'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
|
||||
entries.append(entry)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# Multiple video article (e.g.
|
||||
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
|
||||
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
|
||||
|
|
|
@ -1,32 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CableAVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://cableav\.tv/(?P<id>[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://cableav.tv/lS4iR9lWjN8/',
|
||||
'md5': '7e3fe5e49d61c4233b7f5b0f69b15e18',
|
||||
'info_dict': {
|
||||
'id': 'lS4iR9lWjN8',
|
||||
'ext': 'mp4',
|
||||
'title': '國產麻豆AV 叮叮映畫 DDF001 情欲小說家 - CableAV',
|
||||
'description': '國產AV 480p, 720p 国产麻豆AV 叮叮映画 DDF001 情欲小说家',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._og_search_video_url(webpage, secure=False)
|
||||
|
||||
formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
}
|
|
@ -16,7 +16,6 @@
|
|||
merge_dicts,
|
||||
multipart_encode,
|
||||
parse_duration,
|
||||
random_birthday,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
try_get,
|
||||
|
@ -63,38 +62,57 @@ class CDAIE(InfoExtractor):
|
|||
'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'crash404',
|
||||
'view_count': int,
|
||||
'average_rating': float,
|
||||
'duration': 137,
|
||||
'age_limit': 0,
|
||||
'upload_date': '20160220',
|
||||
'timestamp': 1455968218,
|
||||
}
|
||||
}, {
|
||||
# Age-restricted
|
||||
'url': 'http://www.cda.pl/video/1273454c4',
|
||||
# Age-restricted with vfilm redirection
|
||||
'url': 'https://www.cda.pl/video/8753244c4',
|
||||
'md5': 'd8eeb83d63611289507010d3df3bb8b3',
|
||||
'info_dict': {
|
||||
'id': '1273454c4',
|
||||
'id': '8753244c4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bronson (2008) napisy HD 1080p',
|
||||
'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
|
||||
'title': '[18+] Bez Filtra: Rezerwowe Psy czyli... najwulgarniejsza polska gra?',
|
||||
'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e',
|
||||
'height': 1080,
|
||||
'uploader': 'boniek61',
|
||||
'uploader': 'arhn eu',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 5554,
|
||||
'duration': 991,
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'average_rating': float,
|
||||
},
|
||||
'timestamp': 1633888264,
|
||||
'upload_date': '20211010',
|
||||
}
|
||||
}, {
|
||||
# Age-restricted without vfilm redirection
|
||||
'url': 'https://www.cda.pl/video/17028157b8',
|
||||
'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992',
|
||||
'info_dict': {
|
||||
'id': '17028157b8',
|
||||
'ext': 'mp4',
|
||||
'title': 'STENDUPY MICHAŁ OGIŃSKI',
|
||||
'description': 'md5:5851f3272bfc31f762d616040a1d609a',
|
||||
'height': 480,
|
||||
'uploader': 'oginski',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 18855,
|
||||
'age_limit': 18,
|
||||
'average_rating': float,
|
||||
'timestamp': 1699705901,
|
||||
'upload_date': '20231111',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://ebd.cda.pl/0x0/5749950c',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
|
||||
form_data = random_birthday('rok', 'miesiac', 'dzien')
|
||||
form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
|
||||
data, content_type = multipart_encode(form_data)
|
||||
data, content_type = multipart_encode({'age_confirm': ''})
|
||||
return self._download_webpage(
|
||||
urljoin(url, '/a/validatebirth'), video_id, *args,
|
||||
url, video_id, *args,
|
||||
data=data, headers={
|
||||
'Referer': url,
|
||||
'Content-Type': content_type,
|
||||
|
@ -164,7 +182,7 @@ def _real_extract(self, url):
|
|||
if 'Authorization' in self._API_HEADERS:
|
||||
return self._api_extract(video_id)
|
||||
else:
|
||||
return self._web_extract(video_id, url)
|
||||
return self._web_extract(video_id)
|
||||
|
||||
def _api_extract(self, video_id):
|
||||
meta = self._download_json(
|
||||
|
@ -197,9 +215,9 @@ def _api_extract(self, video_id):
|
|||
'view_count': meta.get('views'),
|
||||
}
|
||||
|
||||
def _web_extract(self, video_id, url):
|
||||
def _web_extract(self, video_id):
|
||||
self._set_cookie('cda.pl', 'cda.player', 'html5')
|
||||
webpage = self._download_webpage(
|
||||
webpage, urlh = self._download_webpage_handle(
|
||||
f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)
|
||||
|
||||
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
|
||||
|
@ -209,10 +227,10 @@ def _web_extract(self, video_id, url):
|
|||
self.raise_geo_restricted()
|
||||
|
||||
need_confirm_age = False
|
||||
if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
|
||||
if self._html_search_regex(r'(<button[^>]+name="[^"]*age_confirm[^"]*")',
|
||||
webpage, 'birthday validate form', default=None):
|
||||
webpage = self._download_age_confirm_page(
|
||||
url, video_id, note='Confirming age')
|
||||
urlh.url, video_id, note='Confirming age')
|
||||
need_confirm_age = True
|
||||
|
||||
formats = []
|
||||
|
@ -222,9 +240,6 @@ def _web_extract(self, video_id, url):
|
|||
(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
|
||||
<(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
|
||||
''', webpage, 'uploader', default=None, group='uploader')
|
||||
view_count = self._search_regex(
|
||||
r'Odsłony:(?:\s| )*([0-9]+)', webpage,
|
||||
'view_count', default=None)
|
||||
average_rating = self._search_regex(
|
||||
(r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
||||
r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
|
||||
|
@ -235,7 +250,6 @@ def _web_extract(self, video_id, url):
|
|||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'uploader': uploader,
|
||||
'view_count': int_or_none(view_count),
|
||||
'average_rating': float_or_none(average_rating),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
|
|
|
@ -957,7 +957,8 @@ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=
|
|||
if urlh is False:
|
||||
assert not fatal
|
||||
return False
|
||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
|
||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
|
||||
encoding=encoding, data=data)
|
||||
return (content, urlh)
|
||||
|
||||
@staticmethod
|
||||
|
@ -1005,13 +1006,15 @@ def __check_blocked(self, content):
|
|||
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
|
||||
expected=True)
|
||||
|
||||
def _request_dump_filename(self, url, video_id):
|
||||
basen = f'{video_id}_{url}'
|
||||
def _request_dump_filename(self, url, video_id, data=None):
|
||||
if data is not None:
|
||||
data = hashlib.md5(data).hexdigest()
|
||||
basen = join_nonempty(video_id, data, url, delim='_')
|
||||
trim_length = self.get_param('trim_file_name') or 240
|
||||
if len(basen) > trim_length:
|
||||
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
|
||||
basen = basen[:trim_length - len(h)] + h
|
||||
filename = sanitize_filename(f'{basen}.dump', restricted=True)
|
||||
filename = sanitize_filename(f'{basen}.dump', self.get_param('keep_bad_win_chars', False), restricted=True)
|
||||
# Working around MAX_PATH limitation on Windows (see
|
||||
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
|
||||
if compat_os_name == 'nt':
|
||||
|
@ -1028,16 +1031,18 @@ def __decode_webpage(self, webpage_bytes, encoding, headers):
|
|||
except LookupError:
|
||||
return webpage_bytes.decode('utf-8', 'replace')
|
||||
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
|
||||
prefix=None, encoding=None, data=None):
|
||||
webpage_bytes = urlh.read()
|
||||
if prefix is not None:
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
url_or_request = self._create_request(url_or_request, data)
|
||||
if self.get_param('dump_intermediate_pages', False):
|
||||
self.to_screen('Dumping request to ' + urlh.url)
|
||||
dump = base64.b64encode(webpage_bytes).decode('ascii')
|
||||
self._downloader.to_screen(dump)
|
||||
if self.get_param('write_pages'):
|
||||
filename = self._request_dump_filename(urlh.url, video_id)
|
||||
filename = self._request_dump_filename(urlh.url, video_id, url_or_request.data)
|
||||
self.to_screen(f'Saving request to {filename}')
|
||||
with open(filename, 'wb') as outf:
|
||||
outf.write(webpage_bytes)
|
||||
|
@ -1098,7 +1103,7 @@ def download_content(self, url_or_request, video_id, note=note, errnote=errnote,
|
|||
impersonate=None, require_impersonation=False):
|
||||
if self.get_param('load_pages'):
|
||||
url_or_request = self._create_request(url_or_request, data, headers, query)
|
||||
filename = self._request_dump_filename(url_or_request.url, video_id)
|
||||
filename = self._request_dump_filename(url_or_request.url, video_id, url_or_request.data)
|
||||
self.to_screen(f'Loading request from {filename}')
|
||||
try:
|
||||
with open(filename, 'rb') as dumpf:
|
||||
|
|
|
@ -1,105 +0,0 @@
|
|||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
get_elements_by_class,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class EinthusanIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://einthusan.tv/movie/watch/9097/',
|
||||
'md5': 'ff0f7f2065031b8a2cf13a933731c035',
|
||||
'info_dict': {
|
||||
'id': '9097',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ae Dil Hai Mushkil',
|
||||
'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://einthusan.com/movie/watch/9097/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
|
||||
def _decrypt(self, encrypted_data, video_id):
|
||||
return self._parse_json(compat_b64decode((
|
||||
encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
|
||||
)).decode('utf-8'), video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title')
|
||||
|
||||
player_params = extract_attributes(self._search_regex(
|
||||
r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters'))
|
||||
|
||||
page_id = self._html_search_regex(
|
||||
'<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
|
||||
video_data = self._download_json(
|
||||
'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id,
|
||||
data=urlencode_postdata({
|
||||
'xEvent': 'UIVideoPlayer.PingOutcome',
|
||||
'xJson': json.dumps({
|
||||
'EJOutcomes': player_params['data-ejpingables'],
|
||||
'NativeHLS': False
|
||||
}),
|
||||
'arcVersion': 3,
|
||||
'appVersion': 59,
|
||||
'gorilla.csrf.Token': page_id,
|
||||
}))['Data']
|
||||
|
||||
if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'):
|
||||
raise ExtractorError(
|
||||
'Download rate reached. Please try again later.', expected=True)
|
||||
|
||||
ej_links = self._decrypt(video_data['EJLinks'], video_id)
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_url = ej_links.get('HLSLink')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native'))
|
||||
|
||||
mp4_url = ej_links.get('MP4Link')
|
||||
if mp4_url:
|
||||
formats.append({
|
||||
'url': mp4_url,
|
||||
})
|
||||
|
||||
description = get_elements_by_class('synopsis', webpage)[0]
|
||||
thumbnail = self._html_search_regex(
|
||||
r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
|
||||
webpage, 'thumbnail url', fatal=False, group='url')
|
||||
if thumbnail is not None:
|
||||
thumbnail = compat_urlparse.urljoin(url, thumbnail)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
}
|
|
@ -16,13 +16,31 @@ class EplusIbIE(InfoExtractor):
|
|||
_VALID_URL = [r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)',
|
||||
r'https?://live\.eplus\.jp/(?P<id>sample|\d+)']
|
||||
_TESTS = [{
|
||||
'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
|
||||
'url': 'https://live.eplus.jp/ex/player?ib=41K6Wzbr3PlcMD%2FOKHFlC%2FcZCe2Eaw7FK%2BpJS1ooUHki8d0vGSy2mYqxillQBe1dSnOxU%2B8%2FzXKls4XPBSb3vw%3D%3D',
|
||||
'info_dict': {
|
||||
'id': '354502-0001-002',
|
||||
'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022~LIVE with a smile!~【Streaming+(配信)】',
|
||||
'id': '335699-0001-006',
|
||||
'title': '少女☆歌劇 レヴュースタァライト -The LIVE 青嵐- BLUE GLITTER <定点映像配信>【Streaming+(配信)】',
|
||||
'live_status': 'was_live',
|
||||
'release_date': '20211231',
|
||||
'release_timestamp': 1640952000,
|
||||
'release_date': '20201221',
|
||||
'release_timestamp': 1608544800,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'ignore_no_formats_error': True,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'This event may not be accessible',
|
||||
'No video formats found',
|
||||
'Requested format is not available',
|
||||
],
|
||||
}, {
|
||||
'url': 'https://live.eplus.jp/ex/player?ib=6QSsQdyRAwOFZrEHWlhRm7vocgV%2FO0YzBZ%2BaBEBg1XR%2FmbLn0R%2F048dUoAY038%2F%2F92MJ73BsoAtvUpbV6RLtDQ%3D%3D&show_id=2371511',
|
||||
'info_dict': {
|
||||
'id': '348021-0054-001',
|
||||
'title': 'ラブライブ!スーパースター!! Liella! First LoveLive! Tour ~Starlines~【東京/DAY.1】',
|
||||
'live_status': 'was_live',
|
||||
'release_date': '20220115',
|
||||
'release_timestamp': 1642233600,
|
||||
'description': str,
|
||||
},
|
||||
'params': {
|
||||
|
@ -124,6 +142,10 @@ def _real_extract(self, url):
|
|||
if data_json.get('drm_mode') == 'ON':
|
||||
self.report_drm(video_id)
|
||||
|
||||
if data_json.get('is_pass_ticket') == 'YES':
|
||||
raise ExtractorError(
|
||||
'This URL is for a pass ticket instead of a player page', expected=True)
|
||||
|
||||
delivery_status = data_json.get('delivery_status')
|
||||
archive_mode = data_json.get('archive_mode')
|
||||
release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
bug_reports_message,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
|
@ -38,6 +40,17 @@ class GoogleDriveIE(InfoExtractor):
|
|||
'duration': 45,
|
||||
'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||
}
|
||||
}, {
|
||||
# has itag 50 which is not in YoutubeIE._formats (royalty Free music from 1922)
|
||||
'url': 'https://drive.google.com/uc?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
|
||||
'md5': '322db8d63dd19788c04050a4bba67073',
|
||||
'info_dict': {
|
||||
'id': '1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
|
||||
'ext': 'mp3',
|
||||
'title': 'My Buddy - Henry Burr - Gus Kahn - Walter Donaldson.mp3',
|
||||
'duration': 184,
|
||||
'thumbnail': 'https://drive.google.com/thumbnail?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
|
||||
},
|
||||
}, {
|
||||
# video can't be watched anonymously due to view count limit reached,
|
||||
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
|
||||
|
@ -58,22 +71,8 @@ class GoogleDriveIE(InfoExtractor):
|
|||
'only_matching': True,
|
||||
}]
|
||||
_FORMATS_EXT = {
|
||||
'5': 'flv',
|
||||
'6': 'flv',
|
||||
'13': '3gp',
|
||||
'17': '3gp',
|
||||
'18': 'mp4',
|
||||
'22': 'mp4',
|
||||
'34': 'flv',
|
||||
'35': 'flv',
|
||||
'36': '3gp',
|
||||
'37': 'mp4',
|
||||
'38': 'mp4',
|
||||
'43': 'webm',
|
||||
'44': 'webm',
|
||||
'45': 'webm',
|
||||
'46': 'webm',
|
||||
'59': 'mp4',
|
||||
**{k: v['ext'] for k, v in YoutubeIE._formats.items() if v.get('ext')},
|
||||
'50': 'm4a',
|
||||
}
|
||||
_BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
|
||||
_CAPTIONS_ENTRY_TAG = {
|
||||
|
@ -194,10 +193,13 @@ def get_value(key):
|
|||
if len(fmt_stream_split) < 2:
|
||||
continue
|
||||
format_id, format_url = fmt_stream_split[:2]
|
||||
ext = self._FORMATS_EXT.get(format_id)
|
||||
if not ext:
|
||||
self.report_warning(f'Unknown format {format_id}{bug_reports_message()}')
|
||||
f = {
|
||||
'url': lowercase_escape(format_url),
|
||||
'format_id': format_id,
|
||||
'ext': self._FORMATS_EXT[format_id],
|
||||
'ext': ext,
|
||||
}
|
||||
resolution = resolutions.get(format_id)
|
||||
if resolution:
|
||||
|
|
|
@ -1,103 +0,0 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class JableIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?jable\.tv/videos/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://jable.tv/videos/pppd-812/',
|
||||
'md5': 'f1537283a9bc073c31ff86ca35d9b2a6',
|
||||
'info_dict': {
|
||||
'id': 'pppd-812',
|
||||
'ext': 'mp4',
|
||||
'title': 'PPPD-812 只要表現好巨乳女教師吉根柚莉愛就獎勵學生們在白虎穴內射出精液',
|
||||
'description': 'md5:5b6d4199a854f62c5e56e26ccad19967',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://jable.tv/videos/apak-220/',
|
||||
'md5': '71f9239d69ced58ab74a816908847cc1',
|
||||
'info_dict': {
|
||||
'id': 'apak-220',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:5c3861b7cf80112a6e2b70bccf170824',
|
||||
'description': '',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'upload_date': '20220319',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
formats = self._extract_m3u8_formats(
|
||||
self._search_regex(r'var\s+hlsUrl\s*=\s*\'([^\']+)', webpage, 'hls_url'), video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage, default=''),
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
'upload_date': unified_strdate(self._search_regex(
|
||||
r'class="inactive-color">\D+\s+(\d{4}-\d+-\d+)', webpage, 'upload_date', default=None)),
|
||||
'view_count': int_or_none(self._search_regex(
|
||||
r'#icon-eye"></use></svg>\n*<span class="mr-3">([\d ]+)',
|
||||
webpage, 'view_count', default='').replace(' ', '')),
|
||||
'like_count': int_or_none(self._search_regex(
|
||||
r'#icon-heart"></use></svg><span class="count">(\d+)', webpage, 'link_count', default=None)),
|
||||
}
|
||||
|
||||
|
||||
class JablePlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?jable\.tv/(?:categories|models|tags)/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://jable.tv/models/kaede-karen/',
|
||||
'info_dict': {
|
||||
'id': 'kaede-karen',
|
||||
'title': '楓カレン',
|
||||
},
|
||||
'playlist_count': 34,
|
||||
}, {
|
||||
'url': 'https://jable.tv/categories/roleplay/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://jable.tv/tags/girl/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
def page_func(page_num):
|
||||
return [
|
||||
self.url_result(player_url, JableIE)
|
||||
for player_url in orderedSet(re.findall(
|
||||
r'href="(https://jable.tv/videos/[\w-]+/?)"',
|
||||
self._download_webpage(url, playlist_id, query={
|
||||
'mode': 'async',
|
||||
'from': page_num + 1,
|
||||
'function': 'get_block',
|
||||
'block_id': 'list_videos_common_videos_list',
|
||||
}, note=f'Downloading page {page_num + 1}')))]
|
||||
|
||||
return self.playlist_result(
|
||||
InAdvancePagedList(page_func, int_or_none(self._search_regex(
|
||||
r'from:(\d+)">[^<]+\s*»', webpage, 'last page number', default=1)), 24),
|
||||
playlist_id, self._search_regex(
|
||||
r'<h2 class="h3-md mb-1">([^<]+)', webpage, 'playlist title', default=None))
|
|
@ -1,95 +0,0 @@
|
|||
import urllib.parse
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
remove_end,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class Porn91IE(InfoExtractor):
|
||||
IE_NAME = '91porn'
|
||||
_VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/view_video.php\?([^#]+&)?viewkey=(?P<id>\w+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
|
||||
'md5': 'd869db281402e0ef4ddef3c38b866f86',
|
||||
'info_dict': {
|
||||
'id': '7e42283b4f5ab36da134',
|
||||
'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!',
|
||||
'description': 'md5:1ff241f579b07ae936a54e810ad2e891',
|
||||
'ext': 'mp4',
|
||||
'duration': 431,
|
||||
'upload_date': '20150520',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://91porn.com/view_video.php?viewkey=7ef0cf3d362c699ab91c',
|
||||
'md5': 'f8fd50540468a6d795378cd778b40226',
|
||||
'info_dict': {
|
||||
'id': '7ef0cf3d362c699ab91c',
|
||||
'title': '真实空乘,冲上云霄第二部',
|
||||
'description': 'md5:618bf9652cafcc66cd277bd96789baea',
|
||||
'ext': 'mp4',
|
||||
'duration': 248,
|
||||
'upload_date': '20221119',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
self._set_cookie('91porn.com', 'language', 'cn_CN')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id)
|
||||
|
||||
if '视频不存在,可能已经被删除或者被举报为不良内容!' in webpage:
|
||||
raise ExtractorError('91 Porn says: Video does not exist', expected=True)
|
||||
|
||||
daily_limit = self._search_regex(
|
||||
r'作为游客,你每天只可观看([\d]+)个视频', webpage, 'exceeded daily limit', default=None, fatal=False)
|
||||
if daily_limit:
|
||||
raise ExtractorError(f'91 Porn says: Daily limit {daily_limit} videos exceeded', expected=True)
|
||||
|
||||
video_link_url = self._search_regex(
|
||||
r'document\.write\(\s*strencode2\s*\(\s*((?:"[^"]+")|(?:\'[^\']+\'))', webpage, 'video link')
|
||||
video_link_url = self._search_regex(
|
||||
r'src=["\']([^"\']+)["\']', urllib.parse.unquote(video_link_url), 'unquoted video link')
|
||||
|
||||
formats, subtitles = self._get_formats_and_subtitle(video_link_url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': remove_end(self._html_extract_title(webpage).replace('\n', ''), 'Chinese homemade video').strip(),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'upload_date': unified_strdate(self._search_regex(
|
||||
r'<span\s+class=["\']title-yakov["\']>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload_date', fatal=False)),
|
||||
'description': self._html_search_regex(
|
||||
r'<span\s+class=["\']more title["\']>\s*([^<]+)', webpage, 'description', fatal=False),
|
||||
'duration': parse_duration(self._search_regex(
|
||||
r'时长:\s*<span[^>]*>\s*(\d+(?::\d+){1,2})', webpage, 'duration', fatal=False)),
|
||||
'comment_count': int_or_none(self._search_regex(
|
||||
r'留言:\s*<span[^>]*>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False)),
|
||||
'view_count': int_or_none(self._search_regex(
|
||||
r'热度:\s*<span[^>]*>\s*(\d+)\s*</span>', webpage, 'view count', fatal=False)),
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
||||
def _get_formats_and_subtitle(self, video_link_url, video_id):
|
||||
ext = determine_ext(video_link_url)
|
||||
if ext == 'm3u8':
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_link_url, video_id, ext='mp4')
|
||||
else:
|
||||
formats = [{'url': video_link_url, 'ext': ext}]
|
||||
subtitles = {}
|
||||
|
||||
return formats, subtitles
|
|
@ -97,7 +97,7 @@ def is_logged(webpage):
|
|||
login_form = self._hidden_inputs(login_page)
|
||||
|
||||
login_form.update({
|
||||
'username': username,
|
||||
'email': username,
|
||||
'password': password,
|
||||
})
|
||||
|
||||
|
|
|
@ -45,19 +45,18 @@ class TikTokBaseIE(InfoExtractor):
|
|||
# "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
|
||||
'aid': '0',
|
||||
}
|
||||
_KNOWN_APP_INFO = [
|
||||
'7351144126450059040',
|
||||
'7351149742343391009',
|
||||
'7351153174894626592',
|
||||
]
|
||||
_APP_INFO_POOL = None
|
||||
_APP_INFO = None
|
||||
_APP_USER_AGENT = None
|
||||
|
||||
@property
|
||||
def _KNOWN_APP_INFO(self):
|
||||
return self._configuration_arg('app_info', ie_key=TikTokIE)
|
||||
|
||||
@property
|
||||
def _API_HOSTNAME(self):
|
||||
return self._configuration_arg(
|
||||
'api_hostname', ['api22-normal-c-useast2a.tiktokv.com'], ie_key=TikTokIE)[0]
|
||||
'api_hostname', ['api16-normal-c-useast1a.tiktokv.com'], ie_key=TikTokIE)[0]
|
||||
|
||||
def _get_next_app_info(self):
|
||||
if self._APP_INFO_POOL is None:
|
||||
|
@ -66,13 +65,10 @@ def _get_next_app_info(self):
|
|||
for key, default in self._APP_INFO_DEFAULTS.items()
|
||||
if key != 'iid'
|
||||
}
|
||||
app_info_list = (
|
||||
self._configuration_arg('app_info', ie_key=TikTokIE)
|
||||
or random.sample(self._KNOWN_APP_INFO, len(self._KNOWN_APP_INFO)))
|
||||
self._APP_INFO_POOL = [
|
||||
{**defaults, **dict(
|
||||
(k, v) for k, v in zip(self._APP_INFO_DEFAULTS, app_info.split('/')) if v
|
||||
)} for app_info in app_info_list
|
||||
)} for app_info in self._KNOWN_APP_INFO
|
||||
]
|
||||
|
||||
if not self._APP_INFO_POOL:
|
||||
|
@ -757,11 +753,13 @@ class TikTokIE(TikTokBaseIE):
|
|||
|
||||
def _real_extract(self, url):
|
||||
video_id, user_id = self._match_valid_url(url).group('id', 'user_id')
|
||||
try:
|
||||
return self._extract_aweme_app(video_id)
|
||||
except ExtractorError as e:
|
||||
e.expected = True
|
||||
self.report_warning(f'{e}; trying with webpage')
|
||||
|
||||
if self._KNOWN_APP_INFO:
|
||||
try:
|
||||
return self._extract_aweme_app(video_id)
|
||||
except ExtractorError as e:
|
||||
e.expected = True
|
||||
self.report_warning(f'{e}; trying with webpage')
|
||||
|
||||
url = self._create_url(user_id, video_id)
|
||||
webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'})
|
||||
|
|
|
@ -36,7 +36,7 @@ class TwitterBaseIE(InfoExtractor):
|
|||
_NETRC_MACHINE = 'twitter'
|
||||
_API_BASE = 'https://api.twitter.com/1.1/'
|
||||
_GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
|
||||
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
|
||||
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
|
||||
_AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
|
||||
_LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
|
||||
_flow_token = None
|
||||
|
@ -1191,6 +1191,31 @@ class TwitterIE(TwitterBaseIE):
|
|||
'age_limit': 0,
|
||||
'_old_archive_ids': ['twitter 1724884212803834154'],
|
||||
},
|
||||
}, {
|
||||
# x.com
|
||||
'url': 'https://x.com/historyinmemes/status/1790637656616943991',
|
||||
'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
|
||||
'info_dict': {
|
||||
'id': '1790637589910654976',
|
||||
'ext': 'mp4',
|
||||
'title': 'Historic Vids - One of the most intense moments in history',
|
||||
'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
|
||||
'display_id': '1790637656616943991',
|
||||
'uploader': 'Historic Vids',
|
||||
'uploader_id': 'historyinmemes',
|
||||
'uploader_url': 'https://twitter.com/historyinmemes',
|
||||
'channel_id': '855481986290524160',
|
||||
'upload_date': '20240515',
|
||||
'timestamp': 1715756260.0,
|
||||
'duration': 15.488,
|
||||
'tags': [],
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
|
||||
'age_limit': 0,
|
||||
'_old_archive_ids': ['twitter 1790637656616943991'],
|
||||
}
|
||||
}, {
|
||||
# onion route
|
||||
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
||||
|
|
|
@ -173,6 +173,20 @@ class KnownPiracyIE(UnsupportedInfoExtractor):
|
|||
r'filemoon\.sx',
|
||||
r'hentai\.animestigma\.com',
|
||||
r'thisav\.com',
|
||||
r'gounlimited\.to',
|
||||
r'highstream\.tv',
|
||||
r'uqload\.com',
|
||||
r'vedbam\.xyz',
|
||||
r'vadbam\.net'
|
||||
r'vidlo\.us',
|
||||
r'wolfstream\.tv',
|
||||
r'xvideosharing\.com',
|
||||
r'(?:\w+\.)?viidshar\.com',
|
||||
r'sxyprn\.com',
|
||||
r'jable\.tv',
|
||||
r'91porn\.com',
|
||||
r'einthusan\.(?:tv|com|ca)',
|
||||
r'yourupload\.com',
|
||||
)
|
||||
|
||||
_TESTS = [{
|
||||
|
|
|
@ -1,198 +0,0 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
decode_packed_codes,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
|
||||
def aa_decode(aa_code):
|
||||
symbol_table = [
|
||||
('7', '((゚ー゚) + (o^_^o))'),
|
||||
('6', '((o^_^o) +(o^_^o))'),
|
||||
('5', '((゚ー゚) + (゚Θ゚))'),
|
||||
('2', '((o^_^o) - (゚Θ゚))'),
|
||||
('4', '(゚ー゚)'),
|
||||
('3', '(o^_^o)'),
|
||||
('1', '(゚Θ゚)'),
|
||||
('0', '(c^_^o)'),
|
||||
]
|
||||
delim = '(゚Д゚)[゚ε゚]+'
|
||||
ret = ''
|
||||
for aa_char in aa_code.split(delim):
|
||||
for val, pat in symbol_table:
|
||||
aa_char = aa_char.replace(pat, val)
|
||||
aa_char = aa_char.replace('+ ', '')
|
||||
m = re.match(r'^\d+', aa_char)
|
||||
if m:
|
||||
ret += chr(int(m.group(0), 8))
|
||||
else:
|
||||
m = re.match(r'^u([\da-f]+)', aa_char)
|
||||
if m:
|
||||
ret += chr(int(m.group(1), 16))
|
||||
return ret
|
||||
|
||||
|
||||
class XFileShareIE(InfoExtractor):
|
||||
_SITES = (
|
||||
(r'aparat\.cam', 'Aparat'),
|
||||
(r'clipwatching\.com', 'ClipWatching'),
|
||||
(r'gounlimited\.to', 'GoUnlimited'),
|
||||
(r'govid\.me', 'GoVid'),
|
||||
(r'holavid\.com', 'HolaVid'),
|
||||
(r'streamty\.com', 'Streamty'),
|
||||
(r'thevideobee\.to', 'TheVideoBee'),
|
||||
(r'uqload\.com', 'Uqload'),
|
||||
(r'vidbom\.com', 'VidBom'),
|
||||
(r'vidlo\.us', 'vidlo'),
|
||||
(r'vidlocker\.xyz', 'VidLocker'),
|
||||
(r'vidshare\.tv', 'VidShare'),
|
||||
(r'vup\.to', 'VUp'),
|
||||
(r'wolfstream\.tv', 'WolfStream'),
|
||||
(r'xvideosharing\.com', 'XVideoSharing'),
|
||||
)
|
||||
|
||||
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
|
||||
_VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
||||
% '|'.join(site for site in list(zip(*_SITES))[0]))
|
||||
_EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' % '|'.join(site for site in list(zip(*_SITES))[0])]
|
||||
|
||||
_FILE_NOT_FOUND_REGEXES = (
|
||||
r'>(?:404 - )?File Not Found<',
|
||||
r'>The file was removed by administrator<',
|
||||
)
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://uqload.com/dltx1wztngdz',
|
||||
'md5': '3cfbb65e4c90e93d7b37bcb65a595557',
|
||||
'info_dict': {
|
||||
'id': 'dltx1wztngdz',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rick Astley Never Gonna Give You mp4',
|
||||
'thumbnail': r're:https://.*\.jpg'
|
||||
}
|
||||
}, {
|
||||
'url': 'http://xvideosharing.com/fq65f94nd2ve',
|
||||
'md5': '4181f63957e8fe90ac836fa58dc3c8a6',
|
||||
'info_dict': {
|
||||
'id': 'fq65f94nd2ve',
|
||||
'ext': 'mp4',
|
||||
'title': 'sample',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://aparat.cam/n4d6dh0wvlpr',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://wolfstream.tv/nthme29v9u2x',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, video_id = self._match_valid_url(url).groups()
|
||||
|
||||
url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
fields = self._hidden_inputs(webpage)
|
||||
|
||||
if fields.get('op') == 'download1':
|
||||
countdown = int_or_none(self._search_regex(
|
||||
r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
|
||||
webpage, 'countdown', default=None))
|
||||
if countdown:
|
||||
self._sleep(countdown, video_id)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, 'Downloading video page',
|
||||
data=urlencode_postdata(fields), headers={
|
||||
'Referer': url,
|
||||
'Content-type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
|
||||
title = (self._search_regex(
|
||||
(r'style="z-index: [0-9]+;">([^<]+)</span>',
|
||||
r'<td nowrap>([^<]+)</td>',
|
||||
r'h4-fine[^>]*>([^<]+)<',
|
||||
r'>Watch (.+)[ <]',
|
||||
r'<h2 class="video-page-head">([^<]+)</h2>',
|
||||
r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<', # streamin.to
|
||||
r'title\s*:\s*"([^"]+)"'), # govid.me
|
||||
webpage, 'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or video_id).strip()
|
||||
|
||||
for regex, func in (
|
||||
(r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes),
|
||||
(r'(゚.+)', aa_decode)):
|
||||
obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
|
||||
if obf_code:
|
||||
webpage = webpage.replace(obf_code, func(obf_code))
|
||||
|
||||
formats = []
|
||||
|
||||
jwplayer_data = self._search_regex(
|
||||
[
|
||||
r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',
|
||||
r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',
|
||||
], webpage,
|
||||
'jwplayer data', default=None)
|
||||
if jwplayer_data:
|
||||
jwplayer_data = self._parse_json(
|
||||
jwplayer_data.replace(r"\'", "'"), video_id, js_to_json)
|
||||
if jwplayer_data:
|
||||
formats = self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, False,
|
||||
m3u8_id='hls', mpd_id='dash')['formats']
|
||||
|
||||
if not formats:
|
||||
urls = []
|
||||
for regex in (
|
||||
r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
|
||||
r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
|
||||
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
|
||||
r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
|
||||
for mobj in re.finditer(regex, webpage):
|
||||
video_url = mobj.group('url')
|
||||
if video_url not in urls:
|
||||
urls.append(video_url)
|
||||
|
||||
sources = self._search_regex(
|
||||
r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
|
||||
if sources:
|
||||
urls.extend(self._parse_json(sources, video_id))
|
||||
|
||||
formats = []
|
||||
for video_url in urls:
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': 'sd',
|
||||
})
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
[
|
||||
r'<video[^>]+poster="([^"]+)"',
|
||||
r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
|
||||
], webpage, 'thumbnail', default=None)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
'http_headers': {'Referer': url}
|
||||
}
|
|
@ -173,8 +173,41 @@ def _real_extract(self, url):
|
|||
|
||||
class XVideosQuickiesIE(InfoExtractor):
|
||||
IE_NAME = 'xvideos:quickies'
|
||||
_VALID_URL = r'https?://(?P<domain>(?:[^/]+\.)?xvideos2?\.com)/amateur-channels/[^#]+#quickies/a/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?P<domain>(?:[^/?#]+\.)?xvideos2?\.com)/(?:profiles/|amateur-channels/)?[^/?#]+#quickies/a/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.xvideos.com/lili_love#quickies/a/ipdtikh1a4c',
|
||||
'md5': 'f9e4f518ff1de14b99a400bbd0fc5ee0',
|
||||
'info_dict': {
|
||||
'id': 'ipdtikh1a4c',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mexican chichóna putisima',
|
||||
'age_limit': 18,
|
||||
'duration': 81,
|
||||
'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.xvideos.com/profiles/lili_love#quickies/a/ipphaob6fd1',
|
||||
'md5': '5340938aac6b46e19ebdd1d84535862e',
|
||||
'info_dict': {
|
||||
'id': 'ipphaob6fd1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Puta chichona mexicana squirting',
|
||||
'age_limit': 18,
|
||||
'duration': 56,
|
||||
'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.xvideos.com/amateur-channels/lili_love#quickies/a/hfmffmd7661',
|
||||
'md5': '92428518bbabcb4c513e55922e022491',
|
||||
'info_dict': {
|
||||
'id': 'hfmffmd7661',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chichona mexican slut',
|
||||
'age_limit': 18,
|
||||
'duration': 9,
|
||||
'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.xvideos.com/amateur-channels/wifeluna#quickies/a/47258683',
|
||||
'md5': '16e322a93282667f1963915568f782c1',
|
||||
'info_dict': {
|
||||
|
@ -189,4 +222,4 @@ class XVideosQuickiesIE(InfoExtractor):
|
|||
|
||||
def _real_extract(self, url):
|
||||
domain, id_ = self._match_valid_url(url).group('domain', 'id')
|
||||
return self.url_result(f'https://{domain}/video{id_}/_', XVideosIE, id_)
|
||||
return self.url_result(f'https://{domain}/video{"" if id_.isdecimal() else "."}{id_}/_', XVideosIE, id_)
|
||||
|
|
|
@ -1,65 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class YourPornIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?sxyprn\.com/post/(?P<id>[^/?#&.]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
|
||||
'md5': '6f8682b6464033d87acaa7a8ff0c092e',
|
||||
'info_dict': {
|
||||
'id': '57ffcb2e1179b',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:c9f43630bd968267672651ba905a7d35',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 165,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
parts = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
|
||||
group='data'),
|
||||
video_id)[video_id].split('/')
|
||||
|
||||
num = 0
|
||||
for c in parts[6] + parts[7]:
|
||||
if c.isnumeric():
|
||||
num += int(c)
|
||||
parts[5] = compat_str(int(parts[5]) - num)
|
||||
parts[1] += '8'
|
||||
video_url = urljoin(url, '/'.join(parts))
|
||||
|
||||
title = (self._search_regex(
|
||||
r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
|
||||
default=None) or self._og_search_description(webpage)).strip()
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration',
|
||||
default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'age_limit': 18,
|
||||
'ext': 'mp4',
|
||||
}
|
|
@ -1,43 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import urljoin
|
||||
|
||||
|
||||
class YourUploadIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:yourupload\.com/(?:watch|embed)|embed\.yourupload\.com)/(?P<id>[A-Za-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://yourupload.com/watch/14i14h',
|
||||
'md5': '5e2c63385454c557f97c4c4131a393cd',
|
||||
'info_dict': {
|
||||
'id': '14i14h',
|
||||
'ext': 'mp4',
|
||||
'title': 'BigBuckBunny_320x180.mp4',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.yourupload.com/embed/14i14h',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://embed.yourupload.com/14i14h',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
embed_url = 'http://www.yourupload.com/embed/%s' % video_id
|
||||
|
||||
webpage = self._download_webpage(embed_url, video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
video_url = urljoin(embed_url, self._og_search_video_url(webpage))
|
||||
thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'http_headers': {
|
||||
'Referer': embed_url,
|
||||
},
|
||||
}
|
|
@ -240,6 +240,16 @@
|
|||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 85
|
||||
},
|
||||
# This client has pre-merged video+audio 720p/1080p streams
|
||||
'mediaconnect': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'MEDIA_CONNECT_FRONTEND',
|
||||
'clientVersion': '0.1',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 95
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
|
@ -1171,7 +1181,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
|
||||
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
|
||||
)
|
||||
_formats = {
|
||||
_formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
|
||||
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
||||
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
||||
'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
|
||||
|
@ -2343,6 +2353,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
'format': '17', # 3gp format available on android
|
||||
'extractor_args': {'youtube': {'player_client': ['android']}},
|
||||
},
|
||||
'skip': 'android client broken',
|
||||
},
|
||||
{
|
||||
# Skip download of additional client configs (remix client config in this case)
|
||||
|
@ -2720,7 +2731,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
'heatmap': 'count:100',
|
||||
},
|
||||
'params': {
|
||||
'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
|
||||
'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
|
||||
},
|
||||
},
|
||||
]
|
||||
|
@ -3307,7 +3318,36 @@ def _extract_heatmap(self, data):
|
|||
'value': ('intensityScoreNormalized', {float_or_none}),
|
||||
})) or None
|
||||
|
||||
def _extract_comment(self, comment_renderer, parent=None):
|
||||
def _extract_comment(self, entities, parent=None):
|
||||
comment_entity_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict}))
|
||||
if not (comment_id := traverse_obj(comment_entity_payload, ('properties', 'commentId', {str}))):
|
||||
return
|
||||
|
||||
toolbar_entity_payload = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict}))
|
||||
time_text = traverse_obj(comment_entity_payload, ('properties', 'publishedTime', {str})) or ''
|
||||
|
||||
return {
|
||||
'id': comment_id,
|
||||
'parent': parent or 'root',
|
||||
**traverse_obj(comment_entity_payload, {
|
||||
'text': ('properties', 'content', 'content', {str}),
|
||||
'like_count': ('toolbar', 'likeCountA11y', {parse_count}),
|
||||
'author_id': ('author', 'channelId', {self.ucid_or_none}),
|
||||
'author': ('author', 'displayName', {str}),
|
||||
'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}),
|
||||
'author_is_uploader': ('author', 'isCreator', {bool}),
|
||||
'author_is_verified': ('author', 'isVerified', {bool}),
|
||||
'author_url': ('author', 'channelCommand', 'innertubeCommand', (
|
||||
('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url')
|
||||
), {lambda x: urljoin('https://www.youtube.com', x)}),
|
||||
}, get_all=False),
|
||||
'is_favorited': (None if toolbar_entity_payload is None else
|
||||
toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'),
|
||||
'_time_text': time_text, # FIXME: non-standard, but we need a way of showing that it is an estimate.
|
||||
'timestamp': self._parse_time_text(time_text),
|
||||
}
|
||||
|
||||
def _extract_comment_old(self, comment_renderer, parent=None):
|
||||
comment_id = comment_renderer.get('commentId')
|
||||
if not comment_id:
|
||||
return
|
||||
|
@ -3388,21 +3428,39 @@ def extract_header(contents):
|
|||
break
|
||||
return _continuation
|
||||
|
||||
def extract_thread(contents):
|
||||
def extract_thread(contents, entity_payloads):
|
||||
if not parent:
|
||||
tracker['current_page_thread'] = 0
|
||||
for content in contents:
|
||||
if not parent and tracker['total_parent_comments'] >= max_parents:
|
||||
yield
|
||||
comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
|
||||
comment_renderer = get_first(
|
||||
(comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
|
||||
expected_type=dict, default={})
|
||||
|
||||
comment = self._extract_comment(comment_renderer, parent)
|
||||
# old comment format
|
||||
if not entity_payloads:
|
||||
comment_renderer = get_first(
|
||||
(comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
|
||||
expected_type=dict, default={})
|
||||
|
||||
comment = self._extract_comment_old(comment_renderer, parent)
|
||||
|
||||
# new comment format
|
||||
else:
|
||||
view_model = (
|
||||
traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict}))
|
||||
or traverse_obj(content, ('commentViewModel', {dict})))
|
||||
comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str}))
|
||||
if not comment_keys:
|
||||
continue
|
||||
entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
|
||||
comment = self._extract_comment(entities, parent)
|
||||
if comment:
|
||||
comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None
|
||||
|
||||
if not comment:
|
||||
continue
|
||||
comment_id = comment['id']
|
||||
|
||||
if comment.get('is_pinned'):
|
||||
tracker['pinned_comment_ids'].add(comment_id)
|
||||
# Sometimes YouTube may break and give us infinite looping comments.
|
||||
|
@ -3495,7 +3553,7 @@ def extract_thread(contents):
|
|||
check_get_keys = None
|
||||
if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
|
||||
check_get_keys = [[*continuation_items_path, ..., (
|
||||
'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
|
||||
'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]]
|
||||
try:
|
||||
response = self._extract_response(
|
||||
item_id=None, query=continuation,
|
||||
|
@ -3519,6 +3577,7 @@ def extract_thread(contents):
|
|||
raise
|
||||
is_forced_continuation = False
|
||||
continuation = None
|
||||
mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict}))
|
||||
for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
|
||||
if is_first_continuation:
|
||||
continuation = extract_header(continuation_items)
|
||||
|
@ -3527,7 +3586,7 @@ def extract_thread(contents):
|
|||
break
|
||||
continue
|
||||
|
||||
for entry in extract_thread(continuation_items):
|
||||
for entry in extract_thread(continuation_items, mutations):
|
||||
if not entry:
|
||||
return
|
||||
yield entry
|
||||
|
@ -3604,8 +3663,6 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
|
|||
yt_query = {
|
||||
'videoId': video_id,
|
||||
}
|
||||
if _split_innertube_client(client)[0] in ('android', 'android_embedscreen'):
|
||||
yt_query['params'] = 'CgIIAQ=='
|
||||
|
||||
pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
|
||||
if pp_arg:
|
||||
|
@ -3621,19 +3678,24 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
|
|||
|
||||
def _get_requested_clients(self, url, smuggled_data):
|
||||
requested_clients = []
|
||||
default = ['ios', 'android', 'web']
|
||||
android_clients = []
|
||||
default = ['ios', 'web']
|
||||
allowed_clients = sorted(
|
||||
(client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
|
||||
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
|
||||
for client in self._configuration_arg('player_client'):
|
||||
if client in allowed_clients:
|
||||
requested_clients.append(client)
|
||||
elif client == 'default':
|
||||
if client == 'default':
|
||||
requested_clients.extend(default)
|
||||
elif client == 'all':
|
||||
requested_clients.extend(allowed_clients)
|
||||
else:
|
||||
elif client not in allowed_clients:
|
||||
self.report_warning(f'Skipping unsupported client {client}')
|
||||
elif client.startswith('android'):
|
||||
android_clients.append(client)
|
||||
else:
|
||||
requested_clients.append(client)
|
||||
# Force deprioritization of broken Android clients for format de-duplication
|
||||
requested_clients.extend(android_clients)
|
||||
if not requested_clients:
|
||||
requested_clients = default
|
||||
|
||||
|
@ -3852,6 +3914,14 @@ def build_fragments(f):
|
|||
f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
|
||||
|
||||
client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
|
||||
# Android client formats are broken due to integrity check enforcement
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
|
||||
is_broken = client_name and client_name.startswith(short_client_name('android'))
|
||||
if is_broken:
|
||||
self.report_warning(
|
||||
f'{video_id}: Android client formats are broken and may yield HTTP Error 403. '
|
||||
'They will be deprioritized', only_once=True)
|
||||
|
||||
name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
|
||||
fps = int_or_none(fmt.get('fps')) or 0
|
||||
dct = {
|
||||
|
@ -3864,7 +3934,7 @@ def build_fragments(f):
|
|||
name, fmt.get('isDrc') and 'DRC',
|
||||
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
||||
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
||||
throttled and 'THROTTLED', is_damaged and 'DAMAGED',
|
||||
throttled and 'THROTTLED', is_damaged and 'DAMAGED', is_broken and 'BROKEN',
|
||||
(self.get_param('verbose') or all_formats) and client_name,
|
||||
delim=', '),
|
||||
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
|
||||
|
@ -3882,8 +3952,8 @@ def build_fragments(f):
|
|||
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
|
||||
'desc' if language_preference < -1 else '') or None,
|
||||
'language_preference': language_preference,
|
||||
# Strictly de-prioritize damaged and 3gp formats
|
||||
'preference': -10 if is_damaged else -2 if itag == '17' else None,
|
||||
# Strictly de-prioritize broken, damaged and 3gp formats
|
||||
'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
|
||||
}
|
||||
mime_mobj = re.match(
|
||||
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
|
||||
|
|
|
@ -267,6 +267,10 @@ def _dict_from_options_callback(
|
|||
out_dict[key] = out_dict.get(key, []) + [val] if append else val
|
||||
setattr(parser.values, option.dest, out_dict)
|
||||
|
||||
def _store_multiple_callback(option, opt_str, value, parser, values):
|
||||
for key, value in values.items():
|
||||
setattr(parser.values, key, value)
|
||||
|
||||
def when_prefix(default):
|
||||
return {
|
||||
'default': {},
|
||||
|
@ -1360,7 +1364,13 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
|||
help='Force filenames to be Windows-compatible')
|
||||
filesystem.add_option(
|
||||
'--no-windows-filenames',
|
||||
action='store_false', dest='windowsfilenames',
|
||||
action='callback', dest='keep_bad_win_chars', default=False, callback=_store_multiple_callback,
|
||||
callback_kwargs={
|
||||
'values': {
|
||||
'windowsfilenames': False,
|
||||
'keep_bad_win_chars': True
|
||||
}
|
||||
},
|
||||
help='Make filenames Windows-compatible only if using Windows (default)')
|
||||
filesystem.add_option(
|
||||
'--trim-filenames', '--trim-file-names', metavar='LENGTH',
|
||||
|
|
|
@ -609,11 +609,12 @@ def timeconvert(timestr):
|
|||
return timestamp
|
||||
|
||||
|
||||
def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
|
||||
def sanitize_filename(s, keep_bad_win_chars=False, restricted=False, is_id=NO_DEFAULT):
|
||||
"""Sanitizes a string so it could be used as part of a filename.
|
||||
@param restricted Use a stricter subset of allowed characters
|
||||
@param is_id Whether this is an ID that should be kept unchanged if possible.
|
||||
If unset, yt-dlp's new sanitization rules are in effect
|
||||
@param keep_bad_win_chars Whether to keep characters invalid on Windows
|
||||
@param restricted Use a stricter subset of allowed characters
|
||||
@param is_id Whether this is an ID that should be kept unchanged if possible.
|
||||
If unset, yt-dlp's new sanitization rules are in effect
|
||||
"""
|
||||
if s == '':
|
||||
return ''
|
||||
|
@ -623,16 +624,16 @@ def replace_insane(char):
|
|||
return ACCENT_CHARS[char]
|
||||
elif not restricted and char == '\n':
|
||||
return '\0 '
|
||||
elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
|
||||
elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\' and not keep_bad_win_chars:
|
||||
# Replace with their full-width unicode counterparts
|
||||
return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0))
|
||||
elif char == '?' or ord(char) < 32 or ord(char) == 127:
|
||||
elif (not keep_bad_win_chars and char == '?') or ord(char) < 32 or ord(char) == 127:
|
||||
return ''
|
||||
elif char == '"':
|
||||
elif not keep_bad_win_chars and char == '"':
|
||||
return '' if restricted else '\''
|
||||
elif char == ':':
|
||||
elif not keep_bad_win_chars and char == ':':
|
||||
return '\0_\0-' if restricted else '\0 \0-'
|
||||
elif char in '\\/|*<>':
|
||||
elif (not keep_bad_win_chars and char in '\\|*<>') or char == '/':
|
||||
return '\0_'
|
||||
if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
|
||||
return '' if unicodedata.category(char)[0] in 'CM' else '\0_'
|
||||
|
@ -641,7 +642,8 @@ def replace_insane(char):
|
|||
# Replace look-alike Unicode glyphs
|
||||
if restricted and (is_id is NO_DEFAULT or not is_id):
|
||||
s = unicodedata.normalize('NFKC', s)
|
||||
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
|
||||
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0) if keep_bad_win_chars
|
||||
else m.group(0).replace(':', '_'), s) # Handle timestamps
|
||||
result = ''.join(map(replace_insane, s))
|
||||
if is_id is NO_DEFAULT:
|
||||
result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result) # Remove repeated substitute chars
|
||||
|
|
Loading…
Reference in New Issue
Block a user