Update to ytdl-commit-de39d128

[extractor/ceskatelevize] Back-port extractor from yt-dlp
de39d1281c

Closes #5361, Closes #4634, Closes #5210
This commit is contained in:
pukkandan 2022-11-07 01:16:33 +05:30
parent a349d4d641
commit db4678e448
No known key found for this signature in database
GPG Key ID: 7EEE9E1E817D0A39
12 changed files with 385 additions and 201 deletions

View File

@ -11,7 +11,6 @@
import base64 import base64
from yt_dlp.aes import ( from yt_dlp.aes import (
BLOCK_SIZE_BYTES,
aes_cbc_decrypt, aes_cbc_decrypt,
aes_cbc_decrypt_bytes, aes_cbc_decrypt_bytes,
aes_cbc_encrypt, aes_cbc_encrypt,
@ -103,8 +102,7 @@ def test_decrypt_text(self):
def test_ecb_encrypt(self): def test_ecb_encrypt(self):
data = bytes_to_intlist(self.secret_msg) data = bytes_to_intlist(self.secret_msg)
data += [0x08] * (BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES) encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key))
encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key, self.iv))
self.assertEqual( self.assertEqual(
encrypted, encrypted,
b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')

View File

@ -28,11 +28,23 @@ def aes_cbc_encrypt_bytes(data, key, iv, **kwargs):
return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs)) return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs))
BLOCK_SIZE_BYTES = 16
def unpad_pkcs7(data): def unpad_pkcs7(data):
return data[:-compat_ord(data[-1])] return data[:-compat_ord(data[-1])]
BLOCK_SIZE_BYTES = 16 def pkcs7_padding(data):
"""
PKCS#7 padding
@param {int[]} data cleartext
@returns {int[]} padding data
"""
remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES
return data + [remaining_length] * remaining_length
def pad_block(block, padding_mode): def pad_block(block, padding_mode):
@ -64,7 +76,7 @@ def pad_block(block, padding_mode):
def aes_ecb_encrypt(data, key, iv=None): def aes_ecb_encrypt(data, key, iv=None):
""" """
Encrypt with aes in ECB mode Encrypt with aes in ECB mode. Using PKCS#7 padding
@param {int[]} data cleartext @param {int[]} data cleartext
@param {int[]} key 16/24/32-Byte cipher key @param {int[]} key 16/24/32-Byte cipher key
@ -77,8 +89,7 @@ def aes_ecb_encrypt(data, key, iv=None):
encrypted_data = [] encrypted_data = []
for i in range(block_count): for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
encrypted_data += aes_encrypt(block, expanded_key) encrypted_data += aes_encrypt(pkcs7_padding(block), expanded_key)
encrypted_data = encrypted_data[:len(data)]
return encrypted_data return encrypted_data
@ -551,5 +562,6 @@ def ghash(subkey, data):
'key_expansion', 'key_expansion',
'pad_block', 'pad_block',
'pkcs7_padding',
'unpad_pkcs7', 'unpad_pkcs7',
] ]

View File

@ -48,6 +48,7 @@ def compat_setenv(key, value, env=os.environ):
compat_basestring = str compat_basestring = str
compat_casefold = str.casefold
compat_chr = chr compat_chr = chr
compat_collections_abc = collections.abc compat_collections_abc = collections.abc
compat_cookiejar = http.cookiejar compat_cookiejar = http.cookiejar

View File

@ -28,30 +28,34 @@
class ADNIE(InfoExtractor): class ADNIE(InfoExtractor):
IE_DESC = 'Anime Digital Network' IE_DESC = 'Animation Digital Network'
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
_TEST = { _TESTS = [{
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', 'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
'md5': '0319c99885ff5547565cacb4f3f9348d', 'md5': '1c9ef066ceb302c86f80c2b371615261',
'info_dict': { 'info_dict': {
'id': '7778', 'id': '9841',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Blue Exorcist - Kyôto Saga - Episode 1', 'title': 'Fruits Basket - Episode 1',
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5', 'description': 'md5:14be2f72c3c96809b0ca424b0097d336',
'series': 'Blue Exorcist - Kyôto Saga', 'series': 'Fruits Basket',
'duration': 1467, 'duration': 1437,
'release_date': '20170106', 'release_date': '20190405',
'comment_count': int, 'comment_count': int,
'average_rating': float, 'average_rating': float,
'season_number': 2, 'season_number': 1,
'episode': 'Début des hostilités', 'episode': 'À ce soir !',
'episode_number': 1, 'episode_number': 1,
} },
} 'skip': 'Only available in region (FR, ...)',
}, {
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
'only_matching': True,
}]
_NETRC_MACHINE = 'animedigitalnetwork' _NETRC_MACHINE = 'animationdigitalnetwork'
_BASE_URL = 'http://animedigitalnetwork.fr' _BASE = 'animationdigitalnetwork.fr'
_API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/' _API_BASE_URL = 'https://gw.api.' + _BASE + '/'
_PLAYER_BASE_URL = _API_BASE_URL + 'player/' _PLAYER_BASE_URL = _API_BASE_URL + 'player/'
_HEADERS = {} _HEADERS = {}
_LOGIN_ERR_MESSAGE = 'Unable to log in' _LOGIN_ERR_MESSAGE = 'Unable to log in'
@ -75,11 +79,11 @@ def _get_subtitles(self, sub_url, video_id):
if subtitle_location: if subtitle_location:
enc_subtitles = self._download_webpage( enc_subtitles = self._download_webpage(
subtitle_location, video_id, 'Downloading subtitles data', subtitle_location, video_id, 'Downloading subtitles data',
fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'}) fatal=False, headers={'Origin': 'https://' + self._BASE})
if not enc_subtitles: if not enc_subtitles:
return None return None
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes( dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
compat_b64decode(enc_subtitles[24:]), compat_b64decode(enc_subtitles[24:]),
binascii.unhexlify(self._K + '7fac1178830cfe0c'), binascii.unhexlify(self._K + '7fac1178830cfe0c'),

View File

@ -9,6 +9,7 @@
ExtractorError, ExtractorError,
float_or_none, float_or_none,
sanitized_Request, sanitized_Request,
str_or_none,
traverse_obj, traverse_obj,
urlencode_postdata, urlencode_postdata,
USER_AGENTS, USER_AGENTS,
@ -16,13 +17,13 @@
class CeskaTelevizeIE(InfoExtractor): class CeskaTelevizeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
'info_dict': { 'info_dict': {
'id': '61924494877028507', 'id': '61924494877028507',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Hyde Park Civilizace: Bonus 01 - En', 'title': 'Bonus 01 - En - Hyde Park Civilizace',
'description': 'English Subtittles', 'description': 'English Subtittles',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'duration': 81.3, 'duration': 81.3,
@ -33,18 +34,29 @@ class CeskaTelevizeIE(InfoExtractor):
}, },
}, { }, {
# live stream # live stream
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', 'url': 'http://www.ceskatelevize.cz/zive/ct1/',
'info_dict': { 'info_dict': {
'id': 402, 'id': '102',
'ext': 'mp4', 'ext': 'mp4',
'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'title': r'ČT1 - živé vysílání online',
'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.',
'is_live': True, 'is_live': True,
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Georestricted to Czech Republic', }, {
# another
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
'only_matching': True,
'info_dict': {
'id': 402,
'ext': 'mp4',
'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,
},
# 'skip': 'Georestricted to Czech Republic',
}, { }, {
'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
'only_matching': True, 'only_matching': True,
@ -53,21 +65,21 @@ class CeskaTelevizeIE(InfoExtractor):
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
'info_dict': { 'info_dict': {
'id': '215562210900007-bogotart', 'id': '215562210900007-bogotart',
'title': 'Queer: Bogotart', 'title': 'Bogotart - Queer',
'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti. Připravil Peter Serge Butko', 'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti',
}, },
'playlist': [{ 'playlist': [{
'info_dict': { 'info_dict': {
'id': '61924494877311053', 'id': '61924494877311053',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Queer: Bogotart (Varování 18+)', 'title': 'Bogotart - Queer (Varování 18+)',
'duration': 11.9, 'duration': 11.9,
}, },
}, { }, {
'info_dict': { 'info_dict': {
'id': '61924494877068022', 'id': '61924494877068022',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Queer: Bogotart (Queer)', 'title': 'Bogotart - Queer (Queer)',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'duration': 1558.3, 'duration': 1558.3,
}, },
@ -84,28 +96,42 @@ class CeskaTelevizeIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) playlist_id = self._match_id(url)
parsed_url = compat_urllib_parse_urlparse(url) webpage, urlh = self._download_webpage_handle(url, playlist_id)
webpage = self._download_webpage(url, playlist_id) parsed_url = compat_urllib_parse_urlparse(urlh.geturl())
site_name = self._og_search_property('site_name', webpage, fatal=False, default=None) site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
playlist_title = self._og_search_title(webpage, default=None) playlist_title = self._og_search_title(webpage, default=None)
if site_name and playlist_title: if site_name and playlist_title:
playlist_title = playlist_title.replace(f'{site_name}', '', 1) playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0]
playlist_description = self._og_search_description(webpage, default=None) playlist_description = self._og_search_description(webpage, default=None)
if playlist_description: if playlist_description:
playlist_description = playlist_description.replace('\xa0', ' ') playlist_description = playlist_description.replace('\xa0', ' ')
if parsed_url.path.startswith('/porady/'): type_ = 'IDEC'
if re.search(r'(^/porady|/zive)/', parsed_url.path):
next_data = self._search_nextjs_data(webpage, playlist_id) next_data = self._search_nextjs_data(webpage, playlist_id)
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False) if '/zive/' in parsed_url.path:
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False)
else:
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
if not idec:
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False)
if idec:
type_ = 'bonus'
if not idec: if not idec:
raise ExtractorError('Failed to find IDEC id') raise ExtractorError('Failed to find IDEC id')
iframe_hash = self._download_webpage('https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id) iframe_hash = self._download_webpage(
webpage = self._download_webpage('https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id, 'https://www.ceskatelevize.cz/v-api/iframe-hash/',
query={'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', 'IDEC': idec}) playlist_id, note='Getting IFRAME hash')
query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, }
webpage = self._download_webpage(
'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php',
playlist_id, note='Downloading player', query=query)
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
if '%s</p>' % NOT_AVAILABLE_STRING in webpage: if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) self.raise_geo_restricted(NOT_AVAILABLE_STRING)
if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )):
raise ExtractorError('no video with IDEC available', video_id=idec, expected=True)
type_ = None type_ = None
episode_id = None episode_id = None
@ -174,7 +200,6 @@ def _real_extract(self, url):
is_live = item.get('type') == 'LIVE' is_live = item.get('type') == 'LIVE'
formats = [] formats = []
for format_id, stream_url in item.get('streamUrls', {}).items(): for format_id, stream_url in item.get('streamUrls', {}).items():
stream_url = stream_url.replace('https://', 'http://')
if 'playerType=flash' in stream_url: if 'playerType=flash' in stream_url:
stream_formats = self._extract_m3u8_formats( stream_formats = self._extract_m3u8_formats(
stream_url, playlist_id, 'mp4', 'm3u8_native', stream_url, playlist_id, 'mp4', 'm3u8_native',
@ -196,7 +221,7 @@ def _real_extract(self, url):
entries[num]['formats'].extend(formats) entries[num]['formats'].extend(formats)
continue continue
item_id = item.get('id') or item['assetId'] item_id = str_or_none(item.get('id') or item['assetId'])
title = item['title'] title = item['title']
duration = float_or_none(item.get('duration')) duration = float_or_none(item.get('duration'))
@ -227,6 +252,8 @@ def _real_extract(self, url):
for e in entries: for e in entries:
self._sort_formats(e['formats']) self._sort_formats(e['formats'])
if len(entries) == 1:
return entries[0]
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
def _get_subtitles(self, episode_id, subs): def _get_subtitles(self, episode_id, subs):

View File

@ -1,8 +1,12 @@
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
extract_attributes,
int_or_none, int_or_none,
str_to_int, str_to_int,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
@ -17,17 +21,20 @@ class ManyVidsIE(InfoExtractor):
'id': '133957', 'id': '133957',
'ext': 'mp4', 'ext': 'mp4',
'title': 'everthing about me (Preview)', 'title': 'everthing about me (Preview)',
'uploader': 'ellyxxix',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
}, },
}, { }, {
# full video # full video
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/', 'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
'md5': 'f3e8f7086409e9b470e2643edb96bdcc', 'md5': 'bb47bab0e0802c2a60c24ef079dfe60f',
'info_dict': { 'info_dict': {
'id': '935718', 'id': '935718',
'ext': 'mp4', 'ext': 'mp4',
'title': 'MY FACE REVEAL', 'title': 'MY FACE REVEAL',
'description': 'md5:ec5901d41808b3746fed90face161612',
'uploader': 'Sarah Calanthe',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
}, },
@ -36,17 +43,50 @@ class ManyVidsIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, )
try:
webpage = self._download_webpage(real_url, video_id)
except Exception:
# probably useless fallback
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex( info = self._search_regex(
r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1', r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
webpage, 'video URL', group='url') webpage, 'meta details', default='')
info = extract_attributes(info)
title = self._html_search_regex( player = self._search_regex(
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)', r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'), webpage, 'player details', default='')
webpage, 'title', default=None) or self._html_search_meta( player = extract_attributes(player)
'twitter:title', webpage, 'title', fatal=True)
video_urls_and_ids = (
(info.get('data-meta-video'), 'video'),
(player.get('data-video-transcoded'), 'transcoded'),
(player.get('data-video-filepath'), 'filepath'),
(self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
)
def txt_or_none(s, default=None):
return (s.strip() or default) if isinstance(s, compat_str) else default
uploader = txt_or_none(info.get('data-meta-author'))
def mung_title(s):
if uploader:
s = re.sub(r'^\s*%s\s+[|-]' % (re.escape(uploader), ), '', s)
return txt_or_none(s)
title = (
mung_title(info.get('data-meta-title'))
or self._html_search_regex(
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
webpage, 'title', default=None)
or self._html_search_meta(
'twitter:title', webpage, 'title', fatal=True))
title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title
if any(p in webpage for p in ('preview_videos', '_preview.mp4')): if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
title += ' (Preview)' title += ' (Preview)'
@ -59,7 +99,8 @@ def _real_extract(self, url):
# Sets some cookies # Sets some cookies
self._download_webpage( self._download_webpage(
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php', 'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
video_id, fatal=False, data=urlencode_postdata({ video_id, note='Setting format cookies', fatal=False,
data=urlencode_postdata({
'mvtoken': mv_token, 'mvtoken': mv_token,
'vid': video_id, 'vid': video_id,
}), headers={ }), headers={
@ -67,24 +108,56 @@ def _real_extract(self, url):
'X-Requested-With': 'XMLHttpRequest' 'X-Requested-With': 'XMLHttpRequest'
}) })
if determine_ext(video_url) == 'm3u8': formats = []
formats = self._extract_m3u8_formats( for v_url, fmt in video_urls_and_ids:
video_url, video_id, 'mp4', entry_protocol='m3u8_native', v_url = url_or_none(v_url)
m3u8_id='hls') if not v_url:
else: continue
formats = [{'url': video_url}] if determine_ext(v_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
v_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls'))
else:
formats.append({
'url': v_url,
'format_id': fmt,
})
like_count = int_or_none(self._search_regex( self._remove_duplicate_formats(formats)
r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
view_count = str_to_int(self._html_search_regex( for f in formats:
r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage, if f.get('height') is None:
'view count', default=None)) f['height'] = int_or_none(
self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
if '/preview/' in f['url']:
f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
f['preference'] = -10
if 'transcoded' in f['format_id']:
f['preference'] = f.get('preference', -1) - 1
self._sort_formats(formats)
def get_likes():
likes = self._search_regex(
r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ),
webpage, 'likes', default='')
likes = extract_attributes(likes)
return int_or_none(likes.get('data-likes'))
def get_views():
return str_to_int(self._html_search_regex(
r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
webpage, 'view count', default=None))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'view_count': view_count,
'like_count': like_count,
'formats': formats, 'formats': formats,
'uploader': self._html_search_regex(r'<meta[^>]+name="author"[^>]*>([^<]+)', webpage, 'uploader'), 'description': txt_or_none(info.get('data-meta-description')),
'uploader': txt_or_none(info.get('data-meta-author')),
'thumbnail': (
url_or_none(info.get('data-meta-image'))
or url_or_none(player.get('data-video-screenshot'))),
'view_count': get_views(),
'like_count': get_likes(),
} }

View File

@ -69,7 +69,7 @@ class MotherlessIE(InfoExtractor):
'title': 'a/ Hot Teens', 'title': 'a/ Hot Teens',
'categories': list, 'categories': list,
'upload_date': '20210104', 'upload_date': '20210104',
'uploader_id': 'yonbiw', 'uploader_id': 'anonymous',
'thumbnail': r're:https?://.*\.jpg', 'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18, 'age_limit': 18,
}, },
@ -123,11 +123,12 @@ def _real_extract(self, url):
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta} kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d') upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
comment_count = webpage.count('class="media-comment-contents"') comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
uploader_id = self._html_search_regex( uploader_id = self._html_search_regex(
(r'"media-meta-member">\s+<a href="/m/([^"]+)"', (r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
r'<span\b[^>]+\bclass="username">([^<]+)</span>'), r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
webpage, 'uploader_id', fatal=False) webpage, 'uploader_id', fatal=False)
categories = self._html_search_meta('keywords', webpage, default=None) categories = self._html_search_meta('keywords', webpage, default=None)
if categories: if categories:
categories = [cat.strip() for cat in categories.split(',')] categories = [cat.strip() for cat in categories.split(',')]
@ -217,23 +218,23 @@ def _real_extract(self, url):
r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False) r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
description = self._html_search_meta( description = self._html_search_meta(
'description', webpage, fatal=False) 'description', webpage, fatal=False)
page_count = self._int(self._search_regex( page_count = str_to_int(self._search_regex(
r'(\d+)</(?:a|span)><(?:a|span)[^>]+rel="next">', r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
webpage, 'page_count', default=0), 'page_count') webpage, 'page_count', default=0))
if not page_count: if not page_count:
message = self._search_regex( message = self._search_regex(
r'class="error-page"[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*', r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''',
webpage, 'error_msg', default=None) or 'This group has no videos.' webpage, 'error_msg', default=None) or 'This group has no videos.'
self.report_warning(message, group_id) self.report_warning(message, group_id)
page_count = 1
PAGE_SIZE = 80 PAGE_SIZE = 80
def _get_page(idx): def _get_page(idx):
if not page_count: if idx > 0:
return webpage = self._download_webpage(
webpage = self._download_webpage( page_url, group_id, query={'page': idx + 1},
page_url, group_id, query={'page': idx + 1}, note='Downloading page %d/%d' % (idx + 1, page_count)
note='Downloading page %d/%d' % (idx + 1, page_count) )
)
for entry in self._extract_entries(webpage, url): for entry in self._extract_entries(webpage, url):
yield entry yield entry

View File

@ -1,12 +1,25 @@
import itertools import json
import re import re
import time
from base64 import b64encode from base64 import b64encode
from binascii import hexlify
from datetime import datetime from datetime import datetime
from hashlib import md5 from hashlib import md5
from random import randint
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str, compat_urllib_parse_urlencode from ..aes import aes_ecb_encrypt, pkcs7_padding
from ..utils import float_or_none, sanitized_Request from ..compat import compat_urllib_parse_urlencode
from ..utils import (
ExtractorError,
bytes_to_intlist,
error_to_compat_str,
float_or_none,
int_or_none,
intlist_to_bytes,
sanitized_Request,
try_get,
)
class NetEaseMusicBaseIE(InfoExtractor): class NetEaseMusicBaseIE(InfoExtractor):
@ -17,7 +30,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
@classmethod @classmethod
def _encrypt(cls, dfsid): def _encrypt(cls, dfsid):
salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8')) salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
string_bytes = bytearray(compat_str(dfsid).encode('ascii')) string_bytes = bytearray(str(dfsid).encode('ascii'))
salt_len = len(salt_bytes) salt_len = len(salt_bytes)
for i in range(len(string_bytes)): for i in range(len(string_bytes)):
string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len] string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
@ -26,32 +39,106 @@ def _encrypt(cls, dfsid):
result = b64encode(m.digest()).decode('ascii') result = b64encode(m.digest()).decode('ascii')
return result.replace('/', '_').replace('+', '-') return result.replace('/', '_').replace('+', '-')
@classmethod
def make_player_api_request_data_and_headers(cls, song_id, bitrate):
KEY = b'e82ckenh8dichen8'
URL = '/api/song/enhance/player/url'
now = int(time.time() * 1000)
rand = randint(0, 1000)
cookie = {
'osver': None,
'deviceId': None,
'appver': '8.0.0',
'versioncode': '140',
'mobilename': None,
'buildver': '1623435496',
'resolution': '1920x1080',
'__csrf': '',
'os': 'pc',
'channel': None,
'requestId': '{0}_{1:04}'.format(now, rand),
}
request_text = json.dumps(
{'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
separators=(',', ':'))
message = 'nobody{0}use{1}md5forencrypt'.format(
URL, request_text).encode('latin1')
msg_digest = md5(message).hexdigest()
data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
URL, request_text, msg_digest)
data = pkcs7_padding(bytes_to_intlist(data))
encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
encrypted_params = hexlify(encrypted).decode('ascii').upper()
cookie = '; '.join(
['{0}={1}'.format(k, v if v is not None else 'undefined')
for [k, v] in cookie.items()])
headers = {
'User-Agent': self.extractor.get_param('http_headers')['User-Agent'],
'Content-Type': 'application/x-www-form-urlencoded',
'Referer': 'https://music.163.com',
'Cookie': cookie,
}
return ('params={0}'.format(encrypted_params), headers)
def _call_player_api(self, song_id, bitrate):
url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
try:
msg = 'empty result'
result = self._download_json(
url, song_id, data=data.encode('ascii'), headers=headers)
if result:
return result
except ExtractorError as e:
if type(e.cause) in (ValueError, TypeError):
# JSON load failure
raise
except Exception as e:
msg = error_to_compat_str(e)
self.report_warning('%s API call (%s) failed: %s' % (
song_id, bitrate, msg))
return {}
def extract_formats(self, info): def extract_formats(self, info):
err = 0
formats = [] formats = []
song_id = info['id']
for song_format in self._FORMATS: for song_format in self._FORMATS:
details = info.get(song_format) details = info.get(song_format)
if not details: if not details:
continue continue
song_file_path = '/%s/%s.%s' % (
self._encrypt(details['dfsId']), details['dfsId'], details['extension'])
# 203.130.59.9, 124.40.233.182, 115.231.74.139, etc is a reverse proxy-like feature bitrate = int_or_none(details.get('bitrate')) or 999000
# from NetEase's CDN provider that can be used if m5.music.126.net does not data = self._call_player_api(song_id, bitrate)
# work, especially for users outside of Mainland China for song in try_get(data, lambda x: x['data'], list) or []:
# via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880 song_url = try_get(song, lambda x: x['url'])
for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net', if not song_url:
'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'): continue
song_url = host + song_file_path
if self._is_valid_url(song_url, info['id'], 'song'): if self._is_valid_url(song_url, info['id'], 'song'):
formats.append({ formats.append({
'url': song_url, 'url': song_url,
'ext': details.get('extension'), 'ext': details.get('extension'),
'abr': float_or_none(details.get('bitrate'), scale=1000), 'abr': float_or_none(song.get('br'), scale=1000),
'format_id': song_format, 'format_id': song_format,
'filesize': details.get('size'), 'filesize': int_or_none(song.get('size')),
'asr': details.get('sr') 'asr': int_or_none(details.get('sr')),
}) })
break elif err == 0:
err = try_get(song, lambda x: x['code'], int)
if not formats:
msg = 'No media links found'
if err != 0 and (err < 200 or err >= 400):
raise ExtractorError(
'%s (site code %d)' % (msg, err, ), expected=True)
else:
self.raise_geo_restricted(
msg + ': probably this video is not available from your location due to geo restriction.',
countries=['CN'])
return formats return formats
@classmethod @classmethod
@ -67,33 +154,19 @@ def query_api(self, endpoint, video_id, note):
class NetEaseMusicIE(NetEaseMusicBaseIE): class NetEaseMusicIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:song' IE_NAME = 'netease:song'
IE_DESC = '网易云音乐' IE_DESC = '网易云音乐'
_VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)' _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://music.163.com/#/song?id=32102397', 'url': 'http://music.163.com/#/song?id=32102397',
'md5': 'f2e97280e6345c74ba9d5677dd5dcb45', 'md5': '3e909614ce09b1ccef4a3eb205441190',
'info_dict': { 'info_dict': {
'id': '32102397', 'id': '32102397',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Bad Blood (feat. Kendrick Lamar)', 'title': 'Bad Blood',
'creator': 'Taylor Swift / Kendrick Lamar', 'creator': 'Taylor Swift / Kendrick Lamar',
'upload_date': '20150517', 'upload_date': '20150516',
'timestamp': 1431878400, 'timestamp': 1431792000,
'description': 'md5:a10a54589c2860300d02e1de821eb2ef', 'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
}, },
'skip': 'Blocked outside Mainland China',
}, {
'note': 'No lyrics translation.',
'url': 'http://music.163.com/#/song?id=29822014',
'info_dict': {
'id': '29822014',
'ext': 'mp3',
'title': '听见下雨的声音',
'creator': '周杰伦',
'upload_date': '20141225',
'timestamp': 1419523200,
'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
},
'skip': 'Blocked outside Mainland China',
}, { }, {
'note': 'No lyrics.', 'note': 'No lyrics.',
'url': 'http://music.163.com/song?id=17241424', 'url': 'http://music.163.com/song?id=17241424',
@ -103,9 +176,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'title': 'Opus 28', 'title': 'Opus 28',
'creator': 'Dustin O\'Halloran', 'creator': 'Dustin O\'Halloran',
'upload_date': '20080211', 'upload_date': '20080211',
'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
'timestamp': 1202745600, 'timestamp': 1202745600,
}, },
'skip': 'Blocked outside Mainland China',
}, { }, {
'note': 'Has translated name.', 'note': 'Has translated name.',
'url': 'http://music.163.com/#/song?id=22735043', 'url': 'http://music.163.com/#/song?id=22735043',
@ -119,7 +192,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'timestamp': 1264608000, 'timestamp': 1264608000,
'alt_title': '说出愿望吧(Genie)', 'alt_title': '说出愿望吧(Genie)',
}, },
'skip': 'Blocked outside Mainland China', }, {
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
'md5': '95826c73ea50b1c288b22180ec9e754d',
'info_dict': {
'id': '95670',
'ext': 'mp3',
'title': '国际歌',
'creator': '马备',
'upload_date': '19911130',
'timestamp': 691516800,
'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
},
}] }]
def _process_lyrics(self, lyrics_info): def _process_lyrics(self, lyrics_info):

View File

@ -58,8 +58,7 @@ def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None
return self._download_json( return self._download_json(
urljoin('https://psapi.nrk.no/', path), urljoin('https://psapi.nrk.no/', path),
video_id, note or 'Downloading %s JSON' % item, video_id, note or 'Downloading %s JSON' % item,
fatal=fatal, query=query, fatal=fatal, query=query)
headers={'Accept-Encoding': 'gzip, deflate, br'})
class NRKIE(NRKBaseIE): class NRKIE(NRKBaseIE):

View File

@ -870,7 +870,7 @@ def _real_extract(self, url):
if '://player.vimeo.com/video/' in url: if '://player.vimeo.com/video/' in url:
config = self._parse_json(self._search_regex( config = self._parse_json(self._search_regex(
r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id) r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
if config.get('view') == 4: if config.get('view') == 4:
config = self._verify_player_video_password( config = self._verify_player_video_password(
redirect_url, video_id, headers) redirect_url, video_id, headers)

View File

@ -3,13 +3,14 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
from ..utils import ( from ..utils import (
NO_DEFAULT,
ExtractorError,
determine_ext, determine_ext,
extract_attributes,
float_or_none, float_or_none,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
merge_dicts, merge_dicts,
NO_DEFAULT,
orderedSet,
parse_codecs, parse_codecs,
qualities, qualities,
traverse_obj, traverse_obj,
@ -188,7 +189,7 @@ class ZDFIE(ZDFBaseIE):
}, },
}, { }, {
'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html', 'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
'md5': '57af4423db0455a3975d2dc4578536bc', 'md5': '1b93bdec7d02fc0b703c5e7687461628',
'info_dict': { 'info_dict': {
'ext': 'mp4', 'ext': 'mp4',
'id': 'video_funk_1770473', 'id': 'video_funk_1770473',
@ -250,17 +251,15 @@ def _extract_entry(self, url, player, content, video_id):
title = content.get('title') or content['teaserHeadline'] title = content.get('title') or content['teaserHeadline']
t = content['mainVideoContent']['http://zdf.de/rels/target'] t = content['mainVideoContent']['http://zdf.de/rels/target']
ptmd_path = traverse_obj(t, (
ptmd_path = t.get('http://zdf.de/rels/streams/ptmd') (('streams', 'default'), None),
('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template')
), get_all=False)
if not ptmd_path: if not ptmd_path:
ptmd_path = traverse_obj( raise ExtractorError('Could not extract ptmd_path')
t, ('streams', 'default', 'http://zdf.de/rels/streams/ptmd-template'),
'http://zdf.de/rels/streams/ptmd-template').replace(
'{playerId}', 'ngplayer_2_4')
info = self._extract_ptmd( info = self._extract_ptmd(
urljoin(url, ptmd_path), video_id, player['apiToken'], url) urljoin(url, ptmd_path.replace('{playerId}', 'ngplayer_2_4')), video_id, player['apiToken'], url)
thumbnails = [] thumbnails = []
layouts = try_get( layouts = try_get(
@ -309,15 +308,16 @@ def _extract_mobile(self, video_id):
'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id, 'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
video_id) video_id)
document = video['document']
title = document['titel']
content_id = document['basename']
formats = [] formats = []
format_urls = set() formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
for f in document['formitaeten']: document = formitaeten and video['document']
self._extract_format(content_id, formats, format_urls, f) if formitaeten:
title = document['titel']
content_id = document['basename']
format_urls = set()
for f in formitaeten or []:
self._extract_format(content_id, formats, format_urls, f)
self._sort_formats(formats) self._sort_formats(formats)
thumbnails = [] thumbnails = []
@ -364,9 +364,9 @@ class ZDFChannelIE(ZDFBaseIE):
'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio', 'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
'info_dict': { 'info_dict': {
'id': 'das-aktuelle-sportstudio', 'id': 'das-aktuelle-sportstudio',
'title': 'das aktuelle sportstudio | ZDF', 'title': 'das aktuelle sportstudio',
}, },
'playlist_mincount': 23, 'playlist_mincount': 18,
}, { }, {
'url': 'https://www.zdf.de/dokumentation/planet-e', 'url': 'https://www.zdf.de/dokumentation/planet-e',
'info_dict': { 'info_dict': {
@ -374,6 +374,14 @@ class ZDFChannelIE(ZDFBaseIE):
'title': 'planet e.', 'title': 'planet e.',
}, },
'playlist_mincount': 50, 'playlist_mincount': 50,
}, {
'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
'info_dict': {
'id': 'aktenzeichen-xy-ungeloest',
'title': 'Aktenzeichen XY... ungelöst',
'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)",
},
'playlist_mincount': 2,
}, { }, {
'url': 'https://www.zdf.de/filme/taunuskrimi/', 'url': 'https://www.zdf.de/filme/taunuskrimi/',
'only_matching': True, 'only_matching': True,
@ -383,60 +391,36 @@ class ZDFChannelIE(ZDFBaseIE):
def suitable(cls, url): def suitable(cls, url):
return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url) return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url)
def _og_search_title(self, webpage, fatal=False):
title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal)
return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None
def _real_extract(self, url): def _real_extract(self, url):
channel_id = self._match_id(url) channel_id = self._match_id(url)
webpage = self._download_webpage(url, channel_id) webpage = self._download_webpage(url, channel_id)
entries = [ matches = re.finditer(
self.url_result(item_url, ie=ZDFIE.ie_key()) r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % ZDFIE._VALID_URL,
for item_url in orderedSet(re.findall( webpage)
r'data-plusbar-url=["\'](http.+?\.html)', webpage))]
return self.playlist_result( if self._downloader.params.get('noplaylist', False):
entries, channel_id, self._og_search_title(webpage, fatal=False)) entry = next(
(self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches),
None)
self.to_screen('Downloading just the main video because of --no-playlist')
if entry:
return entry
else:
self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, ))
r""" def check_video(m):
player = self._extract_player(webpage, channel_id) v_ref = self._search_regex(
r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ),
webpage, 'check id', default='')
v_ref = extract_attributes(v_ref)
return v_ref.get('data-target-video-type') != 'novideo'
channel_id = self._search_regex( return self.playlist_from_matches(
r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage, (m.group('url') for m in matches if check_video(m)),
'channel id', group='id') channel_id, self._og_search_title(webpage, fatal=False))
channel = self._call_api(
'https://api.zdf.de/content/documents/%s.json' % channel_id,
player, url, channel_id)
items = []
for module in channel['module']:
for teaser in try_get(module, lambda x: x['teaser'], list) or []:
t = try_get(
teaser, lambda x: x['http://zdf.de/rels/target'], dict)
if not t:
continue
items.extend(try_get(
t,
lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
list) or [])
items.extend(try_get(
module,
lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
list) or [])
entries = []
entry_urls = set()
for item in items:
t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
if not t:
continue
sharing_url = t.get('http://zdf.de/rels/sharing-url')
if not sharing_url or not isinstance(sharing_url, compat_str):
continue
if sharing_url in entry_urls:
continue
entry_urls.add(sharing_url)
entries.append(self.url_result(
sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
return self.playlist_result(entries, channel_id, channel.get('title'))
"""

View File

@ -685,7 +685,8 @@ def replace_insane(char):
return '\0_' return '\0_'
return char return char
if restricted and is_id is NO_DEFAULT: # Replace look-alike Unicode glyphs
if restricted and (is_id is NO_DEFAULT or not is_id):
s = unicodedata.normalize('NFKC', s) s = unicodedata.normalize('NFKC', s)
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
result = ''.join(map(replace_insane, s)) result = ''.join(map(replace_insane, s))