2013-06-23 19:58:33 +02:00
# coding: utf-8
2014-09-13 07:51:06 +02:00
from __future__ import unicode_literals
2021-07-12 01:18:40 +02:00
import base64
2021-04-07 13:37:43 +02:00
import calendar
2021-06-30 00:07:49 +02:00
import copy
2021-07-19 06:55:07 +02:00
import datetime
2021-03-03 11:32:40 +01:00
import hashlib
2013-09-22 10:37:23 +02:00
import itertools
2013-06-23 19:58:33 +02:00
import json
2021-11-16 20:56:23 +01:00
import math
2013-09-22 00:35:03 +02:00
import os . path
2016-02-29 20:01:33 +01:00
import random
2013-06-23 19:58:33 +02:00
import re
2021-02-04 15:57:26 +01:00
import time
2013-09-21 14:19:30 +02:00
import traceback
2013-06-23 19:58:33 +02:00
2013-06-23 20:28:15 +02:00
from . common import InfoExtractor , SearchInfoExtractor
2014-12-11 10:08:17 +01:00
from . . compat import (
2013-09-22 10:30:02 +02:00
compat_chr ,
2021-01-01 13:26:37 +01:00
compat_HTTPError ,
2013-06-23 19:58:33 +02:00
compat_parse_qs ,
2021-02-04 15:37:17 +01:00
compat_str ,
2015-07-17 19:51:57 +02:00
compat_urllib_parse_unquote_plus ,
2016-03-25 20:46:57 +01:00
compat_urllib_parse_urlencode ,
2015-07-20 21:10:28 +02:00
compat_urllib_parse_urlparse ,
2013-10-01 17:58:13 +02:00
compat_urlparse ,
2014-12-11 10:08:17 +01:00
)
2021-02-04 15:37:17 +01:00
from . . jsinterp import JSInterpreter
2014-12-11 10:08:17 +01:00
from . . utils import (
2021-11-16 20:56:23 +01:00
bug_reports_message ,
2021-07-12 01:18:40 +02:00
bytes_to_intlist ,
2013-06-23 19:58:33 +02:00
clean_html ,
2021-04-07 13:37:43 +02:00
datetime_from_str ,
2021-07-21 05:52:34 +02:00
dict_get ,
2021-04-17 00:39:08 +02:00
error_to_compat_str ,
2013-06-23 19:58:33 +02:00
ExtractorError ,
2015-02-11 18:39:31 +01:00
float_or_none ,
2021-07-21 05:52:34 +02:00
format_field ,
2014-01-19 05:47:20 +01:00
int_or_none ,
2021-07-12 01:18:40 +02:00
intlist_to_bytes ,
2021-08-13 08:18:26 +02:00
is_html ,
2021-11-06 02:05:24 +01:00
join_nonempty ,
2016-01-24 18:02:19 +01:00
mimetype2ext ,
2021-07-21 19:57:23 +02:00
network_exceptions ,
2021-11-24 04:01:52 +01:00
NO_DEFAULT ,
2021-07-21 05:52:34 +02:00
orderedSet ,
2017-02-12 12:09:53 +01:00
parse_codecs ,
2021-07-15 01:24:42 +02:00
parse_count ,
2015-07-20 21:10:28 +02:00
parse_duration ,
2021-07-21 17:15:45 +02:00
parse_iso8601 ,
2021-08-22 21:02:00 +02:00
parse_qs ,
2021-02-18 19:12:56 +01:00
qualities ,
2021-09-04 04:33:42 +02:00
remove_end ,
2017-05-06 23:19:11 +02:00
remove_start ,
2015-07-25 17:30:34 +02:00
smuggle_url ,
2018-11-03 00:26:16 +01:00
str_or_none ,
2015-06-28 20:48:06 +02:00
str_to_int ,
2021-07-20 02:02:41 +02:00
traverse_obj ,
2017-01-26 15:43:14 +01:00
try_get ,
2013-06-23 19:58:33 +02:00
unescapeHTML ,
unified_strdate ,
2015-07-25 17:30:34 +02:00
unsmuggle_url ,
2020-11-19 20:22:59 +01:00
update_url_query ,
2018-12-16 13:35:48 +01:00
url_or_none ,
2021-07-19 06:55:07 +02:00
urljoin ,
2021-07-20 02:02:41 +02:00
variadic ,
2013-06-23 19:58:33 +02:00
)
2014-11-23 20:41:03 +01:00
2021-11-16 20:56:23 +01:00
def get_first(obj, keys, **kwargs):
    """Look up `keys` under every element of `obj` and return a single match.

    Thin wrapper around traverse_obj(): the path is `...` (branch over the
    elements of `obj`) followed by `keys`, and `get_all=False` is always
    passed. Any other keyword arguments are forwarded unchanged.
    """
    path = (..., *variadic(keys))
    return traverse_obj(obj, path, **kwargs, get_all=False)
2021-07-30 21:45:04 +02:00
# Per-client InnerTube configuration, keyed by client name.
# Entries may omit 'INNERTUBE_API_KEY', 'INNERTUBE_HOST' and 'REQUIRE_JS_PLAYER';
# build_innertube_clients() fills those in with defaults, adds a 'priority'
# to every entry and derives the '<base>_agegate' variants.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    # No API key listed here: the default from build_innertube_clients() applies
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No API key listed here: the default from build_innertube_clients() applies
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
def build_innertube_clients():
    """Complete every INNERTUBE_CLIENTS entry in place.

    For each client: fill in the default API key, host and
    'REQUIRE_JS_PLAYER' flag, default the context language to 'en', and
    compute a 'priority'. Each base client additionally gets a derived
    '<name>_agegate' entry with an embed-style context.
    """
    embed_context = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'ios', 'mweb')
    # NOTE(review): presumably qualities() ranks by position, so reversing
    # makes the first base client rank highest - confirm against utils
    rank = qualities(base_clients[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: the agegate variants are added to the dict below
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, fallback in defaults:
            cfg.setdefault(key, fallback)
        context = cfg['INNERTUBE_CONTEXT']
        context['client'].setdefault('hl', 'en')
        # Clients are ranked by the part of their name before the first '_'
        cfg['priority'] = 10 * rank(name.split('_', 1)[0])

        if name in base_clients:
            # The base client keeps its priority; the variant sits just below it
            agegate_cfg = copy.deepcopy(cfg)
            agegate_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            agegate_cfg['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_cfg
            continue

        if name.endswith('_embedded'):
            context['thirdParty'] = embed_context
            penalty = 2
        else:
            penalty = 3
        cfg['priority'] -= penalty


build_innertube_clients()
2013-09-11 15:48:23 +02:00
class YoutubeBaseInfoExtractor ( InfoExtractor ) :
2013-07-24 20:40:12 +02:00
""" Provide base functions for Youtube extractors """
2017-05-06 18:58:47 +02:00
2020-11-21 23:21:09 +01:00
_RESERVED_NAMES = (
2021-09-19 16:07:47 +02:00
r ' channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip| '
2021-07-30 21:43:26 +02:00
r ' shorts|movies|results|shared|hashtag|trending|feed|feeds| '
r ' browse|oembed|get_video_info|iframe_api|s/player| '
2021-03-04 19:05:26 +01:00
r ' storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout ' )
2020-11-21 23:21:09 +01:00
2021-07-30 21:43:26 +02:00
_PLAYLIST_ID_RE = r ' (?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_] { 10,}|RDMM|WL|LL|LM) '
2013-07-24 20:40:12 +02:00
_NETRC_MACHINE = ' youtube '
2021-07-30 21:43:26 +02:00
2013-07-24 20:40:12 +02:00
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
2021-11-06 04:07:34 +01:00
_INVIDIOUS_SITES = (
# invidious-redirect websites
r ' (?:www \ .)?redirect \ .invidious \ .io ' ,
r ' (?:(?:www|dev) \ .)?invidio \ .us ' ,
# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
r ' (?:www \ .)?invidious \ .pussthecat \ .org ' ,
r ' (?:www \ .)?invidious \ .zee \ .li ' ,
r ' (?:www \ .)?invidious \ .ethibox \ .fr ' ,
r ' (?:www \ .)?invidious \ .3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd \ .onion ' ,
# youtube-dl invidious instances list
r ' (?:(?:www|no) \ .)?invidiou \ .sh ' ,
r ' (?:(?:www|fi) \ .)?invidious \ .snopyta \ .org ' ,
r ' (?:www \ .)?invidious \ .kabi \ .tk ' ,
r ' (?:www \ .)?invidious \ .mastodon \ .host ' ,
r ' (?:www \ .)?invidious \ .zapashcanon \ .fr ' ,
r ' (?:www \ .)?(?:invidious(?:-us)?|piped) \ .kavin \ .rocks ' ,
r ' (?:www \ .)?invidious \ .tinfoil-hat \ .net ' ,
r ' (?:www \ .)?invidious \ .himiko \ .cloud ' ,
r ' (?:www \ .)?invidious \ .reallyancient \ .tech ' ,
r ' (?:www \ .)?invidious \ .tube ' ,
r ' (?:www \ .)?invidiou \ .site ' ,
r ' (?:www \ .)?invidious \ .site ' ,
r ' (?:www \ .)?invidious \ .xyz ' ,
r ' (?:www \ .)?invidious \ .nixnet \ .xyz ' ,
r ' (?:www \ .)?invidious \ .048596 \ .xyz ' ,
r ' (?:www \ .)?invidious \ .drycat \ .fr ' ,
r ' (?:www \ .)?inv \ .skyn3t \ .in ' ,
r ' (?:www \ .)?tube \ .poal \ .co ' ,
r ' (?:www \ .)?tube \ .connect \ .cafe ' ,
r ' (?:www \ .)?vid \ .wxzm \ .sx ' ,
r ' (?:www \ .)?vid \ .mint \ .lgbt ' ,
r ' (?:www \ .)?vid \ .puffyan \ .us ' ,
r ' (?:www \ .)?yewtu \ .be ' ,
r ' (?:www \ .)?yt \ .elukerio \ .org ' ,
r ' (?:www \ .)?yt \ .lelux \ .fi ' ,
r ' (?:www \ .)?invidious \ .ggc-project \ .de ' ,
r ' (?:www \ .)?yt \ .maisputain \ .ovh ' ,
r ' (?:www \ .)?ytprivate \ .com ' ,
r ' (?:www \ .)?invidious \ .13ad \ .de ' ,
r ' (?:www \ .)?invidious \ .toot \ .koeln ' ,
r ' (?:www \ .)?invidious \ .fdn \ .fr ' ,
r ' (?:www \ .)?watch \ .nettohikari \ .com ' ,
r ' (?:www \ .)?invidious \ .namazso \ .eu ' ,
r ' (?:www \ .)?invidious \ .silkky \ .cloud ' ,
r ' (?:www \ .)?invidious \ .exonip \ .de ' ,
r ' (?:www \ .)?invidious \ .riverside \ .rocks ' ,
r ' (?:www \ .)?invidious \ .blamefran \ .net ' ,
r ' (?:www \ .)?invidious \ .moomoo \ .de ' ,
r ' (?:www \ .)?ytb \ .trom \ .tf ' ,
r ' (?:www \ .)?yt \ .cyberhost \ .uk ' ,
r ' (?:www \ .)?kgg2m7yk5aybusll \ .onion ' ,
r ' (?:www \ .)?qklhadlycap4cnod \ .onion ' ,
r ' (?:www \ .)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid \ .onion ' ,
r ' (?:www \ .)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid \ .onion ' ,
r ' (?:www \ .)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad \ .onion ' ,
r ' (?:www \ .)?invidious \ .l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd \ .onion ' ,
r ' (?:www \ .)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya \ .b32 \ .i2p ' ,
r ' (?:www \ .)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd \ .onion ' ,
r ' (?:www \ .)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd \ .onion ' ,
r ' (?:www \ .)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad \ .onion ' ,
r ' (?:www \ .)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad \ .onion ' ,
r ' (?:www \ .)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid \ .onion ' ,
)
2013-07-24 20:40:12 +02:00
def _login ( self ) :
2014-08-16 23:28:41 +02:00
"""
Attempt to log in to YouTube .
If _LOGIN_REQUIRED is set and no authentication was provided , an error is raised .
"""
2021-05-19 15:41:44 +02:00
2021-07-21 22:32:49 +02:00
if ( self . _LOGIN_REQUIRED
and self . get_param ( ' cookiefile ' ) is None
and self . get_param ( ' cookiesfrombrowser ' ) is None ) :
2021-05-19 15:41:44 +02:00
self . raise_login_required (
' Login details are needed to download this content ' , method = ' cookies ' )
2018-05-26 17:12:44 +02:00
username , password = self . _get_login_info ( )
2021-05-19 15:41:44 +02:00
if username :
2021-10-18 04:25:34 +02:00
self . report_warning ( f ' Cannot login to YouTube using username and password. { self . _LOGIN_HINTS [ " cookies " ] } ' )
2013-07-24 20:40:12 +02:00
2021-04-01 10:28:33 +02:00
def _initialize_consent ( self ) :
cookies = self . _get_cookies ( ' https://www.youtube.com/ ' )
if cookies . get ( ' __Secure-3PSID ' ) :
return
consent_id = None
consent = cookies . get ( ' CONSENT ' )
if consent :
if ' YES ' in consent . value :
return
consent_id = self . _search_regex (
r ' PENDING \ +( \ d+) ' , consent . value , ' consent ' , default = None )
if not consent_id :
consent_id = random . randint ( 100 , 999 )
self . _set_cookie ( ' .youtube.com ' , ' CONSENT ' , ' YES+cb.20210328-17-p0.en+FX+ %s ' % consent_id )
2017-09-01 19:57:14 +02:00
2013-07-24 20:40:12 +02:00
def _real_initialize ( self ) :
2021-04-01 10:28:33 +02:00
self . _initialize_consent ( )
2021-10-18 04:25:34 +02:00
self . _login ( )
2013-06-23 19:58:33 +02:00
2020-11-21 15:50:42 +01:00
_YT_INITIAL_DATA_RE = r ' (?:window \ s* \ [ \ s*[ " \' ]ytInitialData[ " \' ] \ s* \ ]|ytInitialData) \ s*= \ s*( { .+?}) \ s*; '
2021-01-01 13:26:37 +01:00
_YT_INITIAL_PLAYER_RESPONSE_RE = r ' ytInitialPlayerResponse \ s*= \ s*( { .+?}) \ s*; '
_YT_INITIAL_BOUNDARY_RE = r ' (?:var \ s+meta|</script| \ n) '
2020-11-21 15:50:42 +01:00
2021-07-30 21:45:04 +02:00
def _get_default_ytcfg ( self , client = ' web ' ) :
return copy . deepcopy ( INNERTUBE_CLIENTS [ client ] )
2021-06-30 00:07:49 +02:00
2021-07-30 21:45:04 +02:00
def _get_innertube_host ( self , client = ' web ' ) :
return INNERTUBE_CLIENTS [ client ] [ ' INNERTUBE_HOST ' ]
2021-06-30 00:07:49 +02:00
2021-07-30 21:45:04 +02:00
def _ytcfg_get_safe ( self , ytcfg , getter , expected_type = None , default_client = ' web ' ) :
2021-06-30 00:07:49 +02:00
# try_get but with fallback to default ytcfg client values when present
_func = lambda y : try_get ( y , getter , expected_type )
return _func ( ytcfg ) or _func ( self . _get_default_ytcfg ( default_client ) )
2021-07-30 21:45:04 +02:00
def _extract_client_name ( self , ytcfg , default_client = ' web ' ) :
2021-07-30 21:43:26 +02:00
return self . _ytcfg_get_safe (
ytcfg , ( lambda x : x [ ' INNERTUBE_CLIENT_NAME ' ] ,
lambda x : x [ ' INNERTUBE_CONTEXT ' ] [ ' client ' ] [ ' clientName ' ] ) , compat_str , default_client )
2021-06-30 00:07:49 +02:00
2021-07-30 21:45:04 +02:00
def _extract_client_version ( self , ytcfg , default_client = ' web ' ) :
2021-07-30 21:43:26 +02:00
return self . _ytcfg_get_safe (
ytcfg , ( lambda x : x [ ' INNERTUBE_CLIENT_VERSION ' ] ,
lambda x : x [ ' INNERTUBE_CONTEXT ' ] [ ' client ' ] [ ' clientVersion ' ] ) , compat_str , default_client )
2021-06-30 00:07:49 +02:00
2021-07-30 21:45:04 +02:00
def _extract_api_key ( self , ytcfg = None , default_client = ' web ' ) :
2021-06-30 00:07:49 +02:00
return self . _ytcfg_get_safe ( ytcfg , lambda x : x [ ' INNERTUBE_API_KEY ' ] , compat_str , default_client )
2021-07-30 21:45:04 +02:00
def _extract_context ( self , ytcfg = None , default_client = ' web ' ) :
2021-06-30 00:07:49 +02:00
_get_context = lambda y : try_get ( y , lambda x : x [ ' INNERTUBE_CONTEXT ' ] , dict )
context = _get_context ( ytcfg )
if context :
return context
context = _get_context ( self . _get_default_ytcfg ( default_client ) )
if not ytcfg :
return context
# Recreate the client context (required)
context [ ' client ' ] . update ( {
' clientVersion ' : self . _extract_client_version ( ytcfg , default_client ) ,
' clientName ' : self . _extract_client_name ( ytcfg , default_client ) ,
} )
visitor_data = try_get ( ytcfg , lambda x : x [ ' VISITOR_DATA ' ] , compat_str )
if visitor_data :
context [ ' client ' ] [ ' visitorData ' ] = visitor_data
return context
2021-08-02 04:30:08 +02:00
_SAPISID = None
2021-06-30 00:07:49 +02:00
def _generate_sapisidhash_header ( self , origin = ' https://www.youtube.com ' ) :
2021-03-03 11:32:40 +01:00
time_now = round ( time . time ( ) )
2021-08-02 04:30:08 +02:00
if self . _SAPISID is None :
yt_cookies = self . _get_cookies ( ' https://www.youtube.com ' )
# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
# See: https://github.com/yt-dlp/yt-dlp/issues/393
sapisid_cookie = dict_get (
yt_cookies , ( ' __Secure-3PAPISID ' , ' SAPISID ' ) )
if sapisid_cookie and sapisid_cookie . value :
self . _SAPISID = sapisid_cookie . value
self . write_debug ( ' Extracted SAPISID cookie ' )
# SAPISID cookie is required if not already present
if not yt_cookies . get ( ' SAPISID ' ) :
self . write_debug ( ' Copying __Secure-3PAPISID cookie to SAPISID cookie ' )
self . _set_cookie (
' .youtube.com ' , ' SAPISID ' , self . _SAPISID , secure = True , expire_time = time_now + 3600 )
else :
self . _SAPISID = False
if not self . _SAPISID :
return None
2021-06-10 23:02:57 +02:00
# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
sapisidhash = hashlib . sha1 (
2021-08-02 04:30:08 +02:00
f ' { time_now } { self . _SAPISID } { origin } ' . encode ( ' utf-8 ' ) ) . hexdigest ( )
2021-06-10 23:02:57 +02:00
return f ' SAPISIDHASH { time_now } _ { sapisidhash } '
2021-03-03 11:32:40 +01:00
def _call_api ( self , ep , query , video_id , fatal = True , headers = None ,
2021-04-11 22:23:04 +02:00
note = ' Downloading API JSON ' , errnote = ' Unable to download API page ' ,
2021-07-30 21:45:04 +02:00
context = None , api_key = None , api_hostname = None , default_client = ' web ' ) :
2021-04-11 22:23:04 +02:00
2021-06-30 00:07:49 +02:00
data = { ' context ' : context } if context else { ' context ' : self . _extract_context ( default_client = default_client ) }
2020-11-19 20:22:59 +01:00
data . update ( query )
2021-07-21 05:52:34 +02:00
real_headers = self . generate_api_headers ( default_client = default_client )
2021-04-11 22:23:04 +02:00
real_headers . update ( { ' content-type ' : ' application/json ' } )
if headers :
real_headers . update ( headers )
2021-02-04 15:37:17 +01:00
return self . _download_json (
2021-06-30 00:07:49 +02:00
' https:// %s /youtubei/v1/ %s ' % ( api_hostname or self . _get_innertube_host ( default_client ) , ep ) ,
2021-03-03 11:32:40 +01:00
video_id = video_id , fatal = fatal , note = note , errnote = errnote ,
2021-04-11 22:23:04 +02:00
data = json . dumps ( data ) . encode ( ' utf8 ' ) , headers = real_headers ,
query = { ' key ' : api_key or self . _extract_api_key ( ) } )
2021-10-08 23:19:25 +02:00
def extract_yt_initial_data ( self , item_id , webpage , fatal = True ) :
data = self . _search_regex (
( r ' %s \ s* %s ' % ( self . _YT_INITIAL_DATA_RE , self . _YT_INITIAL_BOUNDARY_RE ) ,
self . _YT_INITIAL_DATA_RE ) , webpage , ' yt initial data ' , fatal = fatal )
if data :
return self . _parse_json ( data , item_id , fatal = fatal )
2015-11-21 23:17:07 +01:00
2021-09-24 02:52:17 +02:00
@staticmethod
def _extract_session_index ( * data ) :
"""
Index of current account in account list .
See : https : / / github . com / yt - dlp / yt - dlp / pull / 519
"""
for ytcfg in data :
session_index = int_or_none ( try_get ( ytcfg , lambda x : x [ ' SESSION_INDEX ' ] ) )
if session_index is not None :
return session_index
# Deprecated?
def _extract_identity_token ( self , ytcfg = None , webpage = None ) :
2021-03-14 23:41:11 +01:00
if ytcfg :
token = try_get ( ytcfg , lambda x : x [ ' ID_TOKEN ' ] , compat_str )
if token :
return token
2021-09-24 02:52:17 +02:00
if webpage :
return self . _search_regex (
r ' \ bID_TOKEN[ " \' ] \ s*: \ s*[ " \' ](.+?)[ " \' ] ' , webpage ,
' identity token ' , default = None , fatal = False )
2021-03-14 23:41:11 +01:00
@staticmethod
2021-07-19 06:55:07 +02:00
def _extract_account_syncid ( * args ) :
2021-04-14 07:07:03 +02:00
"""
Extract syncId required to download private playlists of secondary channels
2021-07-19 06:55:07 +02:00
@params response and / or ytcfg
2021-04-14 07:07:03 +02:00
"""
2021-07-19 06:55:07 +02:00
for data in args :
# ytcfg includes channel_syncid if on secondary channel
delegated_sid = try_get ( data , lambda x : x [ ' DELEGATED_SESSION_ID ' ] , compat_str )
if delegated_sid :
return delegated_sid
sync_ids = ( try_get (
data , ( lambda x : x [ ' responseContext ' ] [ ' mainAppWebResponseContext ' ] [ ' datasyncId ' ] ,
2021-09-17 20:23:55 +02:00
lambda x : x [ ' DATASYNC_ID ' ] ) , compat_str ) or ' ' ) . split ( ' || ' )
2021-07-19 06:55:07 +02:00
if len ( sync_ids ) > = 2 and sync_ids [ 1 ] :
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
# and just "user_syncid||" for primary channel. We only want the channel_syncid
return sync_ids [ 0 ]
2021-03-14 23:41:11 +01:00
2021-10-08 23:19:25 +02:00
@staticmethod
def _extract_visitor_data ( * args ) :
"""
Extracts visitorData from an API response or ytcfg
Appears to be used to track session state
"""
2021-11-19 01:06:28 +01:00
return get_first (
args , ( ( ' VISITOR_DATA ' , ( ' INNERTUBE_CONTEXT ' , ' client ' , ' visitorData ' ) , ( ' responseContext ' , ' visitorData ' ) ) ) ,
expected_type = str )
2021-10-08 23:19:25 +02:00
2021-09-24 02:52:17 +02:00
@property
def is_authenticated ( self ) :
return bool ( self . _generate_sapisidhash_header ( ) )
2021-07-21 05:52:34 +02:00
def extract_ytcfg ( self , video_id , webpage ) :
2021-04-21 07:07:24 +02:00
if not webpage :
return { }
2021-01-01 13:26:37 +01:00
return self . _parse_json (
self . _search_regex (
r ' ytcfg \ .set \ s* \ ( \ s*( { .+?}) \ s* \ ) \ s*; ' , webpage , ' ytcfg ' ,
2021-04-11 22:23:04 +02:00
default = ' {} ' ) , video_id , fatal = False ) or { }
2021-07-21 05:52:34 +02:00
def generate_api_headers (
2021-09-24 02:52:17 +02:00
self , * , ytcfg = None , account_syncid = None , session_index = None ,
visitor_data = None , identity_token = None , api_hostname = None , default_client = ' web ' ) :
2021-07-21 05:52:34 +02:00
origin = ' https:// ' + ( api_hostname if api_hostname else self . _get_innertube_host ( default_client ) )
2021-04-11 22:23:04 +02:00
headers = {
2021-06-30 00:07:49 +02:00
' X-YouTube-Client-Name ' : compat_str (
2021-07-21 05:52:34 +02:00
self . _ytcfg_get_safe ( ytcfg , lambda x : x [ ' INNERTUBE_CONTEXT_CLIENT_NAME ' ] , default_client = default_client ) ) ,
' X-YouTube-Client-Version ' : self . _extract_client_version ( ytcfg , default_client ) ,
2021-09-24 02:52:17 +02:00
' Origin ' : origin ,
' X-Youtube-Identity-Token ' : identity_token or self . _extract_identity_token ( ytcfg ) ,
' X-Goog-PageId ' : account_syncid or self . _extract_account_syncid ( ytcfg ) ,
2021-10-08 23:19:25 +02:00
' X-Goog-Visitor-Id ' : visitor_data or self . _extract_visitor_data ( ytcfg )
2021-09-24 02:52:17 +02:00
}
if session_index is None :
2021-07-18 08:23:32 +02:00
session_index = self . _extract_session_index ( ytcfg )
if account_syncid or session_index is not None :
headers [ ' X-Goog-AuthUser ' ] = session_index if session_index is not None else 0
2021-09-24 02:52:17 +02:00
2021-06-30 00:07:49 +02:00
auth = self . _generate_sapisidhash_header ( origin )
2021-04-11 22:23:04 +02:00
if auth is not None :
headers [ ' Authorization ' ] = auth
2021-06-30 00:07:49 +02:00
headers [ ' X-Origin ' ] = origin
2021-09-24 02:52:17 +02:00
return { h : v for h , v in headers . items ( ) if v is not None }
2021-01-01 13:26:37 +01:00
2021-07-12 01:18:40 +02:00
@staticmethod
def _build_api_continuation_query ( continuation , ctp = None ) :
query = {
' continuation ' : continuation
}
# TODO: Inconsistency with clickTrackingParams.
# Currently we have a fixed ctp contained within context (from ytcfg)
# and a ctp in root query for continuation.
if ctp :
query [ ' clickTracking ' ] = { ' clickTrackingParams ' : ctp }
return query
@classmethod
def _extract_next_continuation_data ( cls , renderer ) :
next_continuation = try_get (
renderer , ( lambda x : x [ ' continuations ' ] [ 0 ] [ ' nextContinuationData ' ] ,
lambda x : x [ ' continuation ' ] [ ' reloadContinuationData ' ] ) , dict )
if not next_continuation :
return
continuation = next_continuation . get ( ' continuation ' )
if not continuation :
return
ctp = next_continuation . get ( ' clickTrackingParams ' )
2021-07-19 06:55:07 +02:00
return cls . _build_api_continuation_query ( continuation , ctp )
2021-07-12 01:18:40 +02:00
@classmethod
def _extract_continuation_ep_data ( cls , continuation_ep : dict ) :
if isinstance ( continuation_ep , dict ) :
continuation = try_get (
continuation_ep , lambda x : x [ ' continuationCommand ' ] [ ' token ' ] , compat_str )
if not continuation :
return
ctp = continuation_ep . get ( ' clickTrackingParams ' )
2021-07-19 06:55:07 +02:00
return cls . _build_api_continuation_query ( continuation , ctp )
2021-07-12 01:18:40 +02:00
@classmethod
def _extract_continuation ( cls , renderer ) :
next_continuation = cls . _extract_next_continuation_data ( renderer )
if next_continuation :
return next_continuation
2021-07-19 06:55:07 +02:00
2021-07-12 01:18:40 +02:00
contents = [ ]
for key in ( ' contents ' , ' items ' ) :
contents . extend ( try_get ( renderer , lambda x : x [ key ] , list ) or [ ] )
2021-07-19 06:55:07 +02:00
2021-07-12 01:18:40 +02:00
for content in contents :
if not isinstance ( content , dict ) :
continue
continuation_ep = try_get (
content , ( lambda x : x [ ' continuationItemRenderer ' ] [ ' continuationEndpoint ' ] ,
lambda x : x [ ' continuationItemRenderer ' ] [ ' button ' ] [ ' buttonRenderer ' ] [ ' command ' ] ) ,
dict )
continuation = cls . _extract_continuation_ep_data ( continuation_ep )
if continuation :
return continuation
2021-07-19 06:55:07 +02:00
@classmethod
def _extract_alerts ( cls , data ) :
2021-06-30 00:07:49 +02:00
for alert_dict in try_get ( data , lambda x : x [ ' alerts ' ] , list ) or [ ] :
if not isinstance ( alert_dict , dict ) :
continue
for alert in alert_dict . values ( ) :
alert_type = alert . get ( ' type ' )
if not alert_type :
continue
2021-07-24 02:46:46 +02:00
message = cls . _get_text ( alert , ' text ' )
2021-06-30 00:07:49 +02:00
if message :
yield alert_type , message
2021-09-04 04:33:42 +02:00
def _report_alerts ( self , alerts , expected = True , fatal = True , only_once = False ) :
2021-06-30 00:07:49 +02:00
errors = [ ]
warnings = [ ]
for alert_type , alert_message in alerts :
2021-08-13 08:18:26 +02:00
if alert_type . lower ( ) == ' error ' and fatal :
2021-06-30 00:07:49 +02:00
errors . append ( [ alert_type , alert_message ] )
else :
warnings . append ( [ alert_type , alert_message ] )
for alert_type , alert_message in ( warnings + errors [ : - 1 ] ) :
2021-09-04 04:33:42 +02:00
self . report_warning ( ' YouTube said: %s - %s ' % ( alert_type , alert_message ) , only_once = only_once )
2021-06-30 00:07:49 +02:00
if errors :
raise ExtractorError ( ' YouTube said: %s ' % errors [ - 1 ] [ 1 ] , expected = expected )
def _extract_and_report_alerts ( self , data , * args , * * kwargs ) :
return self . _report_alerts ( self . _extract_alerts ( data ) , * args , * * kwargs )
2021-07-15 04:42:30 +02:00
def _extract_badges ( self , renderer : dict ) :
badges = set ( )
for badge in try_get ( renderer , lambda x : x [ ' badges ' ] , list ) or [ ] :
label = try_get ( badge , lambda x : x [ ' metadataBadgeRenderer ' ] [ ' label ' ] , compat_str )
if label :
badges . add ( label . lower ( ) )
return badges
@staticmethod
2021-07-24 02:46:46 +02:00
def _get_text ( data , * path_list , max_runs = None ) :
for path in path_list or [ None ] :
if path is None :
obj = [ data ]
else :
obj = traverse_obj ( data , path , default = [ ] )
if not any ( key is . . . or isinstance ( key , ( list , tuple ) ) for key in variadic ( path ) ) :
obj = [ obj ]
for item in obj :
text = try_get ( item , lambda x : x [ ' simpleText ' ] , compat_str )
if text :
return text
runs = try_get ( item , lambda x : x [ ' runs ' ] , list ) or [ ]
if not runs and isinstance ( item , list ) :
runs = item
runs = runs [ : min ( len ( runs ) , max_runs or len ( runs ) ) ]
text = ' ' . join ( traverse_obj ( runs , ( . . . , ' text ' ) , expected_type = str , default = [ ] ) )
if text :
return text
2021-07-15 04:42:30 +02:00
2021-06-30 00:07:49 +02:00
def _extract_response ( self , item_id , query , note = ' Downloading API JSON ' , headers = None ,
ytcfg = None , check_get_keys = None , ep = ' browse ' , fatal = True , api_hostname = None ,
2021-07-30 21:45:04 +02:00
default_client = ' web ' ) :
2021-06-30 00:07:49 +02:00
response = None
last_error = None
count = - 1
retries = self . get_param ( ' extractor_retries ' , 3 )
if check_get_keys is None :
check_get_keys = [ ]
while count < retries :
count + = 1
if last_error :
2021-09-04 04:33:42 +02:00
self . report_warning ( ' %s . Retrying ... ' % remove_end ( last_error , ' . ' ) )
2021-06-30 00:07:49 +02:00
try :
response = self . _call_api (
ep = ep , fatal = True , headers = headers ,
video_id = item_id , query = query ,
context = self . _extract_context ( ytcfg , default_client ) ,
api_key = self . _extract_api_key ( ytcfg , default_client ) ,
api_hostname = api_hostname , default_client = default_client ,
note = ' %s %s ' % ( note , ' (retry # %d ) ' % count if count else ' ' ) )
except ExtractorError as e :
2021-07-21 19:57:23 +02:00
if isinstance ( e . cause , network_exceptions ) :
2021-08-13 08:18:26 +02:00
if isinstance ( e . cause , compat_HTTPError ) and not is_html ( e . cause . read ( 512 ) ) :
e . cause . seek ( 0 )
yt_error = try_get (
self . _parse_json ( e . cause . read ( ) . decode ( ) , item_id , fatal = False ) ,
lambda x : x [ ' error ' ] [ ' message ' ] , compat_str )
if yt_error :
self . _report_alerts ( [ ( ' ERROR ' , yt_error ) ] , fatal = False )
2021-06-30 00:07:49 +02:00
# Downloading page may result in intermittent 5xx HTTP error
# Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
2021-07-21 19:57:23 +02:00
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
if not isinstance ( e . cause , compat_HTTPError ) or e . cause . code not in ( 403 , 429 ) :
2021-09-05 07:46:23 +02:00
last_error = error_to_compat_str ( e . cause or e . msg )
2021-07-21 19:57:23 +02:00
if count < retries :
continue
2021-06-30 00:07:49 +02:00
if fatal :
raise
else :
self . report_warning ( error_to_compat_str ( e ) )
return
else :
try :
2021-10-08 23:19:25 +02:00
self . _extract_and_report_alerts ( response , only_once = True )
2021-06-30 00:07:49 +02:00
except ExtractorError as e :
2021-09-04 04:33:42 +02:00
# YouTube servers may return errors we want to retry on in a 200 OK response
# See: https://github.com/yt-dlp/yt-dlp/issues/839
if ' unknown error ' in e . msg . lower ( ) :
last_error = e . msg
continue
2021-06-30 00:07:49 +02:00
if fatal :
raise
self . report_warning ( error_to_compat_str ( e ) )
return
if not check_get_keys or dict_get ( response , check_get_keys ) :
break
# Youtube sometimes sends incomplete data
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
last_error = ' Incomplete data received '
if count > = retries :
if fatal :
raise ExtractorError ( last_error )
else :
self . report_warning ( last_error )
return
return response
2021-05-15 16:38:47 +02:00
@staticmethod
def is_music_url ( url ) :
return re . match ( r ' https?://music \ .youtube \ .com/ ' , url ) is not None
2021-01-16 13:40:15 +01:00
def _extract_video(self, renderer):
    """Build a ``url``-type result dict for a single video renderer.

    *renderer* is a dict; its ``videoId`` entry becomes the ``id`` and is
    interpolated into the watch URL. The text fields are read through
    ``self._get_text``. The result is delegated to ``YoutubeIE`` via
    ``ie_key``.

    Returns a dict with the keys ``_type``, ``ie_key``, ``id``, ``url``,
    ``title``, ``description``, ``duration``, ``view_count`` and
    ``uploader``.
    """
    video_id = renderer.get('videoId')
    video_title = self._get_text(renderer, 'title')
    snippet = self._get_text(renderer, 'descriptionSnippet')

    # NOTE(review): the extra path arguments are presumably alternatives
    # tried by _get_text when 'lengthText' is absent -- confirm against
    # the helper's definition.
    length_text = self._get_text(
        renderer, 'lengthText',
        ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    length = parse_duration(length_text)

    # Drop all whitespace, then keep only the leading run of digits and
    # commas before converting it to an integer.
    raw_views = self._get_text(renderer, 'viewCountText') or ''
    compact_views = re.sub(r'\s', '', raw_views)
    leading_number = self._search_regex(
        r'^([\d,]+)', compact_views, 'view count', default=None)
    views = str_to_int(leading_number)

    channel_name = self._get_text(renderer, 'ownerText', 'shortBylineText')

    info = {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
    }
    info['title'] = video_title
    info['description'] = snippet
    info['duration'] = length
    info['view_count'] = views
    info['uploader'] = channel_name
    return info
2015-11-21 23:17:07 +01:00
2015-02-16 21:44:17 +01:00
class YoutubeIE ( YoutubeBaseInfoExtractor ) :
2021-10-23 16:29:52 +02:00
IE_DESC = ' YouTube '
2013-11-18 16:42:35 +01:00
_VALID_URL = r """ (?x)^
2013-06-23 19:58:33 +02:00
(
2014-09-11 21:47:25 +02:00
( ? : https ? : / / | / / ) # http(s):// or protocol-independent URL
2021-02-19 21:44:36 +01:00
( ? : ( ? : ( ? : ( ? : \w + \. ) ? [ yY ] [ oO ] [ uU ] [ tT ] [ uU ] [ bB ] [ eE ] ( ? : - nocookie | kids ) ? \. com |
( ? : www \. ) ? deturl \. com / www \. youtube \. com |
( ? : www \. ) ? pwnyoutube \. com |
( ? : www \. ) ? hooktube \. com |
( ? : www \. ) ? yourepeat \. com |
tube \. majestyc \. net |
% ( invidious ) s |
youtube \. googleapis \. com ) / # the various hostnames, with wildcard subdomains
2013-06-23 19:58:33 +02:00
( ? : . * ? \#/)? # handle anchor (#/) redirect urls
( ? : # the various things that can precede the ID:
2021-08-22 17:04:59 +02:00
( ? : ( ? : v | embed | e | shorts ) / ( ? ! videoseries ) ) # v/ or embed/ or e/ or shorts/
2013-06-23 19:58:33 +02:00
| ( ? : # or the v= param in all its forms
2014-02-18 20:00:54 +01:00
( ? : ( ? : watch | movie ) ( ? : _popup ) ? ( ? : \. php ) ? / ? ) ? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
2013-06-23 19:58:33 +02:00
( ? : \? | \#!?) # the params delimiter ? or # or #!
2015-11-29 16:01:59 +01:00
( ? : . * ? [ & ; ] ) ? ? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
2013-06-23 19:58:33 +02:00
v =
)
2013-09-05 22:38:23 +02:00
) )
2015-08-16 22:04:13 +02:00
| ( ? :
youtu \. be | # just youtu.be/xxxx
2016-04-03 22:26:20 +02:00
vid \. plus | # or vid.plus/xxxx
zwearz \. com / watch | # or zwearz.com/watch/xxxx
2021-02-19 21:44:36 +01:00
% ( invidious ) s
2015-08-16 22:04:13 +02:00
) /
2014-09-11 21:47:25 +02:00
| ( ? : www \. ) ? cleanvideosearch \. com / media / action / yt / watch \? videoId =
2013-09-05 22:38:23 +02:00
)
2013-06-23 19:58:33 +02:00
) ? # all until now is optional -> you can pass the naked ID
2021-04-17 05:02:33 +02:00
( ? P < id > [ 0 - 9 A - Za - z_ - ] { 11 } ) # here is it! the YouTube video ID
2013-06-23 19:58:33 +02:00
( ? ( 1 ) . + ) ? # if we found the ID, everything can follow
2021-05-15 16:38:47 +02:00
( ? : \#|$)""" % {
2021-11-06 04:07:34 +01:00
' invidious ' : ' | ' . join ( YoutubeBaseInfoExtractor . _INVIDIOUS_SITES ) ,
2021-02-19 21:44:36 +01:00
}
2020-05-02 02:18:08 +02:00
_PLAYER_INFO_RE = (
2021-02-10 22:22:55 +01:00
r ' /s/player/(?P<id>[a-zA-Z0-9_-] { 8,})/player ' ,
r ' /(?P<id>[a-zA-Z0-9_-] { 8,})/player(?:_ias \ .vflset(?:/[a-zA-Z] { 2,3}_[a-zA-Z] { 2,3})?|-plasma-ias-(?:phone|tablet)-[a-z] {2} _[A-Z] {2} \ .vflset)/base \ .js$ ' ,
2021-02-04 15:37:17 +01:00
r ' \ b(?P<id>vfl[a-zA-Z0-9_-]+) \ b.*? \ .js$ ' ,
2020-05-02 02:18:08 +02:00
)
2013-12-24 12:34:09 +01:00
_formats = {
2016-03-02 17:35:04 +01:00
' 5 ' : { ' ext ' : ' flv ' , ' width ' : 400 , ' height ' : 240 , ' acodec ' : ' mp3 ' , ' abr ' : 64 , ' vcodec ' : ' h263 ' } ,
' 6 ' : { ' ext ' : ' flv ' , ' width ' : 450 , ' height ' : 270 , ' acodec ' : ' mp3 ' , ' abr ' : 64 , ' vcodec ' : ' h263 ' } ,
' 13 ' : { ' ext ' : ' 3gp ' , ' acodec ' : ' aac ' , ' vcodec ' : ' mp4v ' } ,
' 17 ' : { ' ext ' : ' 3gp ' , ' width ' : 176 , ' height ' : 144 , ' acodec ' : ' aac ' , ' abr ' : 24 , ' vcodec ' : ' mp4v ' } ,
' 18 ' : { ' ext ' : ' mp4 ' , ' width ' : 640 , ' height ' : 360 , ' acodec ' : ' aac ' , ' abr ' : 96 , ' vcodec ' : ' h264 ' } ,
' 22 ' : { ' ext ' : ' mp4 ' , ' width ' : 1280 , ' height ' : 720 , ' acodec ' : ' aac ' , ' abr ' : 192 , ' vcodec ' : ' h264 ' } ,
' 34 ' : { ' ext ' : ' flv ' , ' width ' : 640 , ' height ' : 360 , ' acodec ' : ' aac ' , ' abr ' : 128 , ' vcodec ' : ' h264 ' } ,
' 35 ' : { ' ext ' : ' flv ' , ' width ' : 854 , ' height ' : 480 , ' acodec ' : ' aac ' , ' abr ' : 128 , ' vcodec ' : ' h264 ' } ,
2016-02-07 20:30:57 +01:00
# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
2016-03-02 17:35:04 +01:00
' 36 ' : { ' ext ' : ' 3gp ' , ' width ' : 320 , ' acodec ' : ' aac ' , ' vcodec ' : ' mp4v ' } ,
' 37 ' : { ' ext ' : ' mp4 ' , ' width ' : 1920 , ' height ' : 1080 , ' acodec ' : ' aac ' , ' abr ' : 192 , ' vcodec ' : ' h264 ' } ,
' 38 ' : { ' ext ' : ' mp4 ' , ' width ' : 4096 , ' height ' : 3072 , ' acodec ' : ' aac ' , ' abr ' : 192 , ' vcodec ' : ' h264 ' } ,
' 43 ' : { ' ext ' : ' webm ' , ' width ' : 640 , ' height ' : 360 , ' acodec ' : ' vorbis ' , ' abr ' : 128 , ' vcodec ' : ' vp8 ' } ,
' 44 ' : { ' ext ' : ' webm ' , ' width ' : 854 , ' height ' : 480 , ' acodec ' : ' vorbis ' , ' abr ' : 128 , ' vcodec ' : ' vp8 ' } ,
' 45 ' : { ' ext ' : ' webm ' , ' width ' : 1280 , ' height ' : 720 , ' acodec ' : ' vorbis ' , ' abr ' : 192 , ' vcodec ' : ' vp8 ' } ,
2016-01-03 04:11:19 +01:00
' 46 ' : { ' ext ' : ' webm ' , ' width ' : 1920 , ' height ' : 1080 , ' acodec ' : ' vorbis ' , ' abr ' : 192 , ' vcodec ' : ' vp8 ' } ,
2016-03-02 17:35:04 +01:00
' 59 ' : { ' ext ' : ' mp4 ' , ' width ' : 854 , ' height ' : 480 , ' acodec ' : ' aac ' , ' abr ' : 128 , ' vcodec ' : ' h264 ' } ,
' 78 ' : { ' ext ' : ' mp4 ' , ' width ' : 854 , ' height ' : 480 , ' acodec ' : ' aac ' , ' abr ' : 128 , ' vcodec ' : ' h264 ' } ,
2016-01-03 04:11:19 +01:00
# 3D videos
2016-03-02 17:35:04 +01:00
' 82 ' : { ' ext ' : ' mp4 ' , ' height ' : 360 , ' format_note ' : ' 3D ' , ' acodec ' : ' aac ' , ' abr ' : 128 , ' vcodec ' : ' h264 ' , ' preference ' : - 20 } ,
' 83 ' : { ' ext ' : ' mp4 ' , ' height ' : 480 , ' format_note ' : ' 3D ' , ' acodec ' : ' aac ' , ' abr ' : 128 , ' vcodec ' : ' h264 ' , ' preference ' : - 20 } ,
' 84 ' : { ' ext ' : ' mp4 ' , ' height ' : 720 , ' format_note ' : ' 3D ' , ' acodec ' : ' aac ' , ' abr ' : 192 , ' vcodec ' : ' h264 ' , ' preference ' : - 20 } ,
' 85 ' : { ' ext ' : ' mp4 ' , ' height ' : 1080 , ' format_note ' : ' 3D ' , ' acodec ' : ' aac ' , ' abr ' : 192 , ' vcodec ' : ' h264 ' , ' preference ' : - 20 } ,
2016-01-03 04:11:19 +01:00
' 100 ' : { ' ext ' : ' webm ' , ' height ' : 360 , ' format_note ' : ' 3D ' , ' acodec ' : ' vorbis ' , ' abr ' : 128 , ' vcodec ' : ' vp8 ' , ' preference ' : - 20 } ,
' 101 ' : { ' ext ' : ' webm ' , ' height ' : 480 , ' format_note ' : ' 3D ' , ' acodec ' : ' vorbis ' , ' abr ' : 192 , ' vcodec ' : ' vp8 ' , ' preference ' : - 20 } ,
' 102 ' : { ' ext ' : ' webm ' , ' height ' : 720 , ' format_note ' : ' 3D ' , ' acodec ' : ' vorbis ' , ' abr ' : 192 , ' vcodec ' : ' vp8 ' , ' preference ' : - 20 } ,
2013-08-20 03:22:25 +02:00
2013-09-04 03:49:35 +02:00
# Apple HTTP Live Streaming
2016-03-17 12:25:37 +01:00
' 91 ' : { ' ext ' : ' mp4 ' , ' height ' : 144 , ' format_note ' : ' HLS ' , ' acodec ' : ' aac ' , ' abr ' : 48 , ' vcodec ' : ' h264 ' , ' preference ' : - 10 } ,
2016-03-02 17:35:04 +01:00
' 92 ' : { ' ext ' : ' mp4 ' , ' height ' : 240 , ' format_note ' : ' HLS ' , ' acodec ' : ' aac ' , ' abr ' : 48 , ' vcodec ' : ' h264 ' , ' preference ' : - 10 } ,
' 93 ' : { ' ext ' : ' mp4 ' , ' height ' : 360 , ' format_note ' : ' HLS ' , ' acodec ' : ' aac ' , ' abr ' : 128 , ' vcodec ' : ' h264 ' , ' preference ' : - 10 } ,
' 94 ' : { ' ext ' : ' mp4 ' , ' height ' : 480 , ' format_note ' : ' HLS ' , ' acodec ' : ' aac ' , ' abr ' : 128 , ' vcodec ' : ' h264 ' , ' preference ' : - 10 } ,
' 95 ' : { ' ext ' : ' mp4 ' , ' height ' : 720 , ' format_note ' : ' HLS ' , ' acodec ' : ' aac ' , ' abr ' : 256 , ' vcodec ' : ' h264 ' , ' preference ' : - 10 } ,
' 96 ' : { ' ext ' : ' mp4 ' , ' height ' : 1080 , ' format_note ' : ' HLS ' , ' acodec ' : ' aac ' , ' abr ' : 256 , ' vcodec ' : ' h264 ' , ' preference ' : - 10 } ,
2016-01-03 04:11:19 +01:00
' 132 ' : { ' ext ' : ' mp4 ' , ' height ' : 240 , ' format_note ' : ' HLS ' , ' acodec ' : ' aac ' , ' abr ' : 48 , ' vcodec ' : ' h264 ' , ' preference ' : - 10 } ,
' 151 ' : { ' ext ' : ' mp4 ' , ' height ' : 72 , ' format_note ' : ' HLS ' , ' acodec ' : ' aac ' , ' abr ' : 24 , ' vcodec ' : ' h264 ' , ' preference ' : - 10 } ,
2013-12-24 12:34:09 +01:00
# DASH mp4 video
2017-04-11 17:41:48 +02:00
' 133 ' : { ' ext ' : ' mp4 ' , ' height ' : 240 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
' 134 ' : { ' ext ' : ' mp4 ' , ' height ' : 360 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
' 135 ' : { ' ext ' : ' mp4 ' , ' height ' : 480 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
' 136 ' : { ' ext ' : ' mp4 ' , ' height ' : 720 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
' 137 ' : { ' ext ' : ' mp4 ' , ' height ' : 1080 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
2019-03-09 13:14:41 +01:00
' 138 ' : { ' ext ' : ' mp4 ' , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } , # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
2017-04-11 17:41:48 +02:00
' 160 ' : { ' ext ' : ' mp4 ' , ' height ' : 144 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
' 212 ' : { ' ext ' : ' mp4 ' , ' height ' : 480 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
' 264 ' : { ' ext ' : ' mp4 ' , ' height ' : 1440 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
' 298 ' : { ' ext ' : ' mp4 ' , ' height ' : 720 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' , ' fps ' : 60 } ,
' 299 ' : { ' ext ' : ' mp4 ' , ' height ' : 1080 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' , ' fps ' : 60 } ,
' 266 ' : { ' ext ' : ' mp4 ' , ' height ' : 2160 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
2013-08-20 03:22:25 +02:00
2013-10-18 23:53:00 +02:00
# Dash mp4 audio
2017-04-11 17:41:48 +02:00
' 139 ' : { ' ext ' : ' m4a ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' aac ' , ' abr ' : 48 , ' container ' : ' m4a_dash ' } ,
' 140 ' : { ' ext ' : ' m4a ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' aac ' , ' abr ' : 128 , ' container ' : ' m4a_dash ' } ,
' 141 ' : { ' ext ' : ' m4a ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' aac ' , ' abr ' : 256 , ' container ' : ' m4a_dash ' } ,
' 256 ' : { ' ext ' : ' m4a ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' aac ' , ' container ' : ' m4a_dash ' } ,
' 258 ' : { ' ext ' : ' m4a ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' aac ' , ' container ' : ' m4a_dash ' } ,
' 325 ' : { ' ext ' : ' m4a ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' dtse ' , ' container ' : ' m4a_dash ' } ,
' 328 ' : { ' ext ' : ' m4a ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' ec-3 ' , ' container ' : ' m4a_dash ' } ,
2013-08-20 03:22:25 +02:00
# Dash webm
2017-04-11 17:41:48 +02:00
' 167 ' : { ' ext ' : ' webm ' , ' height ' : 360 , ' width ' : 640 , ' format_note ' : ' DASH video ' , ' container ' : ' webm ' , ' vcodec ' : ' vp8 ' } ,
' 168 ' : { ' ext ' : ' webm ' , ' height ' : 480 , ' width ' : 854 , ' format_note ' : ' DASH video ' , ' container ' : ' webm ' , ' vcodec ' : ' vp8 ' } ,
' 169 ' : { ' ext ' : ' webm ' , ' height ' : 720 , ' width ' : 1280 , ' format_note ' : ' DASH video ' , ' container ' : ' webm ' , ' vcodec ' : ' vp8 ' } ,
' 170 ' : { ' ext ' : ' webm ' , ' height ' : 1080 , ' width ' : 1920 , ' format_note ' : ' DASH video ' , ' container ' : ' webm ' , ' vcodec ' : ' vp8 ' } ,
' 218 ' : { ' ext ' : ' webm ' , ' height ' : 480 , ' width ' : 854 , ' format_note ' : ' DASH video ' , ' container ' : ' webm ' , ' vcodec ' : ' vp8 ' } ,
' 219 ' : { ' ext ' : ' webm ' , ' height ' : 480 , ' width ' : 854 , ' format_note ' : ' DASH video ' , ' container ' : ' webm ' , ' vcodec ' : ' vp8 ' } ,
' 278 ' : { ' ext ' : ' webm ' , ' height ' : 144 , ' format_note ' : ' DASH video ' , ' container ' : ' webm ' , ' vcodec ' : ' vp9 ' } ,
' 242 ' : { ' ext ' : ' webm ' , ' height ' : 240 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 243 ' : { ' ext ' : ' webm ' , ' height ' : 360 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 244 ' : { ' ext ' : ' webm ' , ' height ' : 480 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 245 ' : { ' ext ' : ' webm ' , ' height ' : 480 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 246 ' : { ' ext ' : ' webm ' , ' height ' : 480 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 247 ' : { ' ext ' : ' webm ' , ' height ' : 720 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 248 ' : { ' ext ' : ' webm ' , ' height ' : 1080 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 271 ' : { ' ext ' : ' webm ' , ' height ' : 1440 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
2015-11-30 15:42:05 +01:00
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
2017-04-11 17:41:48 +02:00
' 272 ' : { ' ext ' : ' webm ' , ' height ' : 2160 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 302 ' : { ' ext ' : ' webm ' , ' height ' : 720 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' , ' fps ' : 60 } ,
' 303 ' : { ' ext ' : ' webm ' , ' height ' : 1080 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' , ' fps ' : 60 } ,
' 308 ' : { ' ext ' : ' webm ' , ' height ' : 1440 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' , ' fps ' : 60 } ,
' 313 ' : { ' ext ' : ' webm ' , ' height ' : 2160 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 315 ' : { ' ext ' : ' webm ' , ' height ' : 2160 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' , ' fps ' : 60 } ,
2013-12-24 12:34:09 +01:00
# Dash webm audio
2017-04-11 17:41:48 +02:00
' 171 ' : { ' ext ' : ' webm ' , ' acodec ' : ' vorbis ' , ' format_note ' : ' DASH audio ' , ' abr ' : 128 } ,
' 172 ' : { ' ext ' : ' webm ' , ' acodec ' : ' vorbis ' , ' format_note ' : ' DASH audio ' , ' abr ' : 256 } ,
2014-01-09 02:38:50 +01:00
2014-11-18 11:06:09 +01:00
# Dash webm audio with opus inside
2017-04-11 17:41:48 +02:00
' 249 ' : { ' ext ' : ' webm ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' opus ' , ' abr ' : 50 } ,
' 250 ' : { ' ext ' : ' webm ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' opus ' , ' abr ' : 70 } ,
' 251 ' : { ' ext ' : ' webm ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' opus ' , ' abr ' : 160 } ,
2014-11-18 11:06:09 +01:00
2014-01-09 02:38:50 +01:00
# RTMP (unnamed)
' _rtmp ' : { ' protocol ' : ' rtmp ' } ,
2019-06-13 20:59:05 +02:00
# av01 video only formats sometimes served with "unknown" codecs
2021-08-22 21:59:43 +02:00
' 394 ' : { ' ext ' : ' mp4 ' , ' height ' : 144 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' av01.0.00M.08 ' } ,
' 395 ' : { ' ext ' : ' mp4 ' , ' height ' : 240 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' av01.0.00M.08 ' } ,
' 396 ' : { ' ext ' : ' mp4 ' , ' height ' : 360 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' av01.0.01M.08 ' } ,
' 397 ' : { ' ext ' : ' mp4 ' , ' height ' : 480 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' av01.0.04M.08 ' } ,
' 398 ' : { ' ext ' : ' mp4 ' , ' height ' : 720 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' av01.0.05M.08 ' } ,
' 399 ' : { ' ext ' : ' mp4 ' , ' height ' : 1080 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' av01.0.08M.08 ' } ,
' 400 ' : { ' ext ' : ' mp4 ' , ' height ' : 1440 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' av01.0.12M.08 ' } ,
' 401 ' : { ' ext ' : ' mp4 ' , ' height ' : 2160 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' av01.0.12M.08 ' } ,
2013-06-23 19:58:33 +02:00
}
2021-01-01 13:26:37 +01:00
_SUBTITLE_FORMATS = ( ' json3 ' , ' srv1 ' , ' srv2 ' , ' srv3 ' , ' ttml ' , ' vtt ' )
2013-08-20 03:22:25 +02:00
2017-02-26 10:51:21 +01:00
_GEO_BYPASS = False
2014-09-13 07:51:06 +02:00
IE_NAME = ' youtube '
2013-06-27 19:13:11 +02:00
_TESTS = [
{
2016-09-17 16:48:20 +02:00
' url ' : ' https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9 ' ,
2014-09-24 09:49:53 +02:00
' info_dict ' : {
' id ' : ' BaW_jenozKc ' ,
' ext ' : ' mp4 ' ,
2020-09-02 22:37:35 +02:00
' title ' : ' youtube-dl test video " \' / \\ ä↭𝕐 ' ,
2014-09-24 09:49:53 +02:00
' uploader ' : ' Philipp Hagemeister ' ,
' uploader_id ' : ' phihag ' ,
2017-01-02 13:08:07 +01:00
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/phihag ' ,
2018-09-14 20:24:26 +02:00
' channel_id ' : ' UCLqxVugv74EIW3VWh2NOa3Q ' ,
' channel_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UCLqxVugv74EIW3VWh2NOa3Q ' ,
2014-09-24 09:49:53 +02:00
' upload_date ' : ' 20121002 ' ,
2020-09-02 22:37:35 +02:00
' description ' : ' test chars: " \' / \\ ä↭𝕐 \n test URL: https://github.com/rg3/youtube-dl/issues/1892 \n \n This is a test video for youtube-dl. \n \n For more information, contact phihag@phihag.de . ' ,
2014-09-24 09:49:53 +02:00
' categories ' : [ ' Science & Technology ' ] ,
2020-09-02 22:37:35 +02:00
' tags ' : [ ' youtube-dl ' ] ,
2017-01-26 15:43:14 +01:00
' duration ' : 10 ,
2018-11-03 00:26:16 +01:00
' view_count ' : int ,
2014-08-31 18:10:05 +02:00
' like_count ' : int ,
' dislike_count ' : int ,
2015-07-20 21:10:28 +02:00
' start_time ' : 1 ,
2015-07-23 13:20:21 +02:00
' end_time ' : 9 ,
2013-06-27 19:13:11 +02:00
}
2013-06-27 19:55:39 +02:00
} ,
2013-11-18 13:05:18 +01:00
{
2014-09-24 09:49:53 +02:00
' url ' : ' //www.YouTube.com/watch?v=yZIXLfi8CZQ ' ,
' note ' : ' Embed-only video (#1746) ' ,
' info_dict ' : {
' id ' : ' yZIXLfi8CZQ ' ,
' ext ' : ' mp4 ' ,
' upload_date ' : ' 20120608 ' ,
' title ' : ' Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012 ' ,
' description ' : ' md5:09b78bd971f1e3e289601dfba15ca4f7 ' ,
' uploader ' : ' SET India ' ,
2015-11-23 16:35:23 +01:00
' uploader_id ' : ' setindia ' ,
2017-01-02 13:08:07 +01:00
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/setindia ' ,
2015-11-23 16:35:23 +01:00
' age_limit ' : 18 ,
2021-02-04 15:37:17 +01:00
} ,
' skip ' : ' Private video ' ,
2013-11-18 13:05:18 +01:00
} ,
2015-08-10 20:52:38 +02:00
{
2020-11-19 20:22:59 +01:00
' url ' : ' https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ ' ,
2015-08-10 20:52:38 +02:00
' note ' : ' Use the first video ID in the URL ' ,
' info_dict ' : {
' id ' : ' BaW_jenozKc ' ,
' ext ' : ' mp4 ' ,
2020-09-02 22:37:35 +02:00
' title ' : ' youtube-dl test video " \' / \\ ä↭𝕐 ' ,
2015-08-10 20:52:38 +02:00
' uploader ' : ' Philipp Hagemeister ' ,
' uploader_id ' : ' phihag ' ,
2017-01-02 13:08:07 +01:00
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/phihag ' ,
2015-08-10 20:52:38 +02:00
' upload_date ' : ' 20121002 ' ,
2020-09-02 22:37:35 +02:00
' description ' : ' test chars: " \' / \\ ä↭𝕐 \n test URL: https://github.com/rg3/youtube-dl/issues/1892 \n \n This is a test video for youtube-dl. \n \n For more information, contact phihag@phihag.de . ' ,
2015-08-10 20:52:38 +02:00
' categories ' : [ ' Science & Technology ' ] ,
2020-09-02 22:37:35 +02:00
' tags ' : [ ' youtube-dl ' ] ,
2017-01-26 15:43:14 +01:00
' duration ' : 10 ,
2018-11-03 00:26:16 +01:00
' view_count ' : int ,
2015-08-10 20:52:38 +02:00
' like_count ' : int ,
' dislike_count ' : int ,
2015-08-10 21:22:06 +02:00
} ,
' params ' : {
' skip_download ' : True ,
} ,
2015-08-10 20:52:38 +02:00
} ,
2014-01-19 05:47:20 +01:00
{
2016-09-17 16:48:20 +02:00
' url ' : ' https://www.youtube.com/watch?v=a9LDPn-MO4I ' ,
2014-09-24 09:49:53 +02:00
' note ' : ' 256k DASH audio (format 141) via DASH manifest ' ,
' info_dict ' : {
' id ' : ' a9LDPn-MO4I ' ,
' ext ' : ' m4a ' ,
' upload_date ' : ' 20121002 ' ,
' uploader_id ' : ' 8KVIDEO ' ,
2017-01-02 13:08:07 +01:00
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/8KVIDEO ' ,
2014-09-24 09:49:53 +02:00
' description ' : ' ' ,
' uploader ' : ' 8KVIDEO ' ,
' title ' : ' UHDTV TEST 8K VIDEO.mp4 '
2014-01-22 21:56:37 +01:00
} ,
2014-09-24 09:49:53 +02:00
' params ' : {
' youtube_include_dash_manifest ' : True ,
' format ' : ' 141 ' ,
2014-01-22 21:56:37 +01:00
} ,
2016-06-24 17:27:55 +02:00
' skip ' : ' format 141 not served anymore ' ,
2014-01-19 05:47:20 +01:00
} ,
2020-11-19 20:22:59 +01:00
# DASH manifest with encrypted signature
{
' url ' : ' https://www.youtube.com/watch?v=IB3lcPjvWLA ' ,
' info_dict ' : {
' id ' : ' IB3lcPjvWLA ' ,
' ext ' : ' m4a ' ,
' title ' : ' Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson ' ,
' description ' : ' md5:8f5e2b82460520b619ccac1f509d43bf ' ,
' duration ' : 244 ,
' uploader ' : ' AfrojackVEVO ' ,
' uploader_id ' : ' AfrojackVEVO ' ,
' upload_date ' : ' 20131011 ' ,
2021-02-10 22:22:55 +01:00
' abr ' : 129.495 ,
2020-11-19 20:22:59 +01:00
} ,
' params ' : {
' youtube_include_dash_manifest ' : True ,
' format ' : ' 141/bestaudio[ext=m4a] ' ,
} ,
} ,
2021-07-27 16:03:59 +02:00
# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
2014-11-30 21:45:49 +01:00
{
2021-07-27 16:03:59 +02:00
' note ' : ' Embed allowed age-gate video ' ,
2016-09-17 16:48:20 +02:00
' url ' : ' https://youtube.com/watch?v=HtVdAasjOgU ' ,
2014-11-30 21:45:49 +01:00
' info_dict ' : {
' id ' : ' HtVdAasjOgU ' ,
' ext ' : ' mp4 ' ,
' title ' : ' The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer ' ,
2017-01-02 13:08:07 +01:00
' description ' : r ' re:(?s). { 100,}About the Game \ n.*?The Witcher 3: Wild Hunt. { 100,} ' ,
2017-01-26 15:43:14 +01:00
' duration ' : 142 ,
2014-11-30 21:45:49 +01:00
' uploader ' : ' The Witcher ' ,
' uploader_id ' : ' WitcherGame ' ,
2017-01-02 13:08:07 +01:00
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/WitcherGame ' ,
2014-11-30 21:45:49 +01:00
' upload_date ' : ' 20140605 ' ,
2015-08-10 21:24:53 +02:00
' age_limit ' : 18 ,
2014-11-30 21:45:49 +01:00
} ,
} ,
2021-07-27 16:03:59 +02:00
{
' note ' : ' Age-gate video with embed allowed in public site ' ,
' url ' : ' https://youtube.com/watch?v=HsUATh_Nc2U ' ,
' info_dict ' : {
' id ' : ' HsUATh_Nc2U ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Godzilla 2 (Official Video) ' ,
' description ' : ' md5:bf77e03fcae5529475e500129b05668a ' ,
' upload_date ' : ' 20200408 ' ,
' uploader_id ' : ' FlyingKitty900 ' ,
' uploader ' : ' FlyingKitty ' ,
' age_limit ' : 18 ,
} ,
} ,
{
' note ' : ' Age-gate video embedable only with clientScreen=EMBED ' ,
' url ' : ' https://youtube.com/watch?v=Tq92D6wQ1mg ' ,
' info_dict ' : {
' id ' : ' Tq92D6wQ1mg ' ,
' title ' : ' [MMD] Adios - EVERGLOW [+Motion DL] ' ,
2021-07-30 21:43:26 +02:00
' ext ' : ' mp4 ' ,
' upload_date ' : ' 20191227 ' ,
2021-07-27 16:03:59 +02:00
' uploader_id ' : ' UC1yoRdFoFJaCY-AGfD9W0wQ ' ,
' uploader ' : ' Projekt Melody ' ,
' description ' : ' md5:17eccca93a786d51bc67646756894066 ' ,
' age_limit ' : 18 ,
} ,
} ,
{
' note ' : ' Non-Agegated non-embeddable video ' ,
' url ' : ' https://youtube.com/watch?v=MeJVWBSsPAY ' ,
' info_dict ' : {
' id ' : ' MeJVWBSsPAY ' ,
' ext ' : ' mp4 ' ,
' title ' : ' OOMPH! - Such Mich Find Mich (Lyrics) ' ,
' uploader ' : ' Herr Lurik ' ,
' uploader_id ' : ' st3in234 ' ,
' description ' : ' Fan Video. Music & Lyrics by OOMPH!. ' ,
' upload_date ' : ' 20130730 ' ,
} ,
} ,
{
' note ' : ' Non-bypassable age-gated video ' ,
' url ' : ' https://youtube.com/watch?v=Cr381pDsSsA ' ,
' only_matching ' : True ,
} ,
2020-11-19 20:22:59 +01:00
# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
# YouTube Red ad is not captured for creator
{
' url ' : ' __2ABJjxzNo ' ,
' info_dict ' : {
' id ' : ' __2ABJjxzNo ' ,
' ext ' : ' mp4 ' ,
' duration ' : 266 ,
' upload_date ' : ' 20100430 ' ,
' uploader_id ' : ' deadmau5 ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/deadmau5 ' ,
2021-02-04 15:37:17 +01:00
' creator ' : ' deadmau5 ' ,
' description ' : ' md5:6cbcd3a92ce1bc676fc4d6ab4ace2336 ' ,
2020-11-19 20:22:59 +01:00
' uploader ' : ' deadmau5 ' ,
' title ' : ' Deadmau5 - Some Chords (HD) ' ,
2021-02-04 15:37:17 +01:00
' alt_title ' : ' Some Chords ' ,
2020-11-19 20:22:59 +01:00
} ,
' expected_warnings ' : [
' DASH manifest missing ' ,
]
} ,
2019-03-09 13:14:41 +01:00
# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
2014-12-11 16:28:07 +01:00
{
' url ' : ' lqQg6PlCWgI ' ,
' info_dict ' : {
' id ' : ' lqQg6PlCWgI ' ,
' ext ' : ' mp4 ' ,
2017-01-26 15:43:14 +01:00
' duration ' : 6085 ,
2015-11-23 16:37:21 +01:00
' upload_date ' : ' 20150827 ' ,
2014-12-11 16:34:37 +01:00
' uploader_id ' : ' olympic ' ,
2017-01-02 13:08:07 +01:00
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/olympic ' ,
2014-12-11 16:34:37 +01:00
' description ' : ' HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games ' ,
2021-07-21 05:52:34 +02:00
' uploader ' : ' Olympics ' ,
2014-12-11 16:34:37 +01:00
' title ' : ' Hockey - Women - GER-AUS - London 2012 Olympic Games ' ,
} ,
' params ' : {
' skip_download ' : ' requires avconv ' ,
2014-12-11 16:28:07 +01:00
}
2014-12-11 16:34:37 +01:00
} ,
2015-01-10 05:45:51 +01:00
# Non-square pixels
{
' url ' : ' https://www.youtube.com/watch?v=_b-2C3KPAM0 ' ,
' info_dict ' : {
' id ' : ' _b-2C3KPAM0 ' ,
' ext ' : ' mp4 ' ,
' stretched_ratio ' : 16 / 9. ,
2017-01-26 15:43:14 +01:00
' duration ' : 85 ,
2015-01-10 05:45:51 +01:00
' upload_date ' : ' 20110310 ' ,
' uploader_id ' : ' AllenMeow ' ,
2017-01-02 13:08:07 +01:00
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/AllenMeow ' ,
2015-01-10 05:45:51 +01:00
' description ' : ' made by Wacom from Korea | 字幕&加油添醋 by TY \' s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯 ' ,
2018-06-02 21:23:45 +02:00
' uploader ' : ' 孫ᄋᄅ ' ,
2015-01-10 05:45:51 +01:00
' title ' : ' [A-made] 變態妍字幕版 太妍 我就是這樣的人 ' ,
} ,
2015-04-05 20:35:55 +02:00
} ,
# url_encoded_fmt_stream_map is empty string
{
' url ' : ' qEJwOuvDf7I ' ,
' info_dict ' : {
' id ' : ' qEJwOuvDf7I ' ,
2015-08-12 17:27:58 +02:00
' ext ' : ' webm ' ,
2015-04-05 20:35:55 +02:00
' title ' : ' Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге ' ,
' description ' : ' ' ,
' upload_date ' : ' 20150404 ' ,
' uploader_id ' : ' spbelect ' ,
' uploader ' : ' Наблюдатели Петербурга ' ,
} ,
' params ' : {
' skip_download ' : ' requires avconv ' ,
2016-01-19 15:56:04 +01:00
} ,
' skip ' : ' This live event has ended. ' ,
2015-04-05 20:35:55 +02:00
} ,
2019-03-09 13:14:41 +01:00
# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
2015-06-27 10:55:46 +02:00
{
' url ' : ' https://www.youtube.com/watch?v=FIl7x6_3R5Y ' ,
' info_dict ' : {
' id ' : ' FIl7x6_3R5Y ' ,
2018-06-02 21:23:45 +02:00
' ext ' : ' webm ' ,
2015-06-27 10:55:46 +02:00
' title ' : ' md5:7b81415841e02ecd4313668cde88737a ' ,
' description ' : ' md5:116377fd2963b81ec4ce64b542173306 ' ,
2017-01-26 15:43:14 +01:00
' duration ' : 220 ,
2015-06-27 10:55:46 +02:00
' upload_date ' : ' 20150625 ' ,
' uploader_id ' : ' dorappi2000 ' ,
2017-01-02 13:08:07 +01:00
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/dorappi2000 ' ,
2015-06-27 10:55:46 +02:00
' uploader ' : ' dorappi2000 ' ,
2018-06-02 21:23:45 +02:00
' formats ' : ' mincount:31 ' ,
2015-06-27 10:55:46 +02:00
} ,
2018-06-02 21:23:45 +02:00
' skip ' : ' not actual anymore ' ,
2015-07-20 19:34:24 +02:00
} ,
2015-06-10 08:47:02 +02:00
# DASH manifest with segment_list
{
' url ' : ' https://www.youtube.com/embed/CsmdDsKjzN8 ' ,
' md5 ' : ' 8ce563a1d667b599d21064e982ab9e31 ' ,
' info_dict ' : {
' id ' : ' CsmdDsKjzN8 ' ,
' ext ' : ' mp4 ' ,
2015-07-20 19:48:50 +02:00
' upload_date ' : ' 20150501 ' , # According to '<meta itemprop="datePublished"', but in other places it's 20150510
2015-06-10 08:47:02 +02:00
' uploader ' : ' Airtek ' ,
' description ' : ' Retransmisión en directo de la XVIII media maratón de Zaragoza. ' ,
' uploader_id ' : ' UCzTzUmjXxxacNnL8I3m4LnQ ' ,
' title ' : ' Retransmisión XVIII Media maratón Zaragoza 2015 ' ,
} ,
' params ' : {
' youtube_include_dash_manifest ' : True ,
' format ' : ' 135 ' , # bestvideo
2016-06-24 17:47:19 +02:00
} ,
' skip ' : ' This live event has ended. ' ,
2015-07-20 19:34:24 +02:00
} ,
2015-07-25 17:30:34 +02:00
{
# Multifeed videos (multiple cameras), URL is for Main Camera
2021-02-04 15:37:17 +01:00
' url ' : ' https://www.youtube.com/watch?v=jvGDaLqkpTg ' ,
2015-07-25 17:30:34 +02:00
' info_dict ' : {
2021-02-04 15:37:17 +01:00
' id ' : ' jvGDaLqkpTg ' ,
' title ' : ' Tom Clancy Free Weekend Rainbow Whatever ' ,
' description ' : ' md5:e03b909557865076822aa169218d6a5d ' ,
2015-07-25 17:30:34 +02:00
} ,
' playlist ' : [ {
' info_dict ' : {
2021-02-04 15:37:17 +01:00
' id ' : ' jvGDaLqkpTg ' ,
2015-07-25 17:30:34 +02:00
' ext ' : ' mp4 ' ,
2021-02-04 15:37:17 +01:00
' title ' : ' Tom Clancy Free Weekend Rainbow Whatever (Main Camera) ' ,
' description ' : ' md5:e03b909557865076822aa169218d6a5d ' ,
' duration ' : 10643 ,
' upload_date ' : ' 20161111 ' ,
' uploader ' : ' Team PGP ' ,
' uploader_id ' : ' UChORY56LMMETTuGjXaJXvLg ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UChORY56LMMETTuGjXaJXvLg ' ,
2015-07-25 17:30:34 +02:00
} ,
} , {
' info_dict ' : {
2021-02-04 15:37:17 +01:00
' id ' : ' 3AKt1R1aDnw ' ,
2015-07-25 17:30:34 +02:00
' ext ' : ' mp4 ' ,
2021-02-04 15:37:17 +01:00
' title ' : ' Tom Clancy Free Weekend Rainbow Whatever (Camera 2) ' ,
' description ' : ' md5:e03b909557865076822aa169218d6a5d ' ,
' duration ' : 10991 ,
' upload_date ' : ' 20161111 ' ,
' uploader ' : ' Team PGP ' ,
' uploader_id ' : ' UChORY56LMMETTuGjXaJXvLg ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UChORY56LMMETTuGjXaJXvLg ' ,
2015-07-25 17:30:34 +02:00
} ,
} , {
' info_dict ' : {
2021-02-04 15:37:17 +01:00
' id ' : ' RtAMM00gpVc ' ,
2015-07-25 17:30:34 +02:00
' ext ' : ' mp4 ' ,
2021-02-04 15:37:17 +01:00
' title ' : ' Tom Clancy Free Weekend Rainbow Whatever (Camera 3) ' ,
' description ' : ' md5:e03b909557865076822aa169218d6a5d ' ,
' duration ' : 10995 ,
' upload_date ' : ' 20161111 ' ,
' uploader ' : ' Team PGP ' ,
' uploader_id ' : ' UChORY56LMMETTuGjXaJXvLg ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UChORY56LMMETTuGjXaJXvLg ' ,
2015-07-25 17:30:34 +02:00
} ,
} , {
' info_dict ' : {
2021-02-04 15:37:17 +01:00
' id ' : ' 6N2fdlP3C5U ' ,
2015-07-25 17:30:34 +02:00
' ext ' : ' mp4 ' ,
2021-02-04 15:37:17 +01:00
' title ' : ' Tom Clancy Free Weekend Rainbow Whatever (Camera 4) ' ,
' description ' : ' md5:e03b909557865076822aa169218d6a5d ' ,
' duration ' : 10990 ,
' upload_date ' : ' 20161111 ' ,
' uploader ' : ' Team PGP ' ,
' uploader_id ' : ' UChORY56LMMETTuGjXaJXvLg ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UChORY56LMMETTuGjXaJXvLg ' ,
2015-07-25 17:30:34 +02:00
} ,
} ] ,
' params ' : {
' skip_download ' : True ,
} ,
2021-07-27 16:03:59 +02:00
' skip ' : ' Not multifeed anymore ' ,
2015-08-16 22:04:13 +02:00
} ,
2016-02-13 00:18:58 +01:00
{
2019-03-09 13:14:41 +01:00
# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
2016-02-13 00:18:58 +01:00
' url ' : ' https://www.youtube.com/watch?v=gVfLd0zydlo ' ,
' info_dict ' : {
' id ' : ' gVfLd0zydlo ' ,
' title ' : ' DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30 ' ,
} ,
' playlist_count ' : 2 ,
2016-06-24 17:47:19 +02:00
' skip ' : ' Not multifeed anymore ' ,
2016-02-13 00:18:58 +01:00
} ,
2015-08-16 22:04:13 +02:00
{
2016-09-17 16:48:20 +02:00
' url ' : ' https://vid.plus/FlRa-iH7PGw ' ,
2015-08-16 22:04:13 +02:00
' only_matching ' : True ,
2015-11-22 13:49:33 +01:00
} ,
2016-04-03 22:26:20 +02:00
{
2016-09-17 16:48:20 +02:00
' url ' : ' https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html ' ,
2016-04-03 22:26:20 +02:00
' only_matching ' : True ,
} ,
2015-11-22 13:49:33 +01:00
{
2019-03-09 13:14:41 +01:00
# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
2016-01-18 18:19:38 +01:00
# Also tests cut-off URL expansion in video description (see
2019-03-09 13:14:41 +01:00
# https://github.com/ytdl-org/youtube-dl/issues/1892,
# https://github.com/ytdl-org/youtube-dl/issues/8164)
2015-11-22 13:49:33 +01:00
' url ' : ' https://www.youtube.com/watch?v=lsguqyKfVQg ' ,
' info_dict ' : {
' id ' : ' lsguqyKfVQg ' ,
' ext ' : ' mp4 ' ,
' title ' : ' { dark walk}; Loki/AC/Dishonored; collab w/Elflover21 ' ,
2021-07-21 05:52:34 +02:00
' alt_title ' : ' Dark Walk ' ,
2015-11-22 13:49:33 +01:00
' description ' : ' md5:8085699c11dc3f597ce0410b0dcbb34a ' ,
2017-01-26 15:43:14 +01:00
' duration ' : 133 ,
2015-11-22 13:49:33 +01:00
' upload_date ' : ' 20151119 ' ,
' uploader_id ' : ' IronSoulElf ' ,
2017-01-02 13:08:07 +01:00
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/IronSoulElf ' ,
2015-11-22 13:49:33 +01:00
' uploader ' : ' IronSoulElf ' ,
2021-07-21 05:52:34 +02:00
' creator ' : ' Todd Haberman; \n Daniel Law Heath and Aaron Kaplan ' ,
' track ' : ' Dark Walk ' ,
' artist ' : ' Todd Haberman; \n Daniel Law Heath and Aaron Kaplan ' ,
2019-04-28 18:37:46 +02:00
' album ' : ' Position Music - Production Music Vol. 143 - Dark Walk ' ,
2015-11-22 13:49:33 +01:00
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ,
2015-11-23 16:02:37 +01:00
{
2019-03-09 13:14:41 +01:00
# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
2015-11-23 16:02:37 +01:00
' url ' : ' https://www.youtube.com/watch?v=Ms7iBXnlUO8 ' ,
' only_matching ' : True ,
} ,
2015-11-28 01:07:07 +01:00
{
# Video with yt:stretch=17:0
' url ' : ' https://www.youtube.com/watch?v=Q39EVAstoRM ' ,
' info_dict ' : {
' id ' : ' Q39EVAstoRM ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Clash Of Clans#14 Dicas De Ataque Para CV 4 ' ,
' description ' : ' md5:ee18a25c350637c8faff806845bddee9 ' ,
' upload_date ' : ' 20151107 ' ,
' uploader_id ' : ' UCCr7TALkRbo3EtFzETQF1LA ' ,
' uploader ' : ' CH GAMER DROID ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
2016-06-24 17:47:19 +02:00
' skip ' : ' This video does not exist. ' ,
2015-11-28 01:07:07 +01:00
} ,
2021-04-17 05:02:33 +02:00
{
# Video with incomplete 'yt:stretch=16:'
' url ' : ' https://www.youtube.com/watch?v=FRhJzUSJbGI ' ,
' only_matching ' : True ,
} ,
2016-03-02 18:07:25 +01:00
{
# Video licensed under Creative Commons
' url ' : ' https://www.youtube.com/watch?v=M4gD1WSo5mA ' ,
' info_dict ' : {
' id ' : ' M4gD1WSo5mA ' ,
' ext ' : ' mp4 ' ,
' title ' : ' md5:e41008789470fc2533a3252216f1c1d1 ' ,
' description ' : ' md5:a677553cf0840649b731a3024aeff4cc ' ,
2017-01-26 15:43:14 +01:00
' duration ' : 721 ,
2016-03-02 18:07:25 +01:00
' upload_date ' : ' 20150127 ' ,
' uploader_id ' : ' BerkmanCenter ' ,
2017-01-02 13:08:07 +01:00
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/BerkmanCenter ' ,
2017-01-26 15:43:14 +01:00
' uploader ' : ' The Berkman Klein Center for Internet & Society ' ,
2016-03-02 18:07:25 +01:00
' license ' : ' Creative Commons Attribution license (reuse allowed) ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ,
2016-03-02 18:49:10 +01:00
{
# Channel-like uploader_url
' url ' : ' https://www.youtube.com/watch?v=eQcmzGIKrzg ' ,
' info_dict ' : {
' id ' : ' eQcmzGIKrzg ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Democratic Socialism and Foreign Policy | Bernie Sanders ' ,
2021-02-04 15:37:17 +01:00
' description ' : ' md5:13a2503d7b5904ef4b223aa101628f39 ' ,
2017-01-26 15:43:14 +01:00
' duration ' : 4060 ,
2016-03-02 18:49:10 +01:00
' upload_date ' : ' 20151119 ' ,
2018-06-02 21:23:45 +02:00
' uploader ' : ' Bernie Sanders ' ,
2016-03-02 18:49:10 +01:00
' uploader_id ' : ' UCH1dpzjCEiGAt8CXkryhkZg ' ,
2017-01-02 13:08:07 +01:00
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UCH1dpzjCEiGAt8CXkryhkZg ' ,
2016-03-02 18:49:10 +01:00
' license ' : ' Creative Commons Attribution license (reuse allowed) ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ,
2015-11-29 16:01:59 +01:00
{
' url ' : ' https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY ' ,
' only_matching ' : True ,
2016-07-11 21:10:35 +02:00
} ,
{
2019-03-09 13:14:41 +01:00
# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
2016-07-11 21:10:35 +02:00
' url ' : ' https://www.youtube.com/watch?v=i1Ko8UG-Tdo ' ,
' only_matching ' : True ,
2016-09-02 20:17:15 +02:00
} ,
{
# Rental video preview
' url ' : ' https://www.youtube.com/watch?v=yYr8q0y5Jfg ' ,
' info_dict ' : {
' id ' : ' uGpuVWrhIzE ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Piku - Trailer ' ,
' description ' : ' md5:c36bd60c3fd6f1954086c083c72092eb ' ,
' upload_date ' : ' 20150811 ' ,
' uploader ' : ' FlixMatrix ' ,
' uploader_id ' : ' FlixMatrixKaravan ' ,
2017-01-02 13:08:07 +01:00
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/FlixMatrixKaravan ' ,
2016-09-02 20:17:15 +02:00
' license ' : ' Standard YouTube License ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
2018-06-02 21:23:45 +02:00
' skip ' : ' This video is not available. ' ,
2017-01-09 16:30:46 +01:00
} ,
2017-01-21 12:10:32 +01:00
{
# YouTube Red video with episode data
' url ' : ' https://www.youtube.com/watch?v=iqKdEhx-dD4 ' ,
' info_dict ' : {
' id ' : ' iqKdEhx-dD4 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Isolation - Mind Field (Ep 1) ' ,
2021-02-04 15:37:17 +01:00
' description ' : ' md5:f540112edec5d09fc8cc752d3d4ba3cd ' ,
2017-01-26 15:43:14 +01:00
' duration ' : 2085 ,
2017-01-21 12:10:32 +01:00
' upload_date ' : ' 20170118 ' ,
' uploader ' : ' Vsauce ' ,
' uploader_id ' : ' Vsauce ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/Vsauce ' ,
' series ' : ' Mind Field ' ,
' season_number ' : 1 ,
' episode_number ' : 1 ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
' expected_warnings ' : [
' Skipping DASH manifest ' ,
] ,
} ,
2017-08-26 10:38:38 +02:00
{
# The following content has been identified by the YouTube community
# as inappropriate or offensive to some audiences.
' url ' : ' https://www.youtube.com/watch?v=6SJNVb0GnPI ' ,
' info_dict ' : {
' id ' : ' 6SJNVb0GnPI ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Race Differences in Intelligence ' ,
' description ' : ' md5:5d161533167390427a1f8ee89a1fc6f1 ' ,
' duration ' : 965 ,
' upload_date ' : ' 20140124 ' ,
' uploader ' : ' New Century Foundation ' ,
' uploader_id ' : ' UCEJYpZGqgUob0zVVEaLhvVg ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UCEJYpZGqgUob0zVVEaLhvVg ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
2021-02-04 15:37:17 +01:00
' skip ' : ' This video has been removed for violating YouTube \' s policy on hate speech. ' ,
2017-08-26 10:38:38 +02:00
} ,
2017-01-09 16:30:46 +01:00
{
# itag 212
' url ' : ' 1t24XAntNCY ' ,
' only_matching ' : True ,
2017-02-26 10:51:21 +01:00
} ,
{
# geo restricted to JP
' url ' : ' sJL6WA-aGkQ ' ,
' only_matching ' : True ,
} ,
2018-09-23 19:14:49 +02:00
{
' url ' : ' https://invidio.us/watch?v=BaW_jenozKc ' ,
' only_matching ' : True ,
} ,
2021-02-19 21:44:36 +01:00
{
' url ' : ' https://redirect.invidious.io/watch?v=BaW_jenozKc ' ,
' only_matching ' : True ,
} ,
{
# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
' url ' : ' https://redirect.invidious.io/Yh0AhrY9GjA ' ,
' only_matching ' : True ,
} ,
2018-12-26 09:30:48 +01:00
{
# DRM protected
' url ' : ' https://www.youtube.com/watch?v=s7_qI6_mIXc ' ,
' only_matching ' : True ,
2019-01-15 20:18:27 +01:00
} ,
{
# Video with unsupported adaptive stream type formats
' url ' : ' https://www.youtube.com/watch?v=Z4Vy8R84T1U ' ,
' info_dict ' : {
' id ' : ' Z4Vy8R84T1U ' ,
' ext ' : ' mp4 ' ,
' title ' : ' saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta ' ,
' description ' : ' md5:d41d8cd98f00b204e9800998ecf8427e ' ,
' duration ' : 433 ,
' upload_date ' : ' 20130923 ' ,
' uploader ' : ' Amelia Putri Harwita ' ,
' uploader_id ' : ' UCpOxM49HJxmC1qCalXyB3_Q ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UCpOxM49HJxmC1qCalXyB3_Q ' ,
' formats ' : ' maxcount:10 ' ,
} ,
' params ' : {
' skip_download ' : True ,
' youtube_include_dash_manifest ' : False ,
} ,
2020-03-05 18:05:50 +01:00
' skip ' : ' not actual anymore ' ,
2019-04-22 06:26:48 +02:00
} ,
{
2019-04-27 10:16:17 +02:00
# Youtube Music Auto-generated description
2019-04-22 06:26:48 +02:00
' url ' : ' https://music.youtube.com/watch?v=MgNrAu2pzNs ' ,
' info_dict ' : {
' id ' : ' MgNrAu2pzNs ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Voyeur Girl ' ,
' description ' : ' md5:7ae382a65843d6df2685993e90a8628f ' ,
' upload_date ' : ' 20190312 ' ,
2020-03-05 18:05:50 +01:00
' uploader ' : ' Stephen - Topic ' ,
' uploader_id ' : ' UC-pWHpBjdGG69N9mM2auIAA ' ,
2019-04-22 06:26:48 +02:00
' artist ' : ' Stephen ' ,
' track ' : ' Voyeur Girl ' ,
' album ' : ' it \' s too much love to know my dear ' ,
' release_date ' : ' 20190313 ' ,
' release_year ' : 2019 ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ,
2019-11-30 17:51:34 +01:00
{
' url ' : ' https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q ' ,
' only_matching ' : True ,
} ,
2020-04-30 19:40:38 +02:00
{
# invalid -> valid video id redirection
' url ' : ' DJztXj2GPfl ' ,
' info_dict ' : {
' id ' : ' DJztXj2GPfk ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack) ' ,
' description ' : ' md5:bf577a41da97918e94fa9798d9228825 ' ,
' upload_date ' : ' 20090125 ' ,
' uploader ' : ' Prochorowka ' ,
' uploader_id ' : ' Prochorowka ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/Prochorowka ' ,
' artist ' : ' Panjabi MC ' ,
' track ' : ' Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix ' ,
' album ' : ' Beware of the Boys (Mundian To Bach Ke) ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
2021-02-04 15:37:17 +01:00
' skip ' : ' Video unavailable ' ,
2020-09-13 16:23:21 +02:00
} ,
{
# empty description results in an empty string
' url ' : ' https://www.youtube.com/watch?v=x41yOUIvK2k ' ,
' info_dict ' : {
' id ' : ' x41yOUIvK2k ' ,
' ext ' : ' mp4 ' ,
' title ' : ' IMG 3456 ' ,
' description ' : ' ' ,
' upload_date ' : ' 20170613 ' ,
' uploader_id ' : ' ElevageOrVert ' ,
' uploader ' : ' ElevageOrVert ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ,
2020-11-21 15:50:42 +01:00
{
2021-01-01 13:26:37 +01:00
# with '};' inside yt initial data (see [1])
# see [2] for an example with '};' inside ytInitialPlayerResponse
# 1. https://github.com/ytdl-org/youtube-dl/issues/27093
# 2. https://github.com/ytdl-org/youtube-dl/issues/27216
2020-11-21 15:50:42 +01:00
' url ' : ' https://www.youtube.com/watch?v=CHqg6qOn4no ' ,
' info_dict ' : {
' id ' : ' CHqg6qOn4no ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Part 77 Sort a list of simple types in c# ' ,
' description ' : ' md5:b8746fa52e10cdbf47997903f13b20dc ' ,
' upload_date ' : ' 20130831 ' ,
' uploader_id ' : ' kudvenkat ' ,
' uploader ' : ' kudvenkat ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ,
2021-01-01 13:26:37 +01:00
{
# another example of '};' in ytInitialData
' url ' : ' https://www.youtube.com/watch?v=gVfgbahppCY ' ,
' only_matching ' : True ,
} ,
{
' url ' : ' https://www.youtube.com/watch_popup?v=63RmMXCd_bQ ' ,
' only_matching ' : True ,
} ,
2021-02-04 15:37:17 +01:00
{
2021-02-10 22:22:55 +01:00
# https://github.com/ytdl-org/youtube-dl/pull/28094
' url ' : ' OtqTfy26tG0 ' ,
' info_dict ' : {
' id ' : ' OtqTfy26tG0 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Burn Out ' ,
' description ' : ' md5:8d07b84dcbcbfb34bc12a56d968b6131 ' ,
' upload_date ' : ' 20141120 ' ,
' uploader ' : ' The Cinematic Orchestra - Topic ' ,
' uploader_id ' : ' UCIzsJBIyo8hhpFm1NK0uLgw ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UCIzsJBIyo8hhpFm1NK0uLgw ' ,
' artist ' : ' The Cinematic Orchestra ' ,
' track ' : ' Burn Out ' ,
' album ' : ' Every Day ' ,
' release_data ' : None ,
' release_year ' : None ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
2021-02-04 15:37:17 +01:00
} ,
2021-02-19 21:44:36 +01:00
{
# controversial video, only works with bpctr when authenticated with cookies
' url ' : ' https://www.youtube.com/watch?v=nGC3D_FkCmg ' ,
' only_matching ' : True ,
} ,
2021-07-20 20:01:28 +02:00
{
# controversial video, requires bpctr/contentCheckOk
' url ' : ' https://www.youtube.com/watch?v=SZJvDhaSDnc ' ,
' info_dict ' : {
' id ' : ' SZJvDhaSDnc ' ,
' ext ' : ' mp4 ' ,
' title ' : ' San Diego teen commits suicide after bullying over embarrassing video ' ,
' channel_id ' : ' UC-SJ6nODDmufqBzPBwCvYvQ ' ,
' uploader ' : ' CBS This Morning ' ,
2021-07-21 05:52:34 +02:00
' uploader_id ' : ' CBSThisMorning ' ,
2021-07-20 20:01:28 +02:00
' upload_date ' : ' 20140716 ' ,
' description ' : ' md5:acde3a73d3f133fc97e837a9f76b53b7 '
}
} ,
2021-04-10 18:47:11 +02:00
{
# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
' url ' : ' cBvYw8_A0vQ ' ,
' info_dict ' : {
' id ' : ' cBvYw8_A0vQ ' ,
' ext ' : ' mp4 ' ,
' title ' : ' 4K Ueno Okachimachi Street Scenes 上野御徒町歩き ' ,
' description ' : ' md5:ea770e474b7cd6722b4c95b833c03630 ' ,
' upload_date ' : ' 20201120 ' ,
' uploader ' : ' Walk around Japan ' ,
' uploader_id ' : ' UC3o_t8PzBmXf5S9b7GLx1Mw ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
2021-05-14 09:01:53 +02:00
} , {
# Has multiple audio streams
' url ' : ' WaOKSUlf4TM ' ,
' only_matching ' : True
2021-05-15 16:38:47 +02:00
} , {
# Requires Premium: has format 141 when requested using YTM url
' url ' : ' https://music.youtube.com/watch?v=XclachpHxis ' ,
' only_matching ' : True
} , {
2021-05-12 21:20:02 +02:00
# multiple subtitles with same lang_code
' url ' : ' https://www.youtube.com/watch?v=wsQiKKfKxug ' ,
' only_matching ' : True ,
2021-06-30 00:07:49 +02:00
} , {
# Force use android client fallback
' url ' : ' https://www.youtube.com/watch?v=YOelRv7fMxY ' ,
' info_dict ' : {
' id ' : ' YOelRv7fMxY ' ,
2021-07-21 05:52:34 +02:00
' title ' : ' DIGGING A SECRET TUNNEL Part 1 ' ,
2021-06-30 00:07:49 +02:00
' ext ' : ' 3gp ' ,
' upload_date ' : ' 20210624 ' ,
' channel_id ' : ' UCp68_FLety0O-n9QU6phsgw ' ,
' uploader ' : ' colinfurze ' ,
2021-07-21 05:52:34 +02:00
' uploader_id ' : ' colinfurze ' ,
2021-06-30 00:07:49 +02:00
' channel_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UCp68_FLety0O-n9QU6phsgw ' ,
2021-07-21 05:52:34 +02:00
' description ' : ' md5:b5096f56af7ccd7a555c84db81738b22 '
2021-06-30 00:07:49 +02:00
} ,
' params ' : {
' format ' : ' 17 ' , # 3gp format available on android
' extractor_args ' : { ' youtube ' : { ' player_client ' : [ ' android ' ] } } ,
} ,
2021-05-12 21:20:02 +02:00
} ,
2021-06-30 00:07:49 +02:00
{
# Skip download of additional client configs (remix client config in this case)
' url ' : ' https://music.youtube.com/watch?v=MgNrAu2pzNs ' ,
' only_matching ' : True ,
' params ' : {
' extractor_args ' : { ' youtube ' : { ' player_skip ' : [ ' configs ' ] } } ,
} ,
2021-08-22 17:04:59 +02:00
} , {
# shorts
' url ' : ' https://www.youtube.com/shorts/BGQWPY4IigY ' ,
' only_matching ' : True ,
2021-11-19 01:06:28 +01:00
} , {
' note ' : ' Storyboards ' ,
' url ' : ' https://www.youtube.com/watch?v=5KLPxDtMqe8 ' ,
' info_dict ' : {
' id ' : ' 5KLPxDtMqe8 ' ,
' ext ' : ' mhtml ' ,
' format_id ' : ' sb0 ' ,
' title ' : ' Your Brain is Plastic ' ,
' uploader_id ' : ' scishow ' ,
' description ' : ' md5:89cd86034bdb5466cd87c6ba206cd2bc ' ,
' upload_date ' : ' 20140324 ' ,
' uploader ' : ' SciShow ' ,
} , ' params ' : { ' format ' : ' mhtml ' , ' skip_download ' : True }
}
2013-06-27 19:13:11 +02:00
]
2021-04-17 05:02:33 +02:00
@classmethod
def suitable(cls, url):
    """Return False for URLs with a non-empty ``list`` query value, else defer to the parent class."""
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    # A truthy first ``list`` value means this extractor declines the URL.
    return False if list_values[0] else super(YoutubeIE, cls).suitable(url)
2013-09-21 14:19:30 +02:00
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the two empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache is keyed by player id (see _load_player);
    # _player_cache holds signature/nsig functions and decrypted values.
    self._player_cache = {}
    self._code_cache = {}
2013-09-21 14:19:30 +02:00
2021-09-06 09:26:41 +02:00
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Look up the player JS URL in the given ytcfg objects and make it absolute.

    Returns None when no URL is found. ``webpage`` is accepted but not used here.
    """
    lookup_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    player_url = traverse_obj(
        ytcfgs, *lookup_paths, get_all=False, expected_type=compat_str)
    if not player_url:
        return None
    # Protocol-relative URL: only the scheme is missing
    if player_url.startswith('//'):
        return 'https:' + player_url
    if re.match(r'https?://', player_url):
        return player_url
    # Anything else is resolved against the main site
    return compat_urlparse.urljoin('https://www.youtube.com', player_url)
2021-09-06 09:26:41 +02:00
def _download_player_url(self, video_id, fatal=False):
    """Build the player base.js URL from the player version found in the iframe API script.

    Returns None when the download or the version lookup yields nothing.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None
    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2014-08-02 12:21:53 +02:00
def _signature_cache_id(self, example_sig):
    """Return the lengths of the dot-separated parts of example_sig, joined by dots."""
    part_lengths = [compat_str(len(piece)) for piece in example_sig.split('.')]
    return '.'.join(part_lengths)
2014-08-02 12:21:53 +02:00
2020-05-02 02:18:08 +02:00
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first _PLAYER_INFO_RE pattern found in player_url.

    Raises ExtractorError when none of the patterns match.
    """
    # Lazy generator: patterns after the first hit are never evaluated
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    found = next(filter(None, attempts), None)
    if found is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return found.group('id')
2020-05-02 02:18:08 +02:00
2021-10-31 05:23:58 +01:00
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS for player_url, downloading it once per player id.

    Only a non-empty download is stored in the code cache; otherwise None is returned.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    player_js = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if not player_js:
        return None
    self._code_cache[player_id] = player_js
    return player_js
2021-06-30 00:07:49 +02:00
2020-05-02 02:18:08 +02:00
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return the signature function for this player and signature shape.

    A spec cached under 'youtube-sigfuncs' is used when present; otherwise the
    function is parsed from the player JS and its spec is stored in the cache.
    Returns None when no player code could be loaded.
    """
    player_id = self._extract_player_info(player_url)

    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    # Read from filesystem cache; the id must be a plain name without path components
    assert os.path.basename(func_id) == func_id

    cached_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        def apply_cached_spec(s):
            # The spec is a list of source indices, one per output character
            return ''.join(s[idx] for idx in cached_spec)
        return apply_cached_spec

    player_js = self._load_player(video_id, player_url)
    if not player_js:
        return None

    sig_func = self._parse_sig_js(player_js)

    # Feed chr(0)..chr(n-1) through the function: the code point of each output
    # character tells which input position it came from.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
2013-09-21 15:19:48 +02:00
2014-08-02 12:21:53 +02:00
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to func when 'youtube_print_sig_code' is enabled.

    func is probed with chr(0)..chr(n-1); the resulting index list is rendered
    as a sum of ``s[...]`` index and slice expressions.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def render_slice(first, last, stride):
        # Render a run of consecutive indices as one slice expression
        head = '' if first == 0 else str(first)
        stop = last + stride
        tail = (':%d' % stop) if stop >= 0 else ':'
        stride_part = '' if stride == 1 else (':%d' % stride)
        return 's[%s%s%s]' % (head, tail, stride_part)

    def gen_sig_code(idxs):
        stride = None
        # Squelch pyflakes warnings - first will be set when stride is set
        first = '(Never used)'
        for cur, before in zip(idxs[1:], idxs[:-1]):
            delta = cur - before
            if stride is not None:
                # Inside a run: emit it once the step changes
                if delta != stride:
                    yield render_slice(first, before, stride)
                    stride = None
                continue
            if delta in (-1, 1):
                stride, first = delta, before
            else:
                yield 's[%d]' % before
        # Flush whatever is pending after the last pair
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield render_slice(first, cur, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(piece)) for piece in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
2013-09-22 10:30:02 +02:00
2013-09-21 14:19:30 +02:00
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return a Python wrapper for it.

    The function name is taken from the ``sig`` group of the first matching
    pattern; the returned callable takes the signature string and passes it
    to the interpreted JS function as its single argument.
    """
    # Patterns are tried in order; a trailing exact duplicate of the last
    # pattern was removed since it could never match anything new.
    funcname_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        funcname_patterns,
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])
2021-02-04 15:37:17 +01:00
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    Signature functions are cached per (player_url, signature shape).
    Raises ExtractorError when player_url is None or when anything fails
    during extraction or decryption.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        # Any failure is re-raised as an ExtractorError carrying the full traceback
        raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature.

    player_url is first made absolute. Both the extracted n-function and the
    decrypted values are kept in self._player_cache.
    Raises ExtractorError when player_url is None or when extraction or
    decryption fails.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    # The n-function is extracted per player URL, so a decrypted value is only
    # valid for the player that produced it: key the result on both.
    sig_id = ('nsig_value', player_url, s)
    if sig_id in self._player_cache:
        return self._player_cache[sig_id]

    try:
        player_id = ('nsig', player_url)
        if player_id not in self._player_cache:
            self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
        func = self._player_cache[player_id]
        decrypted = func(s)
        self._player_cache[sig_id] = decrypted
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        # Any failure is re-raised as an ExtractorError carrying the full traceback
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
2021-10-31 05:23:58 +01:00
def _extract_n_function_name(self, jscode):
    """Return the name of the n-parameter function found in the player JS."""
    name_patterns = (
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',
    )
    return self._search_regex(
        name_patterns, jscode, 'Initial JS player n function name', group='nfunc')
def _extract_n_function(self, video_id, player_url):
    """Return a callable that applies the player's n-function to a string.

    The function code is read from the 'youtube-nsig' cache when available;
    otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        # The function is rebuilt from its code on every call, as before
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function
2013-09-21 14:19:30 +02:00
2021-06-30 00:07:49 +02:00
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' entry of ytcfg is preferred; otherwise the value is searched
    for in the player JS. Without a player_url this raises ExtractorError
    when fatal, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    player_js = self._load_player(video_id, player_url, fatal=fatal)
    if not player_js:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_js,
        'JS player signature timestamp', group='sts', fatal=fatal))
2021-07-21 05:52:34 +02:00
def _mark_watched(self, video_id, player_responses):
    """Request the playback tracking URL so the video is marked as watched.

    The URL is read from the player responses; ``ver`` and a random ``cpn``
    are added to its query before it is requested non-fatally. A warning is
    reported and nothing is requested when no tracking URL is found.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    # random.choice picks uniformly; the previous randint(0, 256) & 63 had an
    # inclusive upper bound that made the first character slightly more likely.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2017-09-05 19:48:37 +02:00
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in webpage.

    Returns a list with, in order: embedded player URLs, lazyYT video ids and
    video ids from the Wordpress "YouTube Video Importer" plugin markup.
    """
    # Embedded YouTube player
    embed_matches = re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)
    entries = [unescapeHTML(mobj.group('url')) for mobj in embed_matches]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(unescapeHTML(lazy_id) for lazy_id in lazy_ids)

    # Wordpress "YouTube Video Importer" plugin
    plugin_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of groups; the video id is the last one
    entries.extend([groups[-1] for groups in plugin_matches])

    return entries
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by _extract_urls, or None when there is none."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)
2014-02-08 19:20:11 +01:00
@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of _VALID_URL matched against url.

    Raises ExtractorError when url does not match.
    """
    matched = re.match(cls._VALID_URL, url, re.VERBOSE)
    if matched is not None:
        return matched.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
2013-06-23 19:58:33 +02:00
2021-07-20 02:02:41 +02:00
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in ``data``.

    The chapter list is looked up under ``playerOverlays`` ->
    ``chapteredPlayerBarRenderer`` -> ``chapters`` and handed to
    ``_extract_chapters`` together with accessors that read each entry's
    ``chapterRenderer``: the start comes from ``timeRangeStartMillis``
    (passed through ``float_or_none`` with ``scale=1000``) and the title from
    ``title.simpleText``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_of(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    raw_chapters = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters(
        raw_chapters,
        chapter_time=start_of,
        chapter_title=title_of,
        duration=duration)
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists of the engagement panels.

    Every ``macroMarkersListRenderer.contents`` list found under
    ``engagementPanels`` is tried in order; the first one for which
    ``_extract_chapters`` returns a non-empty result wins. Later lists are
    not evaluated once a result has been found. Returns ``[]`` when none
    yields chapters.
    """
    marker_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in marker_lists:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
def _extract_chapters ( self , chapter_list , chapter_time , chapter_title , duration ) :
2020-06-05 23:16:31 +02:00
chapters = [ ]
2021-07-20 02:02:41 +02:00
last_chapter = { ' start_time ' : 0 }
for idx , chapter in enumerate ( chapter_list or [ ] ) :
title = chapter_title ( chapter )
2020-06-05 23:16:31 +02:00
start_time = chapter_time ( chapter )
if start_time is None :
continue
2021-07-20 02:02:41 +02:00
last_chapter [ ' end_time ' ] = start_time
if start_time < last_chapter [ ' start_time ' ] :
if idx == 1 :
chapters . pop ( )
self . report_warning ( ' Invalid start time for chapter " %s " ' % last_chapter [ ' title ' ] )
else :
self . report_warning ( f ' Invalid start time for chapter " { title } " ' )
continue
last_chapter = { ' start_time ' : start_time , ' title ' : title }
chapters . append ( last_chapter )
last_chapter [ ' end_time ' ] = duration
2020-06-05 23:16:31 +02:00
return chapters
2021-02-04 15:37:17 +01:00
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search ``webpage`` for a JSON blob matching ``regex`` and parse it.

    ``regex`` followed by ``self._YT_INITIAL_BOUNDARY_RE`` is tried first,
    then ``regex`` alone. When neither matches, ``'{}'`` is parsed instead.
    Parsing is non-fatal.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    json_text = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(json_text, video_id, fatal=False)
2020-06-05 23:16:31 +02:00
2021-04-07 13:37:43 +02:00
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    The first two space-separated words are handed to ``datetime_from_str``
    as ``now-<X><units>``. Returns ``None`` when there are fewer than three
    words or when ``datetime_from_str`` raises ``ValueError``.
    """
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    amount, unit = words[0], words[1]
    try:
        return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
    except ValueError:
        return None
2021-04-07 13:37:43 +02:00
2021-03-14 23:41:11 +01:00
def _extract_comment ( self , comment_renderer , parent = None ) :
comment_id = comment_renderer . get ( ' commentId ' )
if not comment_id :
return
2021-07-19 06:55:07 +02:00
2021-07-24 02:46:46 +02:00
text = self . _get_text ( comment_renderer , ' contentText ' )
2021-07-19 06:55:07 +02:00
2021-07-15 01:24:42 +02:00
# note: timestamp is an estimate calculated from the current time and time_text
2021-07-24 02:46:46 +02:00
time_text = self . _get_text ( comment_renderer , ' publishedTimeText ' ) or ' '
2021-07-19 06:55:07 +02:00
time_text_dt = self . parse_time_text ( time_text )
if isinstance ( time_text_dt , datetime . datetime ) :
timestamp = calendar . timegm ( time_text_dt . timetuple ( ) )
2021-07-24 02:46:46 +02:00
author = self . _get_text ( comment_renderer , ' authorText ' )
2021-03-14 23:41:11 +01:00
author_id = try_get ( comment_renderer ,
lambda x : x [ ' authorEndpoint ' ] [ ' browseEndpoint ' ] [ ' browseId ' ] , compat_str )
2021-07-19 06:55:07 +02:00
2021-07-15 01:24:42 +02:00
votes = parse_count ( try_get ( comment_renderer , ( lambda x : x [ ' voteCount ' ] [ ' simpleText ' ] ,
lambda x : x [ ' likeCount ' ] ) , compat_str ) ) or 0
2021-03-14 23:41:11 +01:00
author_thumbnail = try_get ( comment_renderer ,
lambda x : x [ ' authorThumbnail ' ] [ ' thumbnails ' ] [ - 1 ] [ ' url ' ] , compat_str )
author_is_uploader = try_get ( comment_renderer , lambda x : x [ ' authorIsChannelOwner ' ] , bool )
2021-07-12 03:20:03 +02:00
is_favorited = ' creatorHeart ' in ( try_get (
comment_renderer , lambda x : x [ ' actionButtons ' ] [ ' commentActionButtonsRenderer ' ] , dict ) or { } )
2021-03-14 23:41:11 +01:00
return {
' id ' : comment_id ,
' text ' : text ,
2021-04-07 13:37:43 +02:00
' timestamp ' : timestamp ,
2021-03-14 23:41:11 +01:00
' time_text ' : time_text ,
' like_count ' : votes ,
2021-07-12 03:20:03 +02:00
' is_favorited ' : is_favorited ,
2021-03-14 23:41:11 +01:00
' author ' : author ,
' author_id ' : author_id ,
' author_thumbnail ' : author_thumbnail ,
' author_is_uploader ' : author_is_uploader ,
' parent ' : parent or ' root '
}
2021-09-24 02:52:17 +02:00
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
    """Generate comment dicts by following continuations from ``root_continuation_data``.

    Args:
        root_continuation_data: renderer dict from which the first
            continuation is extracted.
        ytcfg: config passed on to header generation and to the API call.
        video_id: used for warnings and to regenerate an old-style token.
        parent: id of the parent comment when walking a reply thread.
        comment_counts: shared ``[comments so far, estimated total,
            current thread #]`` list; created on the outermost call.

    Yields:
        Dicts produced by ``_extract_comment``; replies follow the comment
        they belong to.
    """

    def extract_header(contents):
        """Pick the sort-order continuation from the comments header."""
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _continuation

    def extract_thread(contents):
        """Yield each comment in ``contents`` followed by its replies."""
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # ``dict`` is the expected type, as in every other try_get call
            # here; it used to sit inside the getter tuple and was therefore
            # invoked as a getter
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                yield from self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), comment_counts=comment_counts)

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                '       ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    continuation = extract_header(continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # Count from 1 so that ``count`` is the number of entries
                # received; counting from 0 made a page holding exactly one
                # comment look empty
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items), 1):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    continuation = extract_header(header_continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {}), 1):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
2021-03-14 23:41:11 +01:00
2021-07-12 01:18:40 +02:00
@staticmethod
def _generate_comment_continuation(video_id):
    """
    Generates initial comment section continuation token from given video id

    The token is the base64 encoding of five concatenated byte segments:
    three fixed ones with the UTF-8 bytes of ``video_id`` between them.
    """
    encoded_id = base64.b64encode(bytes(video_id.encode('utf-8')))
    segments = ('Eg0SCw==', encoded_id, 'GAYyJyIRIgs=', encoded_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
    payload = []
    for segment in segments:
        payload.extend(bytes_to_intlist(base64.b64decode(segment)))
    token = base64.b64encode(intlist_to_bytes(payload))
    return token.decode('utf-8')
2021-10-12 11:50:50 +02:00
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns an iterator over the comments of the ``itemSectionRenderer``
    whose ``sectionIdentifier`` is ``'comment-item-section'``, cut off after
    the ``max_comments`` configuration value when one is set. A deep copy of
    ``ytcfg`` with the client language forced to ``en`` is used, so the
    caller's config is left untouched.
    """

    def iter_comments(contents):
        # Runs lazily, i.e. after ``ytcfg`` below has been replaced by its copy
        section = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                section = item
                break
        yield from self._comment_entries(section, ytcfg, video_id)

    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
    # Force English regardless of account setting to prevent parsing issues
    # See: https://github.com/yt-dlp/yt-dlp/issues/532
    ytcfg = copy.deepcopy(ytcfg)
    client_context = traverse_obj(
        ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})
    client_context['hl'] = 'en'
    return itertools.islice(iter_comments(contents), 0, max_comments)
2021-03-14 23:41:11 +01:00
2021-06-30 00:07:49 +02:00
@staticmethod
2021-09-24 02:52:17 +02:00
def _get_checkok_params ( ) :
return { ' contentCheckOk ' : True , ' racyCheckOk ' : True }
@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player query.

    Args:
        sts: stored as ``signatureTimestamp`` when it is not ``None``.

    Returns:
        A dict holding ``playbackContext`` plus every key returned by
        ``_get_checkok_params``.
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context
2021-08-01 23:13:46 +02:00
@staticmethod
def _is_agegated(player_response):
    """Tell whether ``player_response`` describes an age-gated video.

    True when ``playabilityStatus`` carries a ``desktopLegacyAgeGateReason``,
    or when its ``status`` or ``reason`` contains one of the known age-gate
    markers. String values are compared case-insensitively: the status
    markers below are lowercase, whereas status values are handled as
    uppercase elsewhere in this class (e.g. ``'UNPLAYABLE'``), so a
    case-sensitive test could never match them.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )

    def mentions_age_gate(reason):
        # Only strings are lowercased; any other value is tested as before
        haystack = reason.lower() if isinstance(reason, str) else reason
        return any(expected in haystack for expected in AGE_GATE_REASONS)

    return any(mentions_age_gate(reason) for reason in reasons)
@staticmethod
def _is_unplayable(player_response):
    """Return True when ``playabilityStatus.status`` equals ``'UNPLAYABLE'``."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
2021-07-29 07:23:56 +02:00
2021-09-24 02:52:17 +02:00
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the ``player`` endpoint for ``video_id`` on behalf of ``client``.

    The signature timestamp is only looked up (non-fatally) when a
    ``player_url`` is given. The request itself is fatal.

    Returns:
        The response from ``_extract_response``, or ``None`` when it is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    yt_query.update(self._generate_player_context(sts))

    # e.g. "web_embedded" is shown as "web embedded"
    readable_client = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % readable_client)
    return response or None
def _get_requested_clients(self, url, smuggled_data):
    """Resolve the ``player_client`` configuration into an ordered client list.

    Each configured value is either a client name from ``INNERTUBE_CLIENTS``
    that does not start with ``_``, ``default`` (android + web) or ``all``
    (every allowed client, highest ``priority`` first). Anything else is
    skipped with a warning. With nothing usable configured, the default list
    is used. For music URLs the ``<client>_music`` variants known to
    ``INNERTUBE_CLIENTS`` are added. Duplicates are removed by ``orderedSet``.
    """
    default = ['android', 'web']

    allowed_clients = [name for name in INNERTUBE_CLIENTS.keys() if name[:1] != '_']
    allowed_clients.sort(
        key=lambda name: INNERTUBE_CLIENTS[name]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
            continue
        if client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')

    if not requested_clients:
        requested_clients = default

    is_music = smuggled_data.get('is_music_url') or self.is_music_url(url)
    if is_music:
        # The generator walks the list that is being extended, exactly as before
        requested_clients.extend(
            f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)

    return orderedSet(requested_clients)
2015-07-25 17:30:34 +02:00
2021-07-27 11:40:44 +02:00
def _extract_player_ytcfg(self, client, video_id):
    """Download the page that carries the ytcfg of ``client`` and extract it.

    Only ``web_music`` and ``web_embedded`` have such a page; any other
    client gives ``{}`` without a download. The download is non-fatal, and
    ``{}`` is returned when no config is extracted.
    """
    config_pages = {
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    config_url = config_pages.get(client)
    if not config_url:
        return {}
    webpage = self._download_webpage(
        config_url, video_id, fatal=False, note=f'Downloading {client} config')
    return self.extract_ytcfg(video_id, webpage) or {}
2021-09-24 02:52:17 +02:00
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect one player response per client in ``clients``.

    Clients are tried in the given order. Age-gate fallbacks
    (``<client>_agegate``, then ``<client>_creator`` when authenticated) are
    queued on demand if they exist in ``INNERTUBE_CLIENTS`` and were not
    requested explicitly.

    Returns:
        ``(player_responses, player_url)``.

    Raises:
        ExtractorError: the last request error, if no response was collected
            at all. Otherwise request errors are only reported as warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    original_clients = clients
    # Reversed copy, so that ``pop()`` hands the clients out in request order
    pending = clients[::-1]
    responses = []

    def append_client(client_name):
        if client_name not in INNERTUBE_CLIENTS:
            return
        if client_name in original_clients:
            return
        pending.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        responses.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client = pending.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                # The watch page already carries the web client's response
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; older ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            responses.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if last_error:
        if not responses:
            raise last_error
        self.report_warning(last_error)
    return responses, player_url
2021-07-21 05:52:34 +02:00
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Generate format dicts from the ``streamingData`` of all player responses.

    Direct (https) formats are yielded first, then the HLS and DASH manifest
    formats of each streaming data entry, unless manifests are skipped by
    configuration.

    Args:
        streaming_data: list of ``streamingData`` dicts.
        video_id: passed to the signature / nsig / manifest helpers.
        player_url: needed to decrypt signatures; ciphered formats are
            skipped without it.
        is_live: DASH manifests of live videos are only read when
            ``include_live_dash`` is configured.

    Yields:
        One format dict per usable stream.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # ``quality`` can still be None here (see the ``if quality:``
                # guard above), so do not call ``.replace`` on it directly
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = (
        (not is_live or self._configuration_arg('include_live_dash'))
        and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    def process_manifest_format(f, proto, itag):
        """Assign format id and quality to manifest format ``f``.

        Returns False when the same itag was already seen for ``proto``.
        """
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag from another protocol: keep both, told apart by suffix
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Prefer the quality recorded for the itag, then the one for the height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    yield f
2021-07-21 05:52:34 +02:00
2021-11-16 20:56:23 +01:00
def _extract_storyboard(self, player_responses, duration):
    """Generate one ``mhtml`` storyboard format per level of the storyboard spec.

    The spec string is split on ``|``: the first field is the base URL and
    every other field describes one level as eight ``#``-separated values.
    The first five must be non-zero integers (width, height, frame count,
    columns, rows); the last two are substituted for ``$N`` and appended as
    ``sigh``. Levels that do not fit this shape are skipped with a warning.
    ``duration`` is spread over the fragments of each level.
    """
    raw_spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='')
    levels = raw_spec.split('|')[::-1]
    if not levels:
        return
    # After the reversal the base URL is the last element
    base_url = levels.pop()
    last_level = len(levels) - 1
    for i, level_spec in enumerate(levels):
        args = level_spec.split('#')
        counts = [int_or_none(value) for value in args[:5]]
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(last_level - i)).replace('$N', N) + f'&sigh={sigh}'
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count

        fragments = []
        for j in range(math.ceil(fragment_count)):
            fragments.append({
                'path': url.replace('$M', str(j)),
                # The last fragment only covers what is left of the video
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            })

        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': fragments,
        }
2021-07-21 05:52:34 +02:00
def _real_extract(self, url):
    """Extract the info dict for a single YouTube video URL.

    Downloads the watch page (unless skipped via the ``player_skip``
    extractor argument) and the player responses, then assembles formats,
    thumbnails, subtitles, music metadata, chapters, like counts and
    availability from them.

    Returns either the video info dict, a ``url_result`` for a trailer, or
    a playlist result for multifeed videos.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = None
    if 'webpage' not in self._configuration_arg('player_skip'):
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    player_responses, player_url = self._extract_player_responses(
        self._get_requested_clients(url, smuggled_data),
        video_id, webpage, master_ytcfg)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no <meta> tags to fall back on
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(
                    compat_urllib_parse_unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], compat_str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # reason may be None; do not concatenate onto it blindly
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break

    thumbnails = []
    thumbnail_dicts = traverse_obj(
        (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
        expected_type=dict, default=[])
    for thumbnail in thumbnail_dicts:
        thumbnail_url = thumbnail.get('url')
        if not thumbnail_url:
            continue
        # Sometimes youtube gives a wrong thumbnail URL. See:
        # https://github.com/yt-dlp/yt-dlp/issues/233
        # https://github.com/ytdl-org/youtube-dl/issues/28023
        if 'maxresdefault' in thumbnail_url:
            thumbnail_url = thumbnail_url.split('?')[0]
        thumbnails.append({
            'url': thumbnail_url,
            'height': int_or_none(thumbnail.get('height')),
            'width': int_or_none(thumbnail.get('width')),
        })
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # Snapshot of the thumbnails reported by the site, taken before guessed URLs are added
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Earlier names in thumbnail_names get a higher preference; webp beats jpg
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_endtime and live_starttime:
        duration = live_endtime - live_starttime

    formats.extend(self._extract_storyboard(player_responses, duration))

    # Source is given priority since formats that throttle are given lower source_preference
    # When throttling issue is fully fixed, remove this
    self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_starttime,
    }

    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Adds one entry per supported subtitle format under container[lang_code]
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        subtitles, automatic_captions = {}, {}
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                # Keep the real target language for the request; the dict key
                # below may get a '-<source lang>' suffix that is only a label.
                target_lang = trans_code
                if caption_track.get('kind') != 'asr':
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, template=' from %s')
                process_language(
                    automatic_captions, base_url, trans_code, trans_name, {'tlang': target_lang})
        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # start/end times may be given in the URL fragment or query (fragment wins)
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # strip the matched album text (not the group name)
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info.setdefault('subtitles', {})['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Resolved unconditionally so that `contents` is always bound for the
    # comment extractor below, even when no initial data could be fetched.
    contents = try_get(
        initial_data,
        lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
        list) or []

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        for content in contents:
            if not isinstance(content, dict):
                continue
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line marks several songs; per-song fields are then skipped
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
2013-06-23 19:58:33 +02:00
2021-11-24 04:57:59 +01:00
2021-11-24 04:01:52 +01:00
class YoutubeTabBaseInfoExtractor ( YoutubeBaseInfoExtractor ) :
2014-11-23 20:41:03 +01:00
2021-11-24 04:01:52 +01:00
def _extract_channel_id(self, webpage):
    """Return the channel id found in the webpage's meta tags.

    The ``channelId`` meta tag is preferred; otherwise the id is parsed out
    of the first available URL-bearing meta tag (``og:url`` and friends).
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier must sit inside the group: a repeated group only keeps
    # its last repetition, which would capture a single character.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
2020-11-03 16:15:16 +01:00
2020-11-19 20:22:59 +01:00
@staticmethod
2021-03-04 19:05:26 +01:00
def _extract_basic_item_renderer ( item ) :
# Modified from _extract_grid_item_renderer
2021-04-17 05:02:33 +02:00
known_basic_renderers = (
' playlistRenderer ' , ' videoRenderer ' , ' channelRenderer ' , ' showRenderer '
2021-03-04 19:05:26 +01:00
)
for key , renderer in item . items ( ) :
2021-04-17 05:02:33 +02:00
if not isinstance ( renderer , dict ) :
2021-03-04 19:05:26 +01:00
continue
2021-04-17 05:02:33 +02:00
elif key in known_basic_renderers :
return renderer
elif key . startswith ( ' grid ' ) and key . endswith ( ' Renderer ' ) :
return renderer
2020-11-19 20:22:59 +01:00
def _grid_entries ( self , grid_renderer ) :
for item in grid_renderer [ ' items ' ] :
if not isinstance ( item , dict ) :
2015-04-28 17:07:56 +02:00
continue
2021-03-04 19:05:26 +01:00
renderer = self . _extract_basic_item_renderer ( item )
2020-11-19 20:22:59 +01:00
if not isinstance ( renderer , dict ) :
continue
2021-07-24 02:46:46 +02:00
title = self . _get_text ( renderer , ' title ' )
2021-07-19 06:55:07 +02:00
2020-11-19 20:22:59 +01:00
# playlist
playlist_id = renderer . get ( ' playlistId ' )
if playlist_id :
yield self . url_result (
' https://www.youtube.com/playlist?list= %s ' % playlist_id ,
ie = YoutubeTabIE . ie_key ( ) , video_id = playlist_id ,
video_title = title )
2021-04-17 05:02:33 +02:00
continue
2020-11-19 20:22:59 +01:00
# video
video_id = renderer . get ( ' videoId ' )
if video_id :
yield self . _extract_video ( renderer )
2021-04-17 05:02:33 +02:00
continue
2020-11-19 20:22:59 +01:00
# channel
channel_id = renderer . get ( ' channelId ' )
if channel_id :
yield self . url_result (
' https://www.youtube.com/channel/ %s ' % channel_id ,
ie = YoutubeTabIE . ie_key ( ) , video_title = title )
2021-04-17 05:02:33 +02:00
continue
# generic endpoint URL support
ep_url = urljoin ( ' https://www.youtube.com/ ' , try_get (
renderer , lambda x : x [ ' navigationEndpoint ' ] [ ' commandMetadata ' ] [ ' webCommandMetadata ' ] [ ' url ' ] ,
compat_str ) )
if ep_url :
for ie in ( YoutubeTabIE , YoutubePlaylistIE , YoutubeIE ) :
if ie . suitable ( ep_url ) :
yield self . url_result (
ep_url , ie = ie . ie_key ( ) , video_id = ie . _match_id ( ep_url ) , video_title = title )
break
2020-11-19 20:22:59 +01:00
2020-11-23 22:59:10 +01:00
def _shelf_entries_from_content ( self , shelf_renderer ) :
content = shelf_renderer . get ( ' content ' )
if not isinstance ( content , dict ) :
2020-11-19 20:22:59 +01:00
return
2021-03-04 19:05:26 +01:00
renderer = content . get ( ' gridRenderer ' ) or content . get ( ' expandedShelfContentsRenderer ' )
2020-11-23 22:59:10 +01:00
if renderer :
# TODO: add support for nested playlists so each shelf is processed
# as separate playlist
# TODO: this includes only first N items
for entry in self . _grid_entries ( renderer ) :
yield entry
renderer = content . get ( ' horizontalListRenderer ' )
if renderer :
# TODO
pass
2020-11-19 20:22:59 +01:00
2021-01-01 13:26:37 +01:00
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's own endpoint, then its embedded entries.

    When `skip_channels` is set and the shelf URL contains ``/channels?``,
    nothing at all is yielded for this shelf.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
2013-06-23 19:58:33 +02:00
2020-11-19 20:22:59 +01:00
def _playlist_entries ( self , video_list_renderer ) :
for content in video_list_renderer [ ' contents ' ] :
if not isinstance ( content , dict ) :
continue
renderer = content . get ( ' playlistVideoRenderer ' ) or content . get ( ' playlistPanelVideoRenderer ' )
if not isinstance ( renderer , dict ) :
continue
video_id = renderer . get ( ' videoId ' )
if not video_id :
continue
yield self . _extract_video ( renderer )
2017-12-18 21:51:28 +01:00
2020-11-21 23:21:09 +01:00
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer``, if it has a ``videoId``."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
2020-11-19 20:22:59 +01:00
def _video_entry ( self , video_renderer ) :
video_id = video_renderer . get ( ' videoId ' )
if video_id :
return self . _extract_video ( video_renderer )
2016-09-02 18:43:20 +02:00
2020-11-19 20:22:59 +01:00
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by one community post.

    In order: the attached video, the attached playlist, then every video
    URL linked inline in the post text (except the attached video itself).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        attached_entry = self._extract_video(attached_video)
        if attached_entry:
            yield attached_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        linked_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not linked_url or not YoutubeIE.suitable(linked_url):
            continue
        linked_video_id = YoutubeIE._match_id(linked_url)
        # The attached video was already yielded above
        if linked_video_id != video_id:
            yield self.url_result(linked_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
2016-09-02 18:43:20 +02:00
2020-11-19 20:22:59 +01:00
def _post_thread_continuation_entries ( self , post_thread_continuation ) :
contents = post_thread_continuation . get ( ' contents ' )
if not isinstance ( contents , list ) :
return
for content in contents :
renderer = content . get ( ' backstagePostThreadRenderer ' )
if not isinstance ( renderer , dict ) :
continue
for entry in self . _post_thread_entries ( renderer ) :
yield entry
2017-12-18 21:51:28 +01:00
2021-04-04 00:05:17 +02:00
r ''' # unused
def _rich_grid_entries ( self , contents ) :
for content in contents :
video_renderer = try_get ( content , lambda x : x [ ' richItemRenderer ' ] [ ' content ' ] [ ' videoRenderer ' ] , dict )
if video_renderer :
entry = self . _video_entry ( video_renderer )
if entry :
yield entry
'''
2021-11-24 04:01:52 +01:00
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under parent_renderer['contents'].

    continuation_list acts as an out-parameter: it is overwritten in place
    (continuation_list[:] = [token]) so the caller can read the continuation
    for the next page from continuation_list[0] once iteration is finished.
    """
    continuation_list[:] = [None]

    # Renderer key -> callable producing an iterable of entries for its value
    handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x),
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
    }

    for section in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(section, dict):
            continue

        item_section = try_get(section, lambda x: x['itemSectionRenderer'], dict)
        if not item_section:
            # Not an item section: only rich items are handled here
            rich_item = section.get('richItemRenderer')
            if rich_item:
                for entry in self._rich_entries(rich_item):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for item in try_get(item_section, lambda x: x['contents'], list) or []:
            if not isinstance(item, dict):
                continue
            # Only the first recognised renderer of each item is processed
            for key, renderer in item.items():
                handler = handlers.get(key)
                if handler is None:
                    continue
                for entry in handler(renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        # Fall back to the section's own continuation
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(item_section)

    # Last resort: the continuation of the parent renderer
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of a tab, following continuations page by page.

    The first batch comes from the tab's own content; every further batch is
    requested through self._extract_response using the continuation obtained
    from the previous batch. Iteration stops when there is no continuation,
    no response, or no recognised renderer in the response.
    """
    # One-element holder that _extract_entries fills with the next continuation
    continuation_list = [None]

    def extract_entries(renderer):
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict)
        or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    # Handlers for responses carrying `continuationContents`
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for appended continuation items: (handler, key the items are wrapped under)
    item_handlers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in continuation_handlers:
                continue
            continuation_renderer = value
            continuation_list[0] = None
            for entry in continuation_handlers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        # A falsy (e.g. empty) renderer deliberately falls through to the second stage
        if continuation_renderer:
            continue

        on_response_received = dict_get(
            response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received,
            lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in first_item.items():
            if key not in item_handlers:
                continue
            handler, wrapper_key = item_handlers[key]
            video_items_renderer = {wrapper_key: continuation_items}
            continuation_list[0] = None
            for entry in handler(video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if not video_items_renderer:
            break
2016-07-09 03:37:02 +02:00
2020-11-19 20:22:59 +01:00
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose 'selected' flag is True.

    Both 'tabRenderer' and 'expandableTabRenderer' are considered.
    Raises ExtractorError when no tab is marked as selected.
    """
    renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next((r for r in renderers if r.get('selected') is True), None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected
2014-12-06 14:02:19 +01:00
2021-07-15 04:42:30 +02:00
@classmethod
def _extract_uploader(cls, data):
    """Build uploader metadata from the playlist sidebar's secondary info.

    Returns a dict with any of 'uploader', 'uploader_id' and 'uploader_url';
    keys whose value would be None are left out. An empty dict is returned
    when no owner information is found.
    """
    secondary_info = cls._extract_sidebar_info_renderer(
        data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        secondary_info,
        lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    candidates = (
        ('uploader', owner.get('text')),
        ('uploader_id', try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)),
        ('uploader_url', urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))),
    )
    return {key: value for key, value in candidates if value is not None}
2020-11-19 20:22:59 +01:00
2021-10-08 23:19:25 +02:00
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of a channel/playlist page.

    Metadata is read from the channel metadata renderer or, failing that,
    from the playlist metadata renderer. The entries are produced lazily by
    self._entries for the selected tab.
    """
    playlist_id = title = description = None
    channel_name = channel_url = channel_id = None
    thumbnails_list, tags = [], []

    selected_tab = self._extract_selected_tab(tabs)

    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        # Prefer the avatar; otherwise use the playlist thumbnail from the sidebar
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in thumbnails_list:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    # Append the selected tab's title / expanded text (if any) to the title
    title = (
        title
        + format_field(selected_tab, 'title', ' - %s')
        + format_field(selected_tab, 'expandedText', ' - %s'))

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # Mirror the uploader fields into the channel fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
2016-07-05 18:30:44 +02:00
2021-10-08 23:19:25 +02:00
def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of a mix playlist, requesting further pages on demand.

    Each page is fetched from the 'next' endpoint using the watch endpoint of
    the last item of the current page. Iteration ends when a page has no
    videos, has nothing after the last yielded video, or when the very first
    video shows up again.
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        # (start is 0 when that video is not present in this page)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']

        # The last panel item may lack a watch endpoint; fall back to an empty
        # dict so that the defaults below are used instead of raising
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
2021-03-04 19:05:26 +01:00
2021-10-08 23:19:25 +02:00
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return the result for a playlist panel found in the page data.

    When the panel points to a playlist URL different from `url`, a url
    result for YoutubeTabIE is returned; otherwise the playlist is extracted
    here as a mix playlist.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    delegate = playlist_url and playlist_url != url
    if delegate:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=title)

    mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, ytcfg)
    return self.playlist_result(
        mix_entries, playlist_id=playlist_id, playlist_title=title)
2013-06-23 19:58:33 +02:00
2021-07-15 04:42:30 +02:00
def _extract_availability(self, data):
    """
    Determine the availability of the given playlist/tab.
    Note: nothing is assumed to be public unless YouTube says so explicitly
    @param data: response
    """
    primary_info = self._extract_sidebar_info_renderer(
        data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(primary_info)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        primary_info,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for dropdown_entry in dropdown_entries:
        selected = try_get(
            dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if selected:
            label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
            if label:
                badge_labels.add(label.lower())
                break

    is_private = None
    is_unlisted = None
    for label in badge_labels:
        if label == 'public':
            is_private = is_unlisted = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy `info_renderer` value among the playlist sidebar items.

    @param data:           response containing sidebar.playlistSidebarRenderer.items
    @param info_renderer:  key to look up in each sidebar item
    @param expected_type:  type the value must have to be accepted
    Returns None when no sidebar item provides such a value.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next((candidate for candidate in candidates if candidate), None)
2021-10-08 23:19:25 +02:00
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again with unavailable videos included.
    Returns None when the playlist has no primary sidebar info. The browse
    endpoint of the 'show unavailable videos' menu item is used when that
    item exists; otherwise default request parameters are sent.
    """
    primary_info = self._extract_sidebar_info_renderer(
        data, 'playlistSidebarPrimaryInfoRenderer')
    if not primary_info:
        return

    browse_endpoint = {}
    menu_items = try_get(
        primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        text = try_get(nav_item, lambda x: x['text']['simpleText'], compat_str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break
    browse_id = browse_endpoint.get('browseId')
    params = browse_endpoint.get('params')

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
2021-04-17 00:39:08 +02:00
2021-10-08 23:19:25 +02:00
def _extract_webpage(self, url, item_id, fatal=True):
    """Download the webpage and its initial data, retrying when needed.

    A retry happens after a network error (HTTP 403 and 429 excluded) and
    when the initial data has neither 'contents' nor 'currentVideoEndpoint'.
    The number of retries comes from the 'extractor_retries' param (default 3).
    With fatal=False, failures are reported as warnings instead of raised.

    Returns a (webpage, data) tuple.
    """
    max_retries = self.get_param('extractor_retries', 3)
    webpage = data = last_error = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % attempt if attempt else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            retriable = isinstance(cause, network_exceptions) and (
                not isinstance(cause, compat_HTTPError) or cause.code not in (403, 429))
            if retriable:
                last_error = error_to_compat_str(cause or e.msg)
                if attempt < max_retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        # Download succeeded: surface any alerts contained in the data
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint')):
            break

        last_error = 'Incomplete yt initial data received'
        if attempt < max_retries:
            continue
        if fatal:
            raise ExtractorError(last_error)
        self.report_warning(last_error)
        break

    return webpage, data
2021-10-08 23:19:25 +02:00
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Obtain the page data for `url`, from the webpage or from the API.

    The webpage is tried first unless 'webpage' is listed in the 'skip'
    configuration argument. If that yields no data, the tab endpoint is
    queried through the API instead.

    Returns a (data, ytcfg) tuple.
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
    if data:
        return data, ytcfg

    # No ytcfg while authenticated: the API fallback may not extract correctly
    if not ytcfg and self.is_authenticated:
        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        if 'authcheck' not in self._configuration_arg('skip') and fatal:
            raise ExtractorError(
                msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                      ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(msg, only_once=True)

    data = self._extract_tab_endpoint(
        url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve `url` through the API and fetch the response of its endpoint.

    The URL is first resolved with 'navigation/resolve_url'; the resulting
    browse or watch endpoint parameters are then sent to the 'browse' or
    'next' API endpoint respectively.
    When nothing can be resolved, ExtractorError is raised if `fatal`,
    otherwise a warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    # Keyword arguments shared by both API requests
    common = dict(
        item_id=item_id, headers=headers, ytcfg=ytcfg,
        fatal=fatal, default_client=default_client)

    resolve_response = self._extract_response(
        query={'url': url}, check_get_keys='endpoint',
        ep='navigation/resolve_url', note='Downloading API parameters API JSON',
        **common)

    endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
    for endpoint_key, api_endpoint in endpoints.items():
        params = try_get(resolve_response, lambda x: x['endpoint'][endpoint_key], dict)
        if not params:
            continue
        return self._extract_response(
            query=params, ep=api_endpoint,
            check_get_keys=('contents', 'currentVideoEndpoint'),
            **common)

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
2021-11-24 04:01:52 +01:00
@staticmethod
def _smuggle_data(entries, data):
    """Yield each entry, with `data` smuggled into its 'url' when `data` is truthy.

    Entries are modified in place; with falsy `data` they pass through untouched.
    """
    for item in entries:
        if data:
            item['url'] = smuggle_url(item['url'], data)
        yield item
# Value used for the search request's 'params' field when the caller passes none;
# a falsy value means the field is not sent at all
_SEARCH_PARAMS = None
def _search_results(self, query, params=NO_DEFAULT):
    """Yield search result entries for `query`, page after page.

    @param query:   the search string
    @param params:  value for the request's 'params' field; defaults to
                    self._SEARCH_PARAMS and is omitted when falsy
    Paging stops as soon as a page provides no continuation.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Filled in place by _extract_entries with the continuation of each page
    continuation_list = [None]
    page_num = 0
    while True:
        page_num += 1
        data.update(continuation_list[0] or {})
        search = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
            check_get_keys=('contents', 'onResponseReceivedCommands'))
        # First page vs. continuation pages keep their items in different places
        content_getters = (
            lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
            lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems'],
        )
        slr_contents = try_get(search, content_getters, list)
        yield from self._extract_entries({'contents': slr_contents}, continuation_list)
        if not continuation_list[0]:
            break
class YoutubeTabIE ( YoutubeTabBaseInfoExtractor ) :
IE_DESC = ' YouTube Tabs '
_VALID_URL = r ''' (?x:
https ? : / /
( ? : \w + \. ) ?
( ? :
youtube ( ? : kids ) ? \. com |
% ( invidious ) s
) /
( ? :
( ? P < channel_type > channel | c | user | browse ) / |
( ? P < not_channel >
feed / | hashtag / |
( ? : playlist | watch ) \? . * ? \blist =
) |
( ? ! ( ? : % ( reserved_names ) s ) \b ) # Direct URLs
)
( ? P < id > [ ^ / ? \#&]+)
) ''' % {
' reserved_names ' : YoutubeBaseInfoExtractor . _RESERVED_NAMES ,
' invidious ' : ' | ' . join ( YoutubeBaseInfoExtractor . _INVIDIOUS_SITES ) ,
}
IE_NAME = ' youtube:tab '
_TESTS = [ {
' note ' : ' playlists, multipage ' ,
' url ' : ' https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid ' ,
' playlist_mincount ' : 94 ,
' info_dict ' : {
' id ' : ' UCqj7Cz7revf5maW9g5pgNcg ' ,
' title ' : ' Игорь Клейнер - Playlists ' ,
' description ' : ' md5:be97ee0f14ee314f1f002cf187166ee2 ' ,
' uploader ' : ' Игорь Клейнер ' ,
' uploader_id ' : ' UCqj7Cz7revf5maW9g5pgNcg ' ,
} ,
} , {
' note ' : ' playlists, multipage, different order ' ,
' url ' : ' https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd ' ,
' playlist_mincount ' : 94 ,
' info_dict ' : {
' id ' : ' UCqj7Cz7revf5maW9g5pgNcg ' ,
' title ' : ' Игорь Клейнер - Playlists ' ,
' description ' : ' md5:be97ee0f14ee314f1f002cf187166ee2 ' ,
' uploader_id ' : ' UCqj7Cz7revf5maW9g5pgNcg ' ,
' uploader ' : ' Игорь Клейнер ' ,
} ,
} , {
' note ' : ' playlists, series ' ,
' url ' : ' https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3 ' ,
' playlist_mincount ' : 5 ,
' info_dict ' : {
' id ' : ' UCYO_jab_esuFRV4b17AJtAw ' ,
' title ' : ' 3Blue1Brown - Playlists ' ,
' description ' : ' md5:e1384e8a133307dd10edee76e875d62f ' ,
' uploader_id ' : ' UCYO_jab_esuFRV4b17AJtAw ' ,
' uploader ' : ' 3Blue1Brown ' ,
} ,
} , {
' note ' : ' playlists, singlepage ' ,
' url ' : ' https://www.youtube.com/user/ThirstForScience/playlists ' ,
' playlist_mincount ' : 4 ,
' info_dict ' : {
' id ' : ' UCAEtajcuhQ6an9WEzY9LEMQ ' ,
' title ' : ' ThirstForScience - Playlists ' ,
' description ' : ' md5:609399d937ea957b0f53cbffb747a14c ' ,
' uploader ' : ' ThirstForScience ' ,
' uploader_id ' : ' UCAEtajcuhQ6an9WEzY9LEMQ ' ,
}
} , {
' url ' : ' https://www.youtube.com/c/ChristophLaimer/playlists ' ,
' only_matching ' : True ,
} , {
' note ' : ' basic, single video playlist ' ,
' url ' : ' https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc ' ,
' info_dict ' : {
' uploader_id ' : ' UCmlqkdCBesrv2Lak1mF_MxA ' ,
' uploader ' : ' Sergey M. ' ,
' id ' : ' PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc ' ,
' title ' : ' youtube-dl public playlist ' ,
} ,
' playlist_count ' : 1 ,
} , {
' note ' : ' empty playlist ' ,
' url ' : ' https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf ' ,
' info_dict ' : {
' uploader_id ' : ' UCmlqkdCBesrv2Lak1mF_MxA ' ,
' uploader ' : ' Sergey M. ' ,
' id ' : ' PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf ' ,
' title ' : ' youtube-dl empty playlist ' ,
} ,
' playlist_count ' : 0 ,
} , {
' note ' : ' Home tab ' ,
' url ' : ' https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured ' ,
' info_dict ' : {
' id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
' title ' : ' lex will - Home ' ,
' description ' : ' md5:2163c5d0ff54ed5f598d6a7e6211e488 ' ,
' uploader ' : ' lex will ' ,
' uploader_id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
} ,
' playlist_mincount ' : 2 ,
} , {
' note ' : ' Videos tab ' ,
' url ' : ' https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos ' ,
' info_dict ' : {
' id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
' title ' : ' lex will - Videos ' ,
' description ' : ' md5:2163c5d0ff54ed5f598d6a7e6211e488 ' ,
' uploader ' : ' lex will ' ,
' uploader_id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
} ,
' playlist_mincount ' : 975 ,
} , {
' note ' : ' Videos tab, sorted by popular ' ,
' url ' : ' https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid ' ,
' info_dict ' : {
' id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
' title ' : ' lex will - Videos ' ,
' description ' : ' md5:2163c5d0ff54ed5f598d6a7e6211e488 ' ,
' uploader ' : ' lex will ' ,
' uploader_id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
} ,
' playlist_mincount ' : 199 ,
} , {
' note ' : ' Playlists tab ' ,
' url ' : ' https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists ' ,
' info_dict ' : {
' id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
' title ' : ' lex will - Playlists ' ,
' description ' : ' md5:2163c5d0ff54ed5f598d6a7e6211e488 ' ,
' uploader ' : ' lex will ' ,
' uploader_id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
} ,
' playlist_mincount ' : 17 ,
} , {
' note ' : ' Community tab ' ,
' url ' : ' https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community ' ,
' info_dict ' : {
' id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
' title ' : ' lex will - Community ' ,
' description ' : ' md5:2163c5d0ff54ed5f598d6a7e6211e488 ' ,
' uploader ' : ' lex will ' ,
' uploader_id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
} ,
' playlist_mincount ' : 18 ,
} , {
' note ' : ' Channels tab ' ,
' url ' : ' https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels ' ,
' info_dict ' : {
' id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
' title ' : ' lex will - Channels ' ,
' description ' : ' md5:2163c5d0ff54ed5f598d6a7e6211e488 ' ,
' uploader ' : ' lex will ' ,
' uploader_id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
} ,
' playlist_mincount ' : 12 ,
} , {
' note ' : ' Search tab ' ,
' url ' : ' https://www.youtube.com/c/3blue1brown/search?query=linear %20a lgebra ' ,
' playlist_mincount ' : 40 ,
' info_dict ' : {
' id ' : ' UCYO_jab_esuFRV4b17AJtAw ' ,
' title ' : ' 3Blue1Brown - Search - linear algebra ' ,
' description ' : ' md5:e1384e8a133307dd10edee76e875d62f ' ,
' uploader ' : ' 3Blue1Brown ' ,
' uploader_id ' : ' UCYO_jab_esuFRV4b17AJtAw ' ,
} ,
} , {
' url ' : ' https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA ' ,
' only_matching ' : True ,
} , {
' note ' : ' Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list. ' ,
' url ' : ' https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC ' ,
' info_dict ' : {
' title ' : ' 29C3: Not my department ' ,
' id ' : ' PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC ' ,
' uploader ' : ' Christiaan008 ' ,
' uploader_id ' : ' UCEPzS1rYsrkqzSLNp76nrcg ' ,
' description ' : ' md5:a14dc1a8ef8307a9807fe136a0660268 ' ,
} ,
' playlist_count ' : 96 ,
} , {
' note ' : ' Large playlist ' ,
' url ' : ' https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q ' ,
' info_dict ' : {
' title ' : ' Uploads from Cauchemar ' ,
' id ' : ' UUBABnxM4Ar9ten8Mdjj1j0Q ' ,
' uploader ' : ' Cauchemar ' ,
' uploader_id ' : ' UCBABnxM4Ar9ten8Mdjj1j0Q ' ,
} ,
' playlist_mincount ' : 1123 ,
} , {
' note ' : ' even larger playlist, 8832 videos ' ,
' url ' : ' http://www.youtube.com/user/NASAgovVideo/videos ' ,
' only_matching ' : True ,
} , {
' note ' : ' Buggy playlist: the webpage has a " Load more " button but it doesn \' t have more videos ' ,
' url ' : ' https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA ' ,
' info_dict ' : {
' title ' : ' Uploads from Interstellar Movie ' ,
' id ' : ' UUXw-G3eDE9trcvY2sBMM_aA ' ,
' uploader ' : ' Interstellar Movie ' ,
' uploader_id ' : ' UCXw-G3eDE9trcvY2sBMM_aA ' ,
} ,
' playlist_mincount ' : 21 ,
} , {
' note ' : ' Playlist with " show unavailable videos " button ' ,
' url ' : ' https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q ' ,
' info_dict ' : {
' title ' : ' Uploads from Phim Siêu Nhân Nhật Bản ' ,
' id ' : ' UUTYLiWFZy8xtPwxFwX9rV7Q ' ,
' uploader ' : ' Phim Siêu Nhân Nhật Bản ' ,
' uploader_id ' : ' UCTYLiWFZy8xtPwxFwX9rV7Q ' ,
} ,
' playlist_mincount ' : 200 ,
} , {
' note ' : ' Playlist with unavailable videos in page 7 ' ,
' url ' : ' https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w ' ,
' info_dict ' : {
' title ' : ' Uploads from BlankTV ' ,
' id ' : ' UU8l9frL61Yl5KFOl87nIm2w ' ,
' uploader ' : ' BlankTV ' ,
' uploader_id ' : ' UC8l9frL61Yl5KFOl87nIm2w ' ,
} ,
' playlist_mincount ' : 1000 ,
} , {
' note ' : ' https://github.com/ytdl-org/youtube-dl/issues/21844 ' ,
' url ' : ' https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba ' ,
' info_dict ' : {
' title ' : ' Data Analysis with Dr Mike Pound ' ,
' id ' : ' PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba ' ,
' uploader_id ' : ' UC9-y-6csu5WGm29I7JiwpnA ' ,
' uploader ' : ' Computerphile ' ,
' description ' : ' md5:7f567c574d13d3f8c0954d9ffee4e487 ' ,
} ,
' playlist_mincount ' : 11 ,
} , {
' url ' : ' https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc ' ,
' only_matching ' : True ,
} , {
' note ' : ' Playlist URL that does not actually serve a playlist ' ,
' url ' : ' https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4 ' ,
' info_dict ' : {
' id ' : ' FqZTN594JQw ' ,
' ext ' : ' webm ' ,
' title ' : " Smiley ' s People 01 detective, Adventure Series, Action " ,
' uploader ' : ' STREEM ' ,
' uploader_id ' : ' UCyPhqAZgwYWZfxElWVbVJng ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UCyPhqAZgwYWZfxElWVbVJng ' ,
' upload_date ' : ' 20150526 ' ,
' license ' : ' Standard YouTube License ' ,
' description ' : ' md5:507cdcb5a49ac0da37a920ece610be80 ' ,
' categories ' : [ ' People & Blogs ' ] ,
' tags ' : list ,
' view_count ' : int ,
' like_count ' : int ,
' dislike_count ' : int ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
' skip ' : ' This video is not available. ' ,
' add_ie ' : [ YoutubeIE . ie_key ( ) ] ,
} , {
' url ' : ' https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live ' ,
' info_dict ' : {
' id ' : ' 3yImotZU3tw ' , # This will keep changing
' ext ' : ' mp4 ' ,
' title ' : compat_str ,
' uploader ' : ' Sky News ' ,
' uploader_id ' : ' skynews ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/skynews ' ,
' upload_date ' : r ' re: \ d {8} ' ,
' description ' : compat_str ,
' categories ' : [ ' News & Politics ' ] ,
' tags ' : list ,
' like_count ' : int ,
' dislike_count ' : int ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
' expected_warnings ' : [ ' Downloading just video ' , ' Ignoring subtitle tracks found in ' ] ,
} , {
' url ' : ' https://www.youtube.com/user/TheYoungTurks/live ' ,
' info_dict ' : {
' id ' : ' a48o2S1cPoo ' ,
' ext ' : ' mp4 ' ,
' title ' : ' The Young Turks - Live Main Show ' ,
' uploader ' : ' The Young Turks ' ,
' uploader_id ' : ' TheYoungTurks ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/TheYoungTurks ' ,
' upload_date ' : ' 20150715 ' ,
' license ' : ' Standard YouTube License ' ,
' description ' : ' md5:438179573adcdff3c97ebb1ee632b891 ' ,
' categories ' : [ ' News & Politics ' ] ,
' tags ' : [ ' Cenk Uygur (TV Program Creator) ' , ' The Young Turks (Award-Winning Work) ' , ' Talk Show (TV Genre) ' ] ,
' like_count ' : int ,
' dislike_count ' : int ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/c/CommanderVideoHq/live ' ,
' only_matching ' : True ,
} , {
' note ' : ' A channel that is not live. Should raise error ' ,
' url ' : ' https://www.youtube.com/user/numberphile/live ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/feed/trending ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/feed/library ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/feed/history ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/feed/subscriptions ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/feed/watch_later ' ,
' only_matching ' : True ,
} , {
' note ' : ' Recommended - redirects to home page. ' ,
' url ' : ' https://www.youtube.com/feed/recommended ' ,
' only_matching ' : True ,
} , {
' note ' : ' inline playlist with not always working continuations ' ,
' url ' : ' https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/course ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/zsecurity ' ,
' only_matching ' : True ,
} , {
' url ' : ' http://www.youtube.com/NASAgovVideo/videos ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/TheYoungTurks/live ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/hashtag/cctv9 ' ,
' info_dict ' : {
' id ' : ' cctv9 ' ,
' title ' : ' #cctv9 ' ,
} ,
' playlist_mincount ' : 350 ,
} , {
' url ' : ' https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU ' ,
' only_matching ' : True ,
} , {
' note ' : ' Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist ' ,
' url ' : ' https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq ' ,
' only_matching ' : True
} , {
' note ' : ' /browse/ should redirect to /channel/ ' ,
' url ' : ' https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng ' ,
' only_matching ' : True
} , {
' note ' : ' VLPL, should redirect to playlist?list=PL... ' ,
' url ' : ' https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq ' ,
' info_dict ' : {
' id ' : ' PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq ' ,
' uploader ' : ' NoCopyrightSounds ' ,
' description ' : ' Providing you with copyright free / safe music for gaming, live streaming, studying and more! ' ,
' uploader_id ' : ' UC_aEa8K-EOJ3D6gOs7HcyNg ' ,
' title ' : ' NCS Releases ' ,
} ,
' playlist_mincount ' : 166 ,
} , {
' note ' : ' Topic, should redirect to playlist?list=UU... ' ,
' url ' : ' https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw ' ,
' info_dict ' : {
' id ' : ' UU9ALqqC4aIeG5iDs7i90Bfw ' ,
' uploader_id ' : ' UC9ALqqC4aIeG5iDs7i90Bfw ' ,
' title ' : ' Uploads from Royalty Free Music - Topic ' ,
' uploader ' : ' Royalty Free Music - Topic ' ,
} ,
' expected_warnings ' : [
' A channel/user page was given ' ,
' The URL does not have a videos tab ' ,
] ,
' playlist_mincount ' : 101 ,
} , {
' note ' : ' Topic without a UU playlist ' ,
' url ' : ' https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg ' ,
' info_dict ' : {
' id ' : ' UCtFRv9O2AHqOZjjynzrv-xg ' ,
' title ' : ' UCtFRv9O2AHqOZjjynzrv-xg ' ,
} ,
' expected_warnings ' : [
' A channel/user page was given ' ,
' The URL does not have a videos tab ' ,
' Falling back to channel URL ' ,
] ,
' playlist_mincount ' : 9 ,
} , {
' note ' : ' Youtube music Album ' ,
' url ' : ' https://music.youtube.com/browse/MPREb_gTAcphH99wE ' ,
' info_dict ' : {
' id ' : ' OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0 ' ,
' title ' : ' Album - Royalty Free Music Library V2 (50 Songs) ' ,
} ,
' playlist_count ' : 50 ,
} , {
' note ' : ' unlisted single video playlist ' ,
' url ' : ' https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf ' ,
' info_dict ' : {
' uploader_id ' : ' UC9zHu_mHU96r19o-wV5Qs1Q ' ,
' uploader ' : ' colethedj ' ,
' id ' : ' PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf ' ,
' title ' : ' yt-dlp unlisted playlist test ' ,
' availability ' : ' unlisted '
} ,
' playlist_count ' : 1 ,
} , {
' note ' : ' API Fallback: Recommended - redirects to home page. Requires visitorData ' ,
' url ' : ' https://www.youtube.com/feed/recommended ' ,
' info_dict ' : {
' id ' : ' recommended ' ,
' title ' : ' recommended ' ,
} ,
' playlist_mincount ' : 50 ,
' params ' : {
' skip_download ' : True ,
' extractor_args ' : { ' youtubetab ' : { ' skip ' : [ ' webpage ' ] } }
} ,
} , {
' note ' : ' API Fallback: /videos tab, sorted by oldest first ' ,
' url ' : ' https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid ' ,
' info_dict ' : {
' id ' : ' UCu6mSoMNzHQiBIOCkHUa2Aw ' ,
' title ' : ' Cody \' sLab - Videos ' ,
' description ' : ' md5:d083b7c2f0c67ee7a6c74c3e9b4243fa ' ,
' uploader ' : ' Cody \' sLab ' ,
' uploader_id ' : ' UCu6mSoMNzHQiBIOCkHUa2Aw ' ,
} ,
' playlist_mincount ' : 650 ,
' params ' : {
' skip_download ' : True ,
' extractor_args ' : { ' youtubetab ' : { ' skip ' : [ ' webpage ' ] } }
} ,
} , {
' note ' : ' API Fallback: Topic, should redirect to playlist?list=UU... ' ,
' url ' : ' https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw ' ,
' info_dict ' : {
' id ' : ' UU9ALqqC4aIeG5iDs7i90Bfw ' ,
' uploader_id ' : ' UC9ALqqC4aIeG5iDs7i90Bfw ' ,
' title ' : ' Uploads from Royalty Free Music - Topic ' ,
' uploader ' : ' Royalty Free Music - Topic ' ,
} ,
' expected_warnings ' : [
' A channel/user page was given ' ,
' The URL does not have a videos tab ' ,
] ,
' playlist_mincount ' : 101 ,
' params ' : {
' skip_download ' : True ,
' extractor_args ' : { ' youtubetab ' : { ' skip ' : [ ' webpage ' ] } }
} ,
} ]
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs that YoutubeIE accepts are always declined here; everything else
    is decided by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
2021-05-15 16:38:47 +02:00
2021-03-04 19:05:26 +01:00
def _real_extract(self, url):
    """Unwrap smuggled data, run the real extraction and re-smuggle the data into the entries.

    The ``is_music_url`` flag is added to the smuggled data when
    ``self.is_music_url(url)`` is true, so it is available to
    ``__real_extract`` and to ``_smuggle_data``.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True

    result = self.__real_extract(url, smuggled_data)

    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
2021-05-18 16:01:06 +02:00
# Wraps _VALID_URL so that a matching URL is split into three named parts:
#   pre  - the text matched by _VALID_URL itself
#   tab  - an optional "/<word>" segment; the conditional group only tries to
#          capture it when the 'channel_type' group participated in the match
#          (that group is not defined in this pattern, so it comes from _VALID_URL)
#   post - whatever remains up to the end of the string
_url_re = re . compile ( r ' (?P<pre> %s )(?(channel_type)(?P<tab>/ \ w+))?(?P<post>.*)$ ' % _VALID_URL )
def __real_extract(self, url, smuggled_data):
    """Normalise a tab/playlist/channel URL and dispatch to the matching extraction path.

    Returns either a ``url_result`` (single video, or a resolved music album),
    or whatever ``_extract_from_tabs`` / ``_extract_from_playlist`` return.
    Raises ExtractorError when the page cannot be recognised, when a music
    album cannot be resolved, or when a ``/live`` tab did not redirect to a video.
    """
    item_id = self._match_id(url)
    # Force the host to www.youtube.com regardless of which domain was given
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    redirect_channels = 'no-youtube-channel-redirect' not in compat_opts

    def split_url(candidate):
        # Named groups of _url_re; groups that did not participate become ''
        groups = self._url_re.match(candidate).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    def get_tabs(response):
        return try_get(
            response, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)

    def video_result(vid):
        return self.url_result(
            f'https://www.youtube.com/watch?v={vid}', ie=YoutubeIE.ie_key(), video_id=vid)

    mobj = split_url(url)
    pre = mobj['pre']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    post = mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = 'https://www.youtube.com/playlist?list=%s' % item_id
            tab = post = ''
            is_channel = False
        elif id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music')
            murl = traverse_obj(
                mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist.')
            return self.url_result(murl, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = 'https://www.youtube.com/channel/%s' % item_id

    if is_channel and not tab and redirect_channels:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = pre + tab + post
    mobj = split_url(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(
            'A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return video_result(video_id)
        self.to_screen(
            'Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    data, ytcfg = self._extract_data(url, item_id)

    tabs = get_tabs(data)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if redirect_channels:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                wanted_tab = mobj['tab'][1:]
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning(
                        'The URL does not have a %s tab. Trying to redirect to playlist UU%s instead'
                        % (wanted_tab, item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        redirected = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        # Only switch over once the playlist request has succeeded
                        data, ytcfg = redirected
                        item_id, url = pl_id, pl_url
                else:
                    self.report_warning(
                        'The URL does not have a %s tab. %s is being downloaded instead'
                        % (wanted_tab, tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # data may have been replaced above, so the tabs are looked up again
    tabs = get_tabs(data)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return video_result(video_id)
2013-06-23 19:58:33 +02:00
2020-11-19 20:22:59 +01:00
class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist IDs and ``...?list=<id>`` URLs and delegate them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    # Verbose pattern: the scheme/host/path prefix is optional, so a bare
    # playlist ID on its own is accepted as well.
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [
        {
            'note': 'issue #673',
            'url': 'PLBB231211A4F62143',
            'info_dict': {
                'title': '[OLD]Team Fortress 2 (Class-based LP)',
                'id': 'PLBB231211A4F62143',
                'uploader': 'Wickydoo',
                'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
                'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            },
            'playlist_mincount': 29,
        },
        {
            'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
            'info_dict': {
                'title': 'YDL_safe_search',
                'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
            },
            'playlist_count': 2,
            'skip': 'This playlist is private',
        },
        {
            'note': 'embedded',
            'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'playlist_count': 4,
            'info_dict': {
                'title': 'JODA15',
                'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
                'uploader': 'milan',
                'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            },
        },
        {
            'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'playlist_mincount': 654,
            'info_dict': {
                'title': '2018 Chinese New Singles (11/6 updated)',
                'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
                'uploader': 'LBK',
                'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
                'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            },
        },
        {
            'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
            'only_matching': True,
        },
        {
            # music album playlist
            'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
            'only_matching': True,
        },
    ]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that YoutubeTabIE accepts or that carry a non-empty ``v`` parameter."""
        if YoutubeTabIE.suitable(url):
            return False
        from ..utils import parse_qs
        requested_video = parse_qs(url).get('v', [None])[0]
        if requested_video:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a www.youtube.com/playlist URL and hand it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query when there is one; a bare ID has none,
        # so fall back to a query holding just the playlist ID.
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
2021-01-01 13:26:37 +01:00
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that carry both a video ID and a ``list=`` playlist ID."""

    IE_DESC = 'youtu.be'
    _VALID_URL = (
        r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)'
        % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})
    _TESTS = [
        {
            'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
            'info_dict': {
                'id': 'yeWKywCrFtk',
                'ext': 'mp4',
                'title': 'Small Scale Baler and Braiding Rugs',
                'uploader': 'Backus-Page House Museum',
                'uploader_id': 'backuspagemuseum',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
                'upload_date': '20161008',
                'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
                'categories': ['Nonprofits & Activism'],
                'tags': list,
                'like_count': int,
                'dislike_count': int,
            },
            'params': {
                'noplaylist': True,
                'skip_download': True,
            },
        },
        {
            'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL and pass it on to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
2020-11-19 20:22:59 +01:00
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into that user's /videos page."""

    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [
        {'url': 'ytuser:phihag', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Return a url_result for the user's videos tab, to be handled by YoutubeTabIE."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
2016-07-09 03:37:02 +02:00
2013-06-23 20:28:15 +02:00
2020-11-23 22:59:10 +01:00
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` keyword family onto the ``LL`` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [
        {'url': ':ytfav', 'only_matching': True},
        {'url': ':ytfavorites', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Redirect to the ``LL`` playlist URL, handled by YoutubeTabIE."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
2021-11-24 04:01:52 +01:00
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` key; purely declarative."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = []
2013-06-23 20:28:15 +02:00
2021-11-24 04:57:59 +01:00
2021-12-01 18:50:38 +01:00
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` key; purely declarative."""

    # Name is derived from YoutubeSearchIE so the two stay in step
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
2013-07-01 17:59:28 +02:00
2014-03-04 03:32:28 +01:00
2021-11-24 04:01:52 +01:00
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle youtube.com/results URLs, passing the ``sp`` parameter through to the search."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'python',
                'title': 'python',
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build a playlist from the search results; the query doubles as playlist id and title."""
        params = parse_qs(url)
        # 'search_query' takes precedence over 'q' when both are present
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        search_params = params.get('sp', (None,))[0]
        return self.playlist_result(self._search_results(query, search_params), query, query)
2020-11-21 23:21:09 +01:00
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.

    A subclass only has to provide _FEED_NAME; both the extractor name and
    the feed URL are derived from it.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Redirect to https://www.youtube.com/feed/<_FEED_NAME>, handled by YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
2015-05-15 17:06:59 +02:00
2020-11-21 23:46:05 +01:00
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` keyword onto the ``WL`` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {'url': ':ytwatchlater', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Redirect to the ``WL`` playlist URL, handled by YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
2020-11-21 23:21:09 +01:00
2015-05-15 17:06:59 +02:00
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``; matches the bare youtube.com home URL and ``:ytrec``."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]
2014-08-31 23:44:43 +02:00
2015-05-15 17:06:59 +02:00
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions``, reached through the ``:ytsubs`` keyword family."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]
2014-08-31 23:44:43 +02:00
2015-05-15 17:06:59 +02:00
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history``, reached through ``:ythis`` / ``:ythistory``."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
2014-08-31 23:44:43 +02:00
2013-10-07 12:21:24 +02:00
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that end before any video ID and explain the likely cause."""

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose pattern. The trailing ``$`` means only URLs that stop right
    # after one of the listed parameters (or after ``watch?``) are matched.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041', 'only_matching': True},
        {'url': 'https://www.youtube.com/watch?', 'only_matching': True},
        {'url': 'https://www.youtube.com/watch?x-yt-cl=84503534', 'only_matching': True},
        {'url': 'https://www.youtube.com/watch?feature=foo', 'only_matching': True},
        {'url': 'https://www.youtube.com/watch?hl=en-GB', 'only_matching': True},
        {'url': 'https://www.youtube.com/watch?t=2372', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError advising the user to quote the URL."""
        advice = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(advice, expected=True)
2015-01-01 23:44:39 +01:00
2021-09-19 16:07:47 +02:00
class YoutubeClipIE(InfoExtractor):
    """Match youtube.com/clip/ URLs, warn that clips are unsupported and defer to 'Generic'."""

    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit the warning, then return the unchanged URL as a url_result for 'Generic'."""
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        return self.url_result(url, 'Generic')
2015-01-01 23:44:39 +01:00
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is only 1-10 characters long and report them as truncated."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID and the URL."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)