mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-24 21:56:53 +01:00
Merge remote-tracking branch 'origin/master' into pr-bbcnews
This commit is contained in:
commit
aa5740fb61
@ -4,7 +4,10 @@
|
||||
from .abc7news import Abc7NewsIE
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .addanime import AddAnimeIE
|
||||
from .adobetv import AdobeTVIE
|
||||
from .adobetv import (
|
||||
AdobeTVIE,
|
||||
AdobeTVVideoIE,
|
||||
)
|
||||
from .adultswim import AdultSwimIE
|
||||
from .aftenposten import AftenpostenIE
|
||||
from .aftonbladet import AftonbladetIE
|
||||
@ -103,6 +106,7 @@
|
||||
DailymotionIE,
|
||||
DailymotionPlaylistIE,
|
||||
DailymotionUserIE,
|
||||
DailymotionCloudIE,
|
||||
)
|
||||
from .daum import DaumIE
|
||||
from .dbtv import DBTVIE
|
||||
@ -401,6 +405,7 @@
|
||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pinkbike import PinkbikeIE
|
||||
from .planetaplay import PlanetaPlayIE
|
||||
from .pladform import PladformIE
|
||||
from .played import PlayedIE
|
||||
@ -696,7 +701,10 @@
|
||||
from .wsj import WSJIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xhamster import XHamsterIE
|
||||
from .xhamster import (
|
||||
XHamsterIE,
|
||||
XHamsterEmbedIE,
|
||||
)
|
||||
from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xstream import XstreamIE
|
||||
|
@ -5,6 +5,8 @@
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
str_to_int,
|
||||
float_or_none,
|
||||
ISO639Utils,
|
||||
)
|
||||
|
||||
|
||||
@ -69,3 +71,61 @@ def _real_extract(self, url):
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class AdobeTVVideoIE(InfoExtractor):
    """Extractor for player pages served from video.tv.adobe.com/v/<id>.

    All metadata is read from the JavaScript ``bridge`` variable embedded
    in the player page.
    """
    _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'

    _TEST = {
        # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
        'url': 'https://video.tv.adobe.com/v/2456/',
        'md5': '43662b577c018ad707a63766462b1e87',
        'info_dict': {
            'id': '2456',
            'ext': 'mp4',
            'title': 'New experience with Acrobat DC',
            'description': 'New experience with Acrobat DC',
            'duration': 248.667,
        },
    }

    def _real_extract(self, url):
        """Build the info dict (formats, title, duration, subtitles) for url."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        player_params = self._parse_json(self._search_regex(
            r'var\s+bridge\s*=\s*([^;]+);', webpage, 'player parameters'),
            video_id)

        sources = player_params['sources']

        formats = [{
            'url': source['src'],
            'width': source.get('width'),
            'height': source.get('height'),
            'tbr': source.get('bitrate'),
        } for source in sources]

        # For both metadata and downloaded files the duration varies among
        # formats, so the largest one is reported. The raw value is scaled
        # down by 1000 (presumably milliseconds -> seconds; TODO confirm).
        # max() raises ValueError on an empty sequence, so a missing
        # duration must yield None instead of aborting the extraction.
        durations = [
            source_duration for source_duration in (
                float_or_none(source.get('duration'), scale=1000)
                for source in sources)
            if source_duration]
        duration = max(durations) if durations else None

        # Group the WebVTT tracks by language code; the long language name
        # is only converted when no ready-made code is supplied.
        subtitles = {}
        for translation in player_params.get('translations', []):
            lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
            subtitles.setdefault(lang_id, []).append({
                'url': translation['vttPath'],
                'ext': 'vtt',
            })

        return {
            'id': video_id,
            'formats': formats,
            'title': player_params['title'],
            'description': self._og_search_description(webpage),
            'duration': duration,
            'subtitles': subtitles,
        }
|
||||
|
@ -255,26 +255,11 @@ def _get_subtitles(self, media, programme_id):
|
||||
for connection in self._extract_connections(media):
|
||||
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
|
||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
|
||||
srt = ''
|
||||
|
||||
def _extract_text(p):
|
||||
if p.text is not None:
|
||||
stripped_text = p.text.strip()
|
||||
if stripped_text:
|
||||
return stripped_text
|
||||
return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
|
||||
for pos, p in enumerate(ps):
|
||||
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
|
||||
subtitles[lang] = [
|
||||
{
|
||||
'url': connection.get('href'),
|
||||
'ext': 'ttml',
|
||||
},
|
||||
{
|
||||
'data': srt,
|
||||
'ext': 'srt',
|
||||
},
|
||||
]
|
||||
return subtitles
|
||||
|
||||
|
@ -13,6 +13,7 @@
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_xml_parse_error,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@ -119,7 +120,7 @@ def _build_brighcove_url(cls, object_str):
|
||||
|
||||
try:
|
||||
object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
|
||||
except xml.etree.ElementTree.ParseError:
|
||||
except compat_xml_parse_error:
|
||||
return
|
||||
|
||||
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
|
||||
|
@ -251,3 +251,45 @@ def _real_extract(self, url):
|
||||
'title': full_user,
|
||||
'entries': self._extract_entries(user),
|
||||
}
|
||||
|
||||
|
||||
class DailymotionCloudIE(DailymotionBaseInfoExtractor):
    """Extractor for Dailymotion Cloud embeds (api.dmcloud.net/embed/...)."""
    _VALID_URL = r'http://api\.dmcloud\.net/embed/[^/]+/(?P<id>[^/?]+)'

    _TEST = {
        # From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html
        # Tested at FranceTvInfo_2
        'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1',
        'only_matching': True,
    }

    @classmethod
    def _extract_dmcloud_url(cls, webpage):
        """Return the first dmcloud embed URL found in webpage, or None.

        An <iframe> embed is preferred; the hidden
        ``dmcloudUrlEmissionSelect`` <input> is only consulted when no
        iframe matches.
        """
        embed_patterns = (
            r'<iframe[^>]+src=[\'"](http://api\.dmcloud\.net/embed/[^/]+/[^\'"]+)[\'"]',
            r'<input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](http://api\.dmcloud\.net/embed/[^/]+/[^\'"]+)[\'"]',
        )
        for pattern in embed_patterns:
            mobj = re.search(pattern, webpage)
            if mobj:
                return mobj.group(1)
        return None

    def _real_extract(self, url):
        """Download the embed page and return a single-format info dict."""
        video_id = self._match_id(url)

        request = self._build_request(url)
        webpage = self._download_webpage(request, video_id)

        title = self._html_search_regex(r'<title>([^>]+)</title>', webpage, 'title')

        # The page defines a JavaScript ``info`` variable holding the media URLs.
        video_info = self._parse_json(self._search_regex(
            r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id)

        # TODO: parse ios_url, which is in fact a manifest
        video_url = video_info['mp4_url']

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': video_info.get('thumbnail_url'),
        }
|
||||
|
@ -6,6 +6,8 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@ -17,7 +19,39 @@
|
||||
)
|
||||
|
||||
|
||||
class DramaFeverIE(InfoExtractor):
|
||||
class DramaFeverBaseIE(InfoExtractor):
    """Shared base for the DramaFever extractors; performs the optional login."""
    _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
    _NETRC_MACHINE = 'dramafever'

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Log in with the configured credentials, if any were supplied.

        Raises ExtractorError when the response page shows no logout link.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        post_data = compat_urllib_parse.urlencode({
            'username': username,
            'password': password,
        }).encode('utf-8')
        login_request = compat_urllib_request.Request(self._LOGIN_URL, post_data)
        login_page = self._download_webpage(
            login_request, None, 'Logging in as %s' % username)

        # A logout link on the returned page is taken as proof of success.
        logged_in = any(
            marker in login_page
            for marker in ['href="/accounts/logout/"', '>Log out<'])
        if logged_in:
            return

        error = self._html_search_regex(
            r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
            login_page, 'error message', default=None)
        if error:
            raise ExtractorError('Unable to login: %s' % error, expected=True)
        raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
class DramaFeverIE(DramaFeverBaseIE):
|
||||
IE_NAME = 'dramafever'
|
||||
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
|
||||
_TEST = {
|
||||
@ -97,7 +131,7 @@ def _real_extract(self, url):
|
||||
}
|
||||
|
||||
|
||||
class DramaFeverSeriesIE(InfoExtractor):
|
||||
class DramaFeverSeriesIE(DramaFeverBaseIE):
|
||||
IE_NAME = 'dramafever:series'
|
||||
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
|
||||
_TESTS = [{
|
||||
@ -151,8 +185,11 @@ def _real_extract(self, url):
|
||||
% (consumer_secret, series_id, self._PAGE_SIZE, page_num),
|
||||
series_id, 'Downloading episodes JSON page #%d' % page_num)
|
||||
for episode in episodes.get('value', []):
|
||||
episode_url = episode.get('episode_url')
|
||||
if not episode_url:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
compat_urlparse.urljoin(url, episode['episode_url']),
|
||||
compat_urlparse.urljoin(url, episode_url),
|
||||
'DramaFever', episode.get('guid')))
|
||||
if page_num == episodes['num_pages']:
|
||||
break
|
||||
|
@ -15,7 +15,6 @@ class DRBonanzaIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
|
||||
'md5': 'fe330252ddea607635cf2eb2c99a0af3',
|
||||
'info_dict': {
|
||||
'id': '65517',
|
||||
'ext': 'mp4',
|
||||
@ -26,6 +25,9 @@ class DRBonanzaIE(InfoExtractor):
|
||||
'upload_date': '20110120',
|
||||
'duration': 3664,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmp
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
|
||||
'md5': '6dfe039417e76795fb783c52da3de11d',
|
||||
@ -93,6 +95,11 @@ def parse_filename_info(url):
|
||||
'format_id': file['Type'].replace('Video', ''),
|
||||
'preference': preferencemap.get(file['Type'], -10),
|
||||
})
|
||||
if format['url'].startswith('rtmp'):
|
||||
rtmp_url = format['url']
|
||||
format['rtmp_live'] = True # --resume does not work
|
||||
if '/bonanza/' in rtmp_url:
|
||||
format['play_path'] = rtmp_url.split('/bonanza/')[1]
|
||||
formats.append(format)
|
||||
elif file['Type'] == "Thumb":
|
||||
thumbnail = file['Location']
|
||||
@ -111,9 +118,6 @@ def parse_filename_info(url):
|
||||
description = '%s\n%s\n%s\n' % (
|
||||
info['Description'], info['Actors'], info['Colophon'])
|
||||
|
||||
for f in formats:
|
||||
f['url'] = f['url'].replace('rtmp://vod-bonanza.gss.dr.dk/bonanza/', 'http://vodfiles.dr.dk/')
|
||||
f['url'] = f['url'].replace('mp4:bonanza', 'bonanza')
|
||||
self._sort_formats(formats)
|
||||
|
||||
display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id
|
||||
|
@ -6,9 +6,9 @@
|
||||
|
||||
class FazIE(InfoExtractor):
|
||||
IE_NAME = 'faz.net'
|
||||
_VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?faz\.net/(?:[^/]+/)*.*?-(?P<id>\d+)\.html'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
|
||||
'info_dict': {
|
||||
'id': '12610585',
|
||||
@ -16,7 +16,22 @@ class FazIE(InfoExtractor):
|
||||
'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
|
||||
'description': 'md5:1453fbf9a0d041d985a47306192ea253',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.faz.net/aktuell/politik/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.faz.net/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.faz.net/-13659345.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.faz.net/aktuell/politik/-13659345.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.faz.net/foobarblafasel-13659345.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@ -18,6 +18,7 @@
|
||||
parse_duration,
|
||||
determine_ext,
|
||||
)
|
||||
from .dailymotion import DailymotionCloudIE
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
@ -131,12 +132,26 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
'skip_download': 'HLS (reqires ffmpeg)'
|
||||
},
|
||||
'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
|
||||
}, {
|
||||
'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
|
||||
'md5': 'f485bda6e185e7d15dbc69b72bae993e',
|
||||
'info_dict': {
|
||||
'id': '556e03339473995ee145930c',
|
||||
'ext': 'mp4',
|
||||
'title': 'Les entreprises familiales : le secret de la réussite',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
page_title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, page_title)
|
||||
|
||||
dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
|
||||
if dmcloud_url:
|
||||
return self.url_result(dmcloud_url, 'DailymotionCloud')
|
||||
|
||||
video_id, catalogue = self._search_regex(
|
||||
r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@')
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
@ -43,6 +43,9 @@
|
||||
from .bliptv import BlipTVIE
|
||||
from .svt import SVTIE
|
||||
from .pornhub import PornHubIE
|
||||
from .xhamster import XHamsterEmbedIE
|
||||
from .vimeo import VimeoIE
|
||||
from .dailymotion import DailymotionCloudIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -333,6 +336,15 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# XHamster embed
|
||||
{
|
||||
'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
|
||||
'info_dict': {
|
||||
'id': 'showthread',
|
||||
'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
},
|
||||
# Embedded TED video
|
||||
{
|
||||
'url': 'http://en.support.wordpress.com/videos/ted-talks/',
|
||||
@ -812,6 +824,29 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
|
||||
'uploader': 'Rogers Sportsnet',
|
||||
},
|
||||
},
|
||||
# Dailymotion Cloud video
|
||||
{
|
||||
'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
|
||||
'md5': '49444254273501a64675a7e68c502681',
|
||||
'info_dict': {
|
||||
'id': '5585de919473990de4bee11b',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le débat',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||
}
|
||||
},
|
||||
# AdobeTVVideo embed
|
||||
{
|
||||
'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
|
||||
'md5': '43662b577c018ad707a63766462b1e87',
|
||||
'info_dict': {
|
||||
'id': '2456',
|
||||
'ext': 'mp4',
|
||||
'title': 'New experience with Acrobat DC',
|
||||
'description': 'New experience with Acrobat DC',
|
||||
'duration': 248.667,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
@ -1089,18 +1124,9 @@ def _playlist_from_matches(matches, getter=None, ie=None):
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, ie='RtlNl')
|
||||
|
||||
# Look for embedded (iframe) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
|
||||
if mobj:
|
||||
player_url = unescapeHTML(mobj.group('url'))
|
||||
surl = smuggle_url(player_url, {'Referer': url})
|
||||
return self.url_result(surl)
|
||||
# Look for embedded (swf embed) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
||||
if mobj:
|
||||
return self.url_result(mobj.group(1))
|
||||
vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
|
||||
if vimeo_url is not None:
|
||||
return self.url_result(vimeo_url)
|
||||
|
||||
# Look for embedded YouTube player
|
||||
matches = re.findall(r'''(?x)
|
||||
@ -1327,6 +1353,11 @@ def _playlist_from_matches(matches, getter=None, ie=None):
|
||||
if pornhub_url:
|
||||
return self.url_result(pornhub_url, 'PornHub')
|
||||
|
||||
# Look for embedded XHamster player
|
||||
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
|
||||
if xhamster_urls:
|
||||
return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
|
||||
|
||||
# Look for embedded Tvigle player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
|
||||
@ -1494,6 +1525,20 @@ def _playlist_from_matches(matches, getter=None, ie=None):
|
||||
if senate_isvp_url:
|
||||
return self.url_result(senate_isvp_url, 'SenateISVP')
|
||||
|
||||
# Look for Dailymotion Cloud videos
|
||||
dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
|
||||
if dmcloud_url:
|
||||
return self.url_result(dmcloud_url, 'DailymotionCloud')
|
||||
|
||||
# Look for AdobeTVVideo embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(
|
||||
self._proto_relative_url(unescapeHTML(mobj.group(1))),
|
||||
'AdobeTVVideo')
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
|
@ -46,7 +46,7 @@ def _real_extract(self, url):
|
||||
format_info = info['videoPlayerObject']['video']
|
||||
formats.append({
|
||||
'format_id': f_id,
|
||||
'url': format_info['url'],
|
||||
'url': format_info['videoInfoList'][0]['videoUrl'],
|
||||
})
|
||||
|
||||
return {
|
||||
|
96
youtube_dl/extractor/pinkbike.py
Normal file
96
youtube_dl/extractor/pinkbike.py
Normal file
@ -0,0 +1,96 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
remove_end,
|
||||
remove_start,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class PinkbikeIE(InfoExtractor):
    """Extractor for pinkbike.com video pages and the kvid-y5.swf player URL."""
    _VALID_URL = r'https?://(?:(?:www\.)?pinkbike\.com/video/|es\.pinkbike\.org/i/kvid/kvid-y5\.swf\?id=)(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.pinkbike.com/video/402811/',
        'md5': '4814b8ca7651034cd87e3361d5c2155a',
        'info_dict': {
            'id': '402811',
            'ext': 'mp4',
            'title': 'Brandon Semenuk - RAW 100',
            'description': 'Official release: www.redbull.ca/rupertwalker',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 100,
            'upload_date': '20150406',
            'uploader': 'revelco',
            'location': 'Victoria, British Columbia, Canada',
            'view_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'http://es.pinkbike.org/i/kvid/kvid-y5.swf?id=406629',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the canonical video page and return the info dict."""
        video_id = self._match_id(url)

        # Both accepted URL shapes are resolved through the canonical page.
        webpage = self._download_webpage(
            'http://www.pinkbike.com/video/%s' % video_id, video_id)

        formats = []
        # The quote character is captured (optionally backslash-escaped) so
        # the same delimiter can be required around the src value.
        for _, format_id, src in re.findall(
                r'data-quality=((?:\\)?["\'])(.+?)\1[^>]+src=\1(.+?)\1', webpage):
            height = int_or_none(self._search_regex(
                r'^(\d+)[pP]$', format_id, 'height', default=None))
            formats.append({
                'url': src,
                'format_id': format_id,
                'height': height,
            })
        self._sort_formats(formats)

        title = remove_end(self._og_search_title(webpage), ' Video - Pinkbike')

        description = self._html_search_regex(
            r'(?s)id="media-description"[^>]*>(.+?)<',
            webpage, 'description', default=None)
        if not description:
            # Fall back to og:description, minus the leading "<title>. ".
            # The og value may be absent as well (presumably returned as
            # None - verify against InfoExtractor); the description is
            # optional, so that case must not break the extraction.
            og_description = self._og_search_description(webpage)
            description = (
                remove_start(og_description, title + '. ')
                if og_description is not None else None)

        thumbnail = self._og_search_thumbnail(webpage)
        duration = int_or_none(self._html_search_meta(
            'video:duration', webpage, 'duration'))

        uploader = self._search_regex(
            r'un:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
        upload_date = unified_strdate(self._search_regex(
            r'class="fullTime"[^>]+title="([^"]+)"',
            webpage, 'upload date', fatal=False))

        location = self._html_search_regex(
            r'(?s)<dt>Location</dt>\s*<dd>(.+?)<',
            webpage, 'location', fatal=False)

        def extract_count(webpage, label):
            # Reads the "stat-num" figure shown next to the given stat label.
            return str_to_int(self._search_regex(
                r'<span[^>]+class="stat-num"[^>]*>([\d,.]+)</span>\s*<span[^>]+class="stat-label"[^>]*>%s' % label,
                webpage, label, fatal=False))

        view_count = extract_count(webpage, 'Views')
        comment_count = extract_count(webpage, 'Comments')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'uploader': uploader,
            'location': location,
            'view_count': view_count,
            'comment_count': comment_count,
            'formats': formats
        }
|
@ -6,9 +6,12 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_request
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class SohuIE(InfoExtractor):
|
||||
@ -26,7 +29,7 @@ class SohuIE(InfoExtractor):
|
||||
'skip': 'On available in China',
|
||||
}, {
|
||||
'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
|
||||
'md5': 'ac9a5d322b4bf9ae184d53e4711e4f1a',
|
||||
'md5': '699060e75cf58858dd47fb9c03c42cfb',
|
||||
'info_dict': {
|
||||
'id': '409385080',
|
||||
'ext': 'mp4',
|
||||
@ -34,7 +37,7 @@ class SohuIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
|
||||
'md5': '49308ff6dafde5ece51137d04aec311e',
|
||||
'md5': '9bf34be48f2f4dadcb226c74127e203c',
|
||||
'info_dict': {
|
||||
'id': '78693464',
|
||||
'ext': 'mp4',
|
||||
@ -48,7 +51,7 @@ class SohuIE(InfoExtractor):
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '492923eac023ba2f13ff69617c32754a',
|
||||
'md5': 'bdbfb8f39924725e6589c146bc1883ad',
|
||||
'info_dict': {
|
||||
'id': '78910339_part1',
|
||||
'ext': 'mp4',
|
||||
@ -56,7 +59,7 @@ class SohuIE(InfoExtractor):
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
}
|
||||
}, {
|
||||
'md5': 'de604848c0e8e9c4a4dde7e1347c0637',
|
||||
'md5': '3e1f46aaeb95354fd10e7fca9fc1804e',
|
||||
'info_dict': {
|
||||
'id': '78910339_part2',
|
||||
'ext': 'mp4',
|
||||
@ -64,7 +67,7 @@ class SohuIE(InfoExtractor):
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
}
|
||||
}, {
|
||||
'md5': '93584716ee0657c0b205b8aa3d27aa13',
|
||||
'md5': '8407e634175fdac706766481b9443450',
|
||||
'info_dict': {
|
||||
'id': '78910339_part3',
|
||||
'ext': 'mp4',
|
||||
@ -139,21 +142,42 @@ def _fetch_data(vid_id, mytv=False):
|
||||
for i in range(part_count):
|
||||
formats = []
|
||||
for format_id, format_data in formats_json.items():
|
||||
data = format_data['data']
|
||||
allot = format_data['allot']
|
||||
|
||||
data = format_data['data']
|
||||
clips_url = data['clipsURL']
|
||||
su = data['su']
|
||||
|
||||
# URLs starting with http://newflv.sohu.ccgslb.net/ are not usable
|
||||
# so retry until we get a working URL
|
||||
video_url = 'newflv.sohu.ccgslb.net'
|
||||
cdnId = None
|
||||
retries = 0
|
||||
while 'newflv.sohu.ccgslb.net' in video_url and retries < 5:
|
||||
download_note = 'Download information from CDN gateway for format ' + format_id
|
||||
|
||||
while 'newflv.sohu.ccgslb.net' in video_url:
|
||||
params = {
|
||||
'prot': 9,
|
||||
'file': clips_url[i],
|
||||
'new': su[i],
|
||||
'prod': 'flash',
|
||||
}
|
||||
|
||||
if cdnId is not None:
|
||||
params['idc'] = cdnId
|
||||
|
||||
download_note = 'Downloading %s video URL part %d of %d' % (
|
||||
format_id, i + 1, part_count)
|
||||
|
||||
if retries > 0:
|
||||
download_note += ' (retry #%d)' % retries
|
||||
part_info = self._parse_json(self._download_webpage(
|
||||
'http://%s/?%s' % (allot, compat_urllib_parse.urlencode(params)),
|
||||
video_id, download_note), video_id)
|
||||
|
||||
video_url = part_info['url']
|
||||
cdnId = part_info.get('nid')
|
||||
|
||||
retries += 1
|
||||
cdn_info = self._download_json(
|
||||
'http://data.vod.itc.cn/cdnList?new=' + data['su'][i],
|
||||
video_id, download_note)
|
||||
video_url = cdn_info['url']
|
||||
if retries > 5:
|
||||
raise ExtractorError('Failed to get video URL')
|
||||
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .pornhub import PornHubIE
|
||||
from .vimeo import VimeoIE
|
||||
|
||||
|
||||
class TumblrIE(InfoExtractor):
|
||||
@ -40,6 +41,17 @@ class TumblrIE(InfoExtractor):
|
||||
'timestamp': 1430931613,
|
||||
},
|
||||
'add_ie': ['Vidme'],
|
||||
}, {
|
||||
'url': 'http://camdamage.tumblr.com/post/98846056295/',
|
||||
'md5': 'a9e0c8371ea1ca306d6554e3fecf50b6',
|
||||
'info_dict': {
|
||||
'id': '105463834',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cam Damage-HD 720p',
|
||||
'uploader': 'John Moyer',
|
||||
'uploader_id': 'user32021558',
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -60,6 +72,10 @@ def _real_extract(self, url):
|
||||
if pornhub_url:
|
||||
return self.url_result(pornhub_url, 'PornHub')
|
||||
|
||||
vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
|
||||
if vimeo_url:
|
||||
return self.url_result(vimeo_url, 'Vimeo')
|
||||
|
||||
iframe_url = self._search_regex(
|
||||
r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
|
||||
webpage, 'iframe url')
|
||||
|
@ -28,11 +28,15 @@ class VikiBaseIE(InfoExtractor):
|
||||
|
||||
_NETRC_MACHINE = 'viki'
|
||||
|
||||
_token = None
|
||||
|
||||
def _prepare_call(self, path, timestamp=None, post_data=None):
|
||||
path += '?' if '?' not in path else '&'
|
||||
if not timestamp:
|
||||
timestamp = int(time.time())
|
||||
query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
|
||||
if self._token:
|
||||
query += '&token=%s' % self._token
|
||||
sig = hmac.new(
|
||||
self._APP_SECRET.encode('ascii'),
|
||||
query.encode('ascii'),
|
||||
@ -76,10 +80,14 @@ def _login(self):
|
||||
'password': password,
|
||||
}
|
||||
|
||||
self._call_api(
|
||||
login = self._call_api(
|
||||
'sessions.json', None,
|
||||
'Logging in as %s' % username, post_data=login_form)
|
||||
|
||||
self._token = login.get('token')
|
||||
if not self._token:
|
||||
self.report_warning('Unable to get session token, login has probably failed')
|
||||
|
||||
|
||||
class VikiIE(VikiBaseIE):
|
||||
IE_NAME = 'viki'
|
||||
|
@ -22,6 +22,7 @@
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
urlencode_postdata,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
@ -173,6 +174,21 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
@staticmethod
def _extract_vimeo_url(url, webpage):
    """Return an embedded Vimeo player URL found in webpage, or None.

    url is the address of the embedding page; it is smuggled into the
    result as the Referer when the iframe player is found.
    """
    # Embedded (iframe) Vimeo player
    iframe_match = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
    if iframe_match:
        return smuggle_url(
            unescapeHTML(iframe_match.group('url')), {'Referer': url})

    # Embedded (swf embed) Vimeo player
    swf_match = re.search(
        r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
    return swf_match.group(1) if swf_match else None
|
||||
|
||||
def _verify_video_password(self, url, video_id, webpage):
|
||||
password = self._downloader.params.get('videopassword', None)
|
||||
if password is None:
|
||||
|
@ -13,7 +13,6 @@
|
||||
|
||||
|
||||
class XHamsterIE(InfoExtractor):
|
||||
"""Information Extractor for xHamster"""
|
||||
_VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
|
||||
_TESTS = [
|
||||
{
|
||||
@ -133,3 +132,36 @@ def is_hd(webpage):
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class XHamsterEmbedIE(InfoExtractor):
    """Resolves xhamster.com/xembed.php player URLs to the full movie page."""
    _VALID_URL = r'https?://(?:www\.)?xhamster\.com/xembed\.php\?video=(?P<id>\d+)'
    _TEST = {
        'url': 'http://xhamster.com/xembed.php?video=3328539',
        'info_dict': {
            'id': '3328539',
            'ext': 'mp4',
            'title': 'Pen Masturbation',
            'upload_date': '20140728',
            'uploader_id': 'anonymous',
            'duration': 5,
            'age_limit': 18,
        }
    }

    @staticmethod
    def _extract_urls(webpage):
        """Return every xembed.php iframe URL in webpage, in document order."""
        return [
            mobj.group('url') for mobj in re.finditer(
                r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1',
                webpage)]

    def _real_extract(self, url):
        """Find the movie page link on the embed page and delegate to XHamster."""
        video_id = self._match_id(url)

        embed_page = self._download_webpage(url, video_id)

        movie_url_re = r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id
        movie_url = self._search_regex(movie_url_re, embed_page, 'xhamster url')

        return self.url_result(movie_url, 'XHamster')
|
||||
|
@ -5,10 +5,12 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
@ -25,6 +27,8 @@ class XVideosIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
_ANDROID_USER_AGENT = 'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
@ -40,9 +44,30 @@ def _real_extract(self, url):
|
||||
video_thumbnail = self._search_regex(
|
||||
r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
}]
|
||||
|
||||
android_req = compat_urllib_request.Request(url)
|
||||
android_req.add_header('User-Agent', self._ANDROID_USER_AGENT)
|
||||
android_webpage = self._download_webpage(android_req, video_id, fatal=False)
|
||||
|
||||
if android_webpage is not None:
|
||||
player_params_str = self._search_regex(
|
||||
'mobileReplacePlayerDivTwoQual\(([^)]+)\)',
|
||||
android_webpage, 'player parameters', default='')
|
||||
player_params = list(map(lambda s: s.strip(' \''), player_params_str.split(',')))
|
||||
if player_params:
|
||||
formats.extend([{
|
||||
'url': param,
|
||||
'preference': -10,
|
||||
} for param in player_params if determine_ext(param) == 'mp4'])
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'title': video_title,
|
||||
'ext': 'flv',
|
||||
'thumbnail': video_thumbnail,
|
||||
|
@ -234,6 +234,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'44': {'ext': 'webm', 'width': 854, 'height': 480},
|
||||
'45': {'ext': 'webm', 'width': 1280, 'height': 720},
|
||||
'46': {'ext': 'webm', 'width': 1920, 'height': 1080},
|
||||
'59': {'ext': 'mp4', 'width': 854, 'height': 480},
|
||||
'78': {'ext': 'mp4', 'width': 854, 'height': 480},
|
||||
|
||||
|
||||
# 3d videos
|
||||
|
@ -21,6 +21,7 @@
|
||||
shell_quote,
|
||||
subtitles_filename,
|
||||
dfxp2srt,
|
||||
ISO639Utils,
|
||||
)
|
||||
|
||||
|
||||
@ -307,199 +308,6 @@ def run(self, information):
|
||||
|
||||
|
||||
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
||||
# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
|
||||
_lang_map = {
|
||||
'aa': 'aar',
|
||||
'ab': 'abk',
|
||||
'ae': 'ave',
|
||||
'af': 'afr',
|
||||
'ak': 'aka',
|
||||
'am': 'amh',
|
||||
'an': 'arg',
|
||||
'ar': 'ara',
|
||||
'as': 'asm',
|
||||
'av': 'ava',
|
||||
'ay': 'aym',
|
||||
'az': 'aze',
|
||||
'ba': 'bak',
|
||||
'be': 'bel',
|
||||
'bg': 'bul',
|
||||
'bh': 'bih',
|
||||
'bi': 'bis',
|
||||
'bm': 'bam',
|
||||
'bn': 'ben',
|
||||
'bo': 'bod',
|
||||
'br': 'bre',
|
||||
'bs': 'bos',
|
||||
'ca': 'cat',
|
||||
'ce': 'che',
|
||||
'ch': 'cha',
|
||||
'co': 'cos',
|
||||
'cr': 'cre',
|
||||
'cs': 'ces',
|
||||
'cu': 'chu',
|
||||
'cv': 'chv',
|
||||
'cy': 'cym',
|
||||
'da': 'dan',
|
||||
'de': 'deu',
|
||||
'dv': 'div',
|
||||
'dz': 'dzo',
|
||||
'ee': 'ewe',
|
||||
'el': 'ell',
|
||||
'en': 'eng',
|
||||
'eo': 'epo',
|
||||
'es': 'spa',
|
||||
'et': 'est',
|
||||
'eu': 'eus',
|
||||
'fa': 'fas',
|
||||
'ff': 'ful',
|
||||
'fi': 'fin',
|
||||
'fj': 'fij',
|
||||
'fo': 'fao',
|
||||
'fr': 'fra',
|
||||
'fy': 'fry',
|
||||
'ga': 'gle',
|
||||
'gd': 'gla',
|
||||
'gl': 'glg',
|
||||
'gn': 'grn',
|
||||
'gu': 'guj',
|
||||
'gv': 'glv',
|
||||
'ha': 'hau',
|
||||
'he': 'heb',
|
||||
'hi': 'hin',
|
||||
'ho': 'hmo',
|
||||
'hr': 'hrv',
|
||||
'ht': 'hat',
|
||||
'hu': 'hun',
|
||||
'hy': 'hye',
|
||||
'hz': 'her',
|
||||
'ia': 'ina',
|
||||
'id': 'ind',
|
||||
'ie': 'ile',
|
||||
'ig': 'ibo',
|
||||
'ii': 'iii',
|
||||
'ik': 'ipk',
|
||||
'io': 'ido',
|
||||
'is': 'isl',
|
||||
'it': 'ita',
|
||||
'iu': 'iku',
|
||||
'ja': 'jpn',
|
||||
'jv': 'jav',
|
||||
'ka': 'kat',
|
||||
'kg': 'kon',
|
||||
'ki': 'kik',
|
||||
'kj': 'kua',
|
||||
'kk': 'kaz',
|
||||
'kl': 'kal',
|
||||
'km': 'khm',
|
||||
'kn': 'kan',
|
||||
'ko': 'kor',
|
||||
'kr': 'kau',
|
||||
'ks': 'kas',
|
||||
'ku': 'kur',
|
||||
'kv': 'kom',
|
||||
'kw': 'cor',
|
||||
'ky': 'kir',
|
||||
'la': 'lat',
|
||||
'lb': 'ltz',
|
||||
'lg': 'lug',
|
||||
'li': 'lim',
|
||||
'ln': 'lin',
|
||||
'lo': 'lao',
|
||||
'lt': 'lit',
|
||||
'lu': 'lub',
|
||||
'lv': 'lav',
|
||||
'mg': 'mlg',
|
||||
'mh': 'mah',
|
||||
'mi': 'mri',
|
||||
'mk': 'mkd',
|
||||
'ml': 'mal',
|
||||
'mn': 'mon',
|
||||
'mr': 'mar',
|
||||
'ms': 'msa',
|
||||
'mt': 'mlt',
|
||||
'my': 'mya',
|
||||
'na': 'nau',
|
||||
'nb': 'nob',
|
||||
'nd': 'nde',
|
||||
'ne': 'nep',
|
||||
'ng': 'ndo',
|
||||
'nl': 'nld',
|
||||
'nn': 'nno',
|
||||
'no': 'nor',
|
||||
'nr': 'nbl',
|
||||
'nv': 'nav',
|
||||
'ny': 'nya',
|
||||
'oc': 'oci',
|
||||
'oj': 'oji',
|
||||
'om': 'orm',
|
||||
'or': 'ori',
|
||||
'os': 'oss',
|
||||
'pa': 'pan',
|
||||
'pi': 'pli',
|
||||
'pl': 'pol',
|
||||
'ps': 'pus',
|
||||
'pt': 'por',
|
||||
'qu': 'que',
|
||||
'rm': 'roh',
|
||||
'rn': 'run',
|
||||
'ro': 'ron',
|
||||
'ru': 'rus',
|
||||
'rw': 'kin',
|
||||
'sa': 'san',
|
||||
'sc': 'srd',
|
||||
'sd': 'snd',
|
||||
'se': 'sme',
|
||||
'sg': 'sag',
|
||||
'si': 'sin',
|
||||
'sk': 'slk',
|
||||
'sl': 'slv',
|
||||
'sm': 'smo',
|
||||
'sn': 'sna',
|
||||
'so': 'som',
|
||||
'sq': 'sqi',
|
||||
'sr': 'srp',
|
||||
'ss': 'ssw',
|
||||
'st': 'sot',
|
||||
'su': 'sun',
|
||||
'sv': 'swe',
|
||||
'sw': 'swa',
|
||||
'ta': 'tam',
|
||||
'te': 'tel',
|
||||
'tg': 'tgk',
|
||||
'th': 'tha',
|
||||
'ti': 'tir',
|
||||
'tk': 'tuk',
|
||||
'tl': 'tgl',
|
||||
'tn': 'tsn',
|
||||
'to': 'ton',
|
||||
'tr': 'tur',
|
||||
'ts': 'tso',
|
||||
'tt': 'tat',
|
||||
'tw': 'twi',
|
||||
'ty': 'tah',
|
||||
'ug': 'uig',
|
||||
'uk': 'ukr',
|
||||
'ur': 'urd',
|
||||
'uz': 'uzb',
|
||||
've': 'ven',
|
||||
'vi': 'vie',
|
||||
'vo': 'vol',
|
||||
'wa': 'wln',
|
||||
'wo': 'wol',
|
||||
'xh': 'xho',
|
||||
'yi': 'yid',
|
||||
'yo': 'yor',
|
||||
'za': 'zha',
|
||||
'zh': 'zho',
|
||||
'zu': 'zul',
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _conver_lang_code(cls, code):
|
||||
"""Convert language code from ISO 639-1 to ISO 639-2/T"""
|
||||
return cls._lang_map.get(code[:2])
|
||||
|
||||
def run(self, information):
|
||||
if information['ext'] not in ['mp4', 'mkv']:
|
||||
self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 or mkv files')
|
||||
@ -525,7 +333,7 @@ def run(self, information):
|
||||
opts += ['-c:s', 'mov_text']
|
||||
for (i, lang) in enumerate(sub_langs):
|
||||
opts.extend(['-map', '%d:0' % (i + 1)])
|
||||
lang_code = self._conver_lang_code(lang)
|
||||
lang_code = ISO639Utils.short2long(lang)
|
||||
if lang_code is not None:
|
||||
opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
|
||||
|
||||
|
@ -1841,7 +1841,10 @@ def srt_subtitles_timecode(seconds):
|
||||
|
||||
|
||||
def dfxp2srt(dfxp_data):
|
||||
_x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})
|
||||
_x = functools.partial(xpath_with_ns, ns_map={
|
||||
'ttml': 'http://www.w3.org/ns/ttml',
|
||||
'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
|
||||
})
|
||||
|
||||
def parse_node(node):
|
||||
str_or_empty = functools.partial(str_or_none, default='')
|
||||
@ -1849,9 +1852,9 @@ def parse_node(node):
|
||||
out = str_or_empty(node.text)
|
||||
|
||||
for child in node:
|
||||
if child.tag in (_x('ttml:br'), 'br'):
|
||||
if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
|
||||
out += '\n' + str_or_empty(child.tail)
|
||||
elif child.tag in (_x('ttml:span'), 'span'):
|
||||
elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
|
||||
out += str_or_empty(parse_node(child))
|
||||
else:
|
||||
out += str_or_empty(xml.etree.ElementTree.tostring(child))
|
||||
@ -1860,7 +1863,7 @@ def parse_node(node):
|
||||
|
||||
dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
|
||||
out = []
|
||||
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
|
||||
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
|
||||
|
||||
if not paras:
|
||||
raise ValueError('Invalid dfxp/TTML subtitle')
|
||||
@ -1879,6 +1882,208 @@ def parse_node(node):
|
||||
return ''.join(out)
|
||||
|
||||
|
||||
class ISO639Utils(object):
    """Conversions between ISO 639-1 (two-letter) and ISO 639-2/T
    (three-letter) language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    # Maps ISO 639-1 code -> ISO 639-2/T code.
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Anything after the first two characters (e.g. the region part of
        'en-US' or 'pt_BR') is ignored. Returns None when the code is not
        a known ISO 639-1 code.
        """
        # A letter in third position means the leading language part is
        # longer than two letters, so it is not an ISO 639-1 code at all.
        # Truncating it would report an unrelated language
        # ('swe' -> 'sw' -> 'swa', 'fil' -> 'fi' -> 'fin').
        if code[2:3].isalpha():
            return None
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Returns None when no ISO 639-1 code maps to the given code.
        """
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
        return None
|
||||
|
||||
|
||||
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
|
||||
def __init__(self, proxies=None):
|
||||
# Set default handlers
|
||||
|
Loading…
Reference in New Issue
Block a user