mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-24 12:15:50 +01:00
commit
bfc993cc91
2
Makefile
2
Makefile
@ -43,7 +43,7 @@ test:
|
||||
ot: offlinetest
|
||||
|
||||
offlinetest: codetest
|
||||
nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists
|
||||
nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py
|
||||
|
||||
tar: youtube-dl.tar.gz
|
||||
|
||||
|
@ -28,7 +28,7 @@
|
||||
"retries": 10,
|
||||
"simulate": false,
|
||||
"subtitleslang": null,
|
||||
"subtitlesformat": "srt",
|
||||
"subtitlesformat": "best",
|
||||
"test": true,
|
||||
"updatetime": true,
|
||||
"usenetrc": false,
|
||||
|
@ -337,6 +337,65 @@ def test_format_filtering(self):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'G')
|
||||
|
||||
def test_subtitles(self):
    # Checks how process_video_result fills 'requested_subtitles' for
    # different combinations of the subtitle-related parameters.
    def s_formats(lang, autocaption=False):
        # One entry per extension; the last one ('ass') is what 'best' selects
        return [{
            'ext': ext,
            'url': 'http://localhost/video.%s.%s' % (lang, ext),
            '_auto': autocaption,
        } for ext in ['vtt', 'srt', 'ass']]
    subtitles = dict((l, s_formats(l)) for l in ['en', 'fr', 'es'])
    auto_captions = dict((l, s_formats(l, True)) for l in ['it', 'pt', 'es'])
    info_dict = {
        'id': 'test',
        'title': 'Test',
        'url': 'http://localhost/video.mp4',
        'subtitles': subtitles,
        'automatic_captions': auto_captions,
        'extractor': 'TEST',
    }

    def get_info(params=None):
        # Work on a copy: a mutable default (or the caller's dict) must not
        # be modified by setdefault and leak state between calls
        params = dict(params or {})
        params.setdefault('simulate', True)
        ydl = YDL(params)
        ydl.report_warning = lambda *args, **kargs: None
        return ydl.process_video_result(info_dict, download=False)

    # No subtitle option given: nothing is requested, inputs are left alone
    result = get_info()
    self.assertFalse(result.get('requested_subtitles'))
    self.assertEqual(result['subtitles'], subtitles)
    self.assertEqual(result['automatic_captions'], auto_captions)

    # Default language is 'en' and the default format is the last one listed
    result = get_info({'writesubtitles': True})
    subs = result['requested_subtitles']
    self.assertTrue(subs)
    self.assertEqual(set(subs.keys()), set(['en']))
    self.assertTrue(subs['en'].get('data') is None)
    self.assertEqual(subs['en']['ext'], 'ass')

    # Unknown preferences are skipped until one matches
    result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'})
    subs = result['requested_subtitles']
    self.assertEqual(subs['en']['ext'], 'srt')

    # 'it' only exists as an automatic caption, which was not requested
    result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']})
    subs = result['requested_subtitles']
    self.assertTrue(subs)
    self.assertEqual(set(subs.keys()), set(['es', 'fr']))

    # Normal subtitles win over automatic captions for the same language
    result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
    subs = result['requested_subtitles']
    self.assertTrue(subs)
    self.assertEqual(set(subs.keys()), set(['es', 'pt']))
    self.assertFalse(subs['es']['_auto'])
    self.assertTrue(subs['pt']['_auto'])

    # Only automatic captions requested: both languages come from them
    result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
    subs = result['requested_subtitles']
    self.assertTrue(subs)
    self.assertEqual(set(subs.keys()), set(['es', 'pt']))
    self.assertTrue(subs['es']['_auto'])
    self.assertTrue(subs['pt']['_auto'])
|
||||
|
||||
def test_add_extra_info(self):
|
||||
test_dict = {
|
||||
'extractor': 'Foo',
|
||||
|
@ -18,6 +18,13 @@
|
||||
VimeoIE,
|
||||
WallaIE,
|
||||
CeskaTelevizeIE,
|
||||
LyndaIE,
|
||||
NPOIE,
|
||||
ComedyCentralIE,
|
||||
NRKTVIE,
|
||||
RaiIE,
|
||||
VikiIE,
|
||||
ThePlatformIE,
|
||||
)
|
||||
|
||||
|
||||
@ -27,42 +34,38 @@ class BaseTestSubtitles(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.DL = FakeYDL()
|
||||
self.ie = self.IE(self.DL)
|
||||
self.ie = self.IE()
|
||||
self.DL.add_info_extractor(self.ie)
|
||||
|
||||
def getInfoDict(self):
|
||||
info_dict = self.ie.extract(self.url)
|
||||
info_dict = self.DL.extract_info(self.url, download=False)
|
||||
return info_dict
|
||||
|
||||
def getSubtitles(self):
|
||||
info_dict = self.getInfoDict()
|
||||
return info_dict['subtitles']
|
||||
subtitles = info_dict['requested_subtitles']
|
||||
if not subtitles:
|
||||
return subtitles
|
||||
for sub_info in subtitles.values():
|
||||
if sub_info.get('data') is None:
|
||||
uf = self.DL.urlopen(sub_info['url'])
|
||||
sub_info['data'] = uf.read().decode('utf-8')
|
||||
return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
|
||||
|
||||
|
||||
class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||
url = 'QRS8MkLhQmM'
|
||||
IE = YoutubeIE
|
||||
|
||||
def test_youtube_no_writesubtitles(self):
|
||||
self.DL.params['writesubtitles'] = False
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(subtitles, None)
|
||||
|
||||
def test_youtube_subtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
|
||||
|
||||
def test_youtube_subtitles_lang(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitleslangs'] = ['it']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
|
||||
|
||||
def test_youtube_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles.keys()), 13)
|
||||
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
|
||||
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
|
||||
for lang in ['it', 'fr', 'de']:
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
def test_youtube_subtitles_sbv_format(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
@ -76,12 +79,6 @@ def test_youtube_subtitles_vtt_format(self):
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
|
||||
|
||||
def test_youtube_list_subtitles(self):
|
||||
self.DL.expect_warning('Video doesn\'t have automatic captions')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_youtube_automatic_captions(self):
|
||||
self.url = '8YoUxe5ncPo'
|
||||
self.DL.params['writeautomaticsub'] = True
|
||||
@ -103,55 +100,22 @@ def test_youtube_nosubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles), 0)
|
||||
|
||||
def test_youtube_multiple_langs(self):
|
||||
self.url = 'QRS8MkLhQmM'
|
||||
self.DL.params['writesubtitles'] = True
|
||||
langs = ['it', 'fr', 'de']
|
||||
self.DL.params['subtitleslangs'] = langs
|
||||
subtitles = self.getSubtitles()
|
||||
for lang in langs:
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
class TestDailymotionSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.dailymotion.com/video/xczg00'
|
||||
IE = DailymotionIE
|
||||
|
||||
def test_no_writesubtitles(self):
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(subtitles, None)
|
||||
|
||||
def test_subtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
|
||||
|
||||
def test_subtitles_lang(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitleslangs'] = ['fr']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertTrue(len(subtitles.keys()) >= 6)
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_automatic_captions(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['writeautomaticsub'] = True
|
||||
self.DL.params['subtitleslang'] = ['en']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertTrue(len(subtitles.keys()) == 0)
|
||||
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
|
||||
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
|
||||
for lang in ['es', 'fr', 'de']:
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
def test_nosubtitles(self):
|
||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||
@ -159,61 +123,21 @@ def test_nosubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles), 0)
|
||||
|
||||
def test_multiple_langs(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
langs = ['es', 'fr', 'de']
|
||||
self.DL.params['subtitleslangs'] = langs
|
||||
subtitles = self.getSubtitles()
|
||||
for lang in langs:
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
class TestTedSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
|
||||
IE = TEDIE
|
||||
|
||||
def test_no_writesubtitles(self):
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(subtitles, None)
|
||||
|
||||
def test_subtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
|
||||
|
||||
def test_subtitles_lang(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitleslangs'] = ['fr']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertTrue(len(subtitles.keys()) >= 28)
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_automatic_captions(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['writeautomaticsub'] = True
|
||||
self.DL.params['subtitleslang'] = ['en']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertTrue(len(subtitles.keys()) == 0)
|
||||
|
||||
def test_multiple_langs(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
langs = ['es', 'fr', 'de']
|
||||
self.DL.params['subtitleslangs'] = langs
|
||||
subtitles = self.getSubtitles()
|
||||
for lang in langs:
|
||||
self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
|
||||
self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
|
||||
for lang in ['es', 'fr', 'de']:
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
|
||||
@ -221,14 +145,7 @@ class TestBlipTVSubtitles(BaseTestSubtitles):
|
||||
url = 'http://blip.tv/a/a-6603250'
|
||||
IE = BlipTVIE
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
@ -240,39 +157,13 @@ class TestVimeoSubtitles(BaseTestSubtitles):
|
||||
url = 'http://vimeo.com/76979871'
|
||||
IE = VimeoIE
|
||||
|
||||
def test_no_writesubtitles(self):
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(subtitles, None)
|
||||
|
||||
def test_subtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
|
||||
|
||||
def test_subtitles_lang(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitleslangs'] = ['fr']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_automatic_captions(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['writeautomaticsub'] = True
|
||||
self.DL.params['subtitleslang'] = ['en']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertTrue(len(subtitles.keys()) == 0)
|
||||
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
|
||||
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
|
||||
|
||||
def test_nosubtitles(self):
|
||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||
@ -280,27 +171,13 @@ def test_nosubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles), 0)
|
||||
|
||||
def test_multiple_langs(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
langs = ['es', 'fr', 'de']
|
||||
self.DL.params['subtitleslangs'] = langs
|
||||
subtitles = self.getSubtitles()
|
||||
for lang in langs:
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
class TestWallaSubtitles(BaseTestSubtitles):
|
||||
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
|
||||
IE = WallaIE
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['writesubtitles'] = True
|
||||
@ -315,19 +192,13 @@ def test_nosubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles), 0)
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
|
||||
IE = CeskaTelevizeIE
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['writesubtitles'] = True
|
||||
@ -342,7 +213,96 @@ def test_nosubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles), 0)
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
class TestLyndaSubtitles(BaseTestSubtitles):
    # Subtitle extraction check for the lynda.com page given in `url`.
    IE = LyndaIE
    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'

    def test_allsubtitles(self):
        # Request every available track; exactly one ('en') is expected
        for option in ('writesubtitles', 'allsubtitles'):
            self.DL.params[option] = True
        tracks = self.getSubtitles()
        self.assertEqual(set(tracks.keys()), set(['en']))
        self.assertEqual(md5(tracks['en']), '09bbe67222259bed60deaa26997d73a7')
|
||||
|
||||
|
||||
class TestNPOSubtitles(BaseTestSubtitles):
    # Subtitle extraction check for the npo.nl page given in `url`.
    IE = NPOIE
    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'

    def test_allsubtitles(self):
        # Request every available track; exactly one ('nl') is expected
        for option in ('writesubtitles', 'allsubtitles'):
            self.DL.params[option] = True
        tracks = self.getSubtitles()
        self.assertEqual(set(tracks.keys()), set(['nl']))
        self.assertEqual(md5(tracks['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
|
||||
|
||||
|
||||
class TestMTVSubtitles(BaseTestSubtitles):
    # Subtitle extraction check for the cc.com clip given in `url`.
    IE = ComedyCentralIE
    url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'

    def getInfoDict(self):
        # The inherited result holds an 'entries' list; use its first item
        playlist = super(TestMTVSubtitles, self).getInfoDict()
        return playlist['entries'][0]

    def test_allsubtitles(self):
        # Request every available track; exactly one ('en') is expected
        for option in ('writesubtitles', 'allsubtitles'):
            self.DL.params[option] = True
        tracks = self.getSubtitles()
        self.assertEqual(set(tracks.keys()), set(['en']))
        self.assertEqual(md5(tracks['en']), 'b9f6ca22a6acf597ec76f61749765e65')
|
||||
|
||||
|
||||
class TestNRKSubtitles(BaseTestSubtitles):
    # Subtitle extraction check for the tv.nrk.no page given in `url`.
    IE = NRKTVIE
    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'

    def test_allsubtitles(self):
        # Request every available track; exactly one ('no') is expected
        for option in ('writesubtitles', 'allsubtitles'):
            self.DL.params[option] = True
        tracks = self.getSubtitles()
        self.assertEqual(set(tracks.keys()), set(['no']))
        self.assertEqual(md5(tracks['no']), '1d221e6458c95c5494dcd38e6a1f129a')
|
||||
|
||||
|
||||
class TestRaiSubtitles(BaseTestSubtitles):
    # Subtitle extraction check for the rai.tv page given in `url`.
    IE = RaiIE
    url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'

    def test_allsubtitles(self):
        # Request every available track; exactly one ('it') is expected
        for option in ('writesubtitles', 'allsubtitles'):
            self.DL.params[option] = True
        tracks = self.getSubtitles()
        self.assertEqual(set(tracks.keys()), set(['it']))
        self.assertEqual(md5(tracks['it']), 'b1d90a98755126b61e667567a1f6680a')
|
||||
|
||||
|
||||
class TestVikiSubtitles(BaseTestSubtitles):
    # Subtitle extraction check for the viki.com page given in `url`.
    IE = VikiIE
    url = 'http://www.viki.com/videos/1060846v-punch-episode-18'

    def test_allsubtitles(self):
        # Request every available track; exactly one ('en') is expected
        for option in ('writesubtitles', 'allsubtitles'):
            self.DL.params[option] = True
        tracks = self.getSubtitles()
        self.assertEqual(set(tracks.keys()), set(['en']))
        self.assertEqual(md5(tracks['en']), '53cb083a5914b2d84ef1ab67b880d18a')
|
||||
|
||||
|
||||
class TestThePlatformSubtitles(BaseTestSubtitles):
    # Subtitle extraction check for a 'theplatform:' media id.
    IE = ThePlatformIE
    # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
    # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
    url = 'theplatform:JFUjUE1_ehvq'

    def test_allsubtitles(self):
        # Request every available track; exactly one ('en') is expected
        for option in ('writesubtitles', 'allsubtitles'):
            self.DL.params[option] = True
        tracks = self.getSubtitles()
        self.assertEqual(set(tracks.keys()), set(['en']))
        self.assertEqual(md5(tracks['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -154,7 +154,7 @@ class YoutubeDL(object):
|
||||
allsubtitles: Downloads all the subtitles of the video
|
||||
(requires writesubtitles or writeautomaticsub)
|
||||
listsubtitles: Lists all available subtitles for the video
|
||||
subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
|
||||
subtitlesformat: The format code for subtitles
|
||||
subtitleslangs: List of languages of the subtitles to download
|
||||
keepvideo: Keep the video file after post-processing
|
||||
daterange: A DateRange object, download only if the upload_date is in the range.
|
||||
@ -1008,6 +1008,15 @@ def process_video_result(self, info_dict, download=True):
|
||||
info_dict['timestamp'])
|
||||
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
|
||||
|
||||
if self.params.get('listsubtitles', False):
|
||||
if 'automatic_captions' in info_dict:
|
||||
self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
|
||||
self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
|
||||
return
|
||||
info_dict['requested_subtitles'] = self.process_subtitles(
|
||||
info_dict['id'], info_dict.get('subtitles'),
|
||||
info_dict.get('automatic_captions'))
|
||||
|
||||
# These extractors handle format selection themselves
|
||||
if info_dict['extractor'] in ['Youku']:
|
||||
if download:
|
||||
@ -1136,6 +1145,55 @@ def process_video_result(self, info_dict, download=True):
|
||||
info_dict.update(formats_to_download[-1])
|
||||
return info_dict
|
||||
|
||||
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    normal_subtitles and automatic_captions map a language code to a list
    of format dicts (each with at least an 'ext' key); either may be None.
    Which of them are considered is controlled by the 'writesubtitles' and
    'writeautomaticsub' params; for a language present in both, the normal
    subtitles are used.

    The languages come from 'allsubtitles' / 'subtitleslangs', defaulting
    to 'en' if available and otherwise to the first available language.
    The format comes from 'subtitlesformat', a '/'-separated preference
    list where 'best' means the last format listed for the language.

    Returns a dict mapping language to the chosen format dict, or None
    when no subtitles are requested or none are available.  A warning is
    reported for each requested language that has no formats.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            # Never let an automatic caption replace a normal subtitle
            if lang not in available_subs:
                available_subs[lang] = cap_info

    # Also covers "nothing requested": then nothing was collected above
    if not available_subs:
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = available_subs.keys()
    elif self.params.get('subtitleslangs', False):
        requested_langs = self.params.get('subtitleslangs')
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [list(available_subs.keys())[0]]

    # An unset, None or empty preference means 'best' instead of producing
    # a misleading 'No subtitle format found matching "None"' warning
    formats_query = self.params.get('subtitlesformat') or 'best'
    formats_preference = formats_query.split('/')

    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty list is as unusable as a missing language; indexing it
        # with [-1] below would raise IndexError
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [fmt for fmt in formats if fmt['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # No preference matched: fall back to the last format, loudly
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
|
||||
|
||||
def process_info(self, info_dict):
|
||||
"""Process a single resolved IE result."""
|
||||
|
||||
@ -1238,15 +1296,22 @@ def process_info(self, info_dict):
|
||||
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
||||
self.params.get('writeautomaticsub')])
|
||||
|
||||
if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
|
||||
if subtitles_are_requested and info_dict.get('requested_subtitles'):
|
||||
# subtitles download errors are already managed as troubles in relevant IE
|
||||
# that way it will silently go on when used with unsupporting IE
|
||||
subtitles = info_dict['subtitles']
|
||||
sub_format = self.params.get('subtitlesformat', 'srt')
|
||||
for sub_lang in subtitles.keys():
|
||||
sub = subtitles[sub_lang]
|
||||
if sub is None:
|
||||
continue
|
||||
subtitles = info_dict['requested_subtitles']
|
||||
for sub_lang, sub_info in subtitles.items():
|
||||
sub_format = sub_info['ext']
|
||||
if sub_info.get('data') is not None:
|
||||
sub_data = sub_info['data']
|
||||
else:
|
||||
try:
|
||||
uf = self.urlopen(sub_info['url'])
|
||||
sub_data = uf.read().decode('utf-8')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self.report_warning('Unable to download subtitle for "%s": %s' %
|
||||
(sub_lang, compat_str(err)))
|
||||
continue
|
||||
try:
|
||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
||||
@ -1254,7 +1319,7 @@ def process_info(self, info_dict):
|
||||
else:
|
||||
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
||||
subfile.write(sub)
|
||||
subfile.write(sub_data)
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write subtitles file ' + sub_filename)
|
||||
return
|
||||
@ -1564,6 +1629,17 @@ def list_thumbnails(self, info_dict):
|
||||
['ID', 'width', 'height', 'URL'],
|
||||
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
|
||||
|
||||
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of the languages and formats in subtitles.

    subtitles maps a language to a list of format dicts with an 'ext'
    key; the extensions are shown in reverse list order.  name is the
    label used in the messages.  If subtitles is empty or None, a single
    "has no ..." line is printed instead.
    """
    if subtitles:
        self.to_screen('Available %s for %s:' % (name, video_id))
        rows = []
        for lang, formats in subtitles.items():
            extensions = [f['ext'] for f in reversed(formats)]
            rows.append([lang, ', '.join(extensions)])
        self.to_screen(render_table(['Language', 'formats'], rows))
    else:
        self.to_screen('%s has no %s' % (video_id, name))
|
||||
|
||||
def urlopen(self, req):
|
||||
""" Start an HTTP download """
|
||||
|
||||
|
@ -226,7 +226,6 @@ def _real_main(argv=None):
|
||||
if opts.embedsubtitles:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegEmbedSubtitle',
|
||||
'subtitlesformat': opts.subtitlesformat,
|
||||
})
|
||||
if opts.xattrs:
|
||||
postprocessors.append({'key': 'XAttrMetadata'})
|
||||
|
@ -3,7 +3,7 @@
|
||||
import time
|
||||
import hmac
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
@ -17,7 +17,7 @@
|
||||
)
|
||||
|
||||
|
||||
class AtresPlayerIE(SubtitlesInfoExtractor):
|
||||
class AtresPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
|
||||
_TESTS = [
|
||||
{
|
||||
@ -144,13 +144,12 @@ def _real_extract(self, url):
|
||||
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
|
||||
|
||||
subtitles = {}
|
||||
subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
|
||||
if subtitle:
|
||||
subtitles['es'] = subtitle
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
|
||||
if subtitle_url:
|
||||
subtitles['es'] = [{
|
||||
'ext': 'srt',
|
||||
'url': subtitle_url,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -159,5 +158,5 @@ def _real_extract(self, url):
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(video_id, subtitles),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@ -2,12 +2,12 @@
|
||||
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||
@ -215,17 +215,32 @@ def _extract_audio(self, media, programme_id):
|
||||
formats.extend(conn_formats)
|
||||
return formats
|
||||
|
||||
def _extract_captions(self, media, programme_id):
|
||||
def _get_subtitles(self, media, programme_id):
    """Build the subtitles dict from the caption connections of media.

    For every connection the caption XML is downloaded and its language is
    read from the root element's xml:lang attribute (default 'en').  Each
    language maps to two entries: the caption document itself, referenced
    by URL with ext 'ttml', and a generated 'srt' text passed inline as
    'data'.  If several connections share a language, the last one wins.
    """
    ttaf_ns = '{http://www.w3.org/2006/10/ttaf1}'

    # Defined once: it does not depend on the connection being processed
    def _extract_text(p):
        # Prefer the paragraph's own text, else join the text of its spans
        if p.text is not None:
            stripped_text = p.text.strip()
            if stripped_text:
                return stripped_text
        # An empty <span/> has text None; skip it instead of crashing
        return ' '.join(
            span.text.strip()
            for span in p.findall(ttaf_ns + 'span')
            if span.text is not None)

    subtitles = {}
    for connection in self._extract_connections(media):
        captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
        lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
        ps = captions.findall('./{0}body/{0}div/{0}p'.format(ttaf_ns))
        # Cue numbers and begin/end values are emitted exactly as before
        srt = ''.join(
            '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
            for pos, p in enumerate(ps))
        subtitles[lang] = [
            {
                'url': connection.get('href'),
                'ext': 'ttml',
            },
            {
                'data': srt,
                'ext': 'srt',
            },
        ]
    return subtitles
|
||||
|
||||
def _download_media_selector(self, programme_id):
|
||||
@ -249,7 +264,7 @@ def _download_media_selector(self, programme_id):
|
||||
elif kind == 'video':
|
||||
formats.extend(self._extract_video(media, programme_id))
|
||||
elif kind == 'captions':
|
||||
subtitles = self._extract_captions(media, programme_id)
|
||||
subtitles = self.extract_subtitles(media, programme_id)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
@ -324,10 +339,6 @@ def _real_extract(self, url):
|
||||
else:
|
||||
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(programme_id, subtitles)
|
||||
return
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@ -3,7 +3,6 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
@ -18,7 +17,7 @@
|
||||
)
|
||||
|
||||
|
||||
class BlipTVIE(SubtitlesInfoExtractor):
|
||||
class BlipTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
|
||||
|
||||
_TESTS = [
|
||||
@ -143,7 +142,7 @@ def itunes(s):
|
||||
categories = [category.text for category in item.findall('category')]
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
subtitles_urls = {}
|
||||
|
||||
media_group = item.find(media('group'))
|
||||
for media_content in media_group.findall(media('content')):
|
||||
@ -161,7 +160,7 @@ def itunes(s):
|
||||
}
|
||||
lang = role.rpartition('-')[-1].strip().lower()
|
||||
langcode = LANGS.get(lang, lang)
|
||||
subtitles[langcode] = url
|
||||
subtitles_urls[langcode] = url
|
||||
elif media_type.startswith('video/'):
|
||||
formats.append({
|
||||
'url': real_url,
|
||||
@ -175,11 +174,7 @@ def itunes(s):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, subtitles)
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
subtitles = self.extract_subtitles(video_id, subtitles_urls)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -192,15 +187,22 @@ def itunes(s):
|
||||
'thumbnail': thumbnail,
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
'subtitles': video_subtitles,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _download_subtitle_url(self, sub_lang, url):
|
||||
# For some weird reason, blip.tv serves a video instead of subtitles
|
||||
# when we request with a common UA
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('User-Agent', 'youtube-dl')
|
||||
return self._download_webpage(req, None, note=False)
|
||||
def _get_subtitles(self, video_id, subtitles_urls):
|
||||
subtitles = {}
|
||||
for lang, url in subtitles_urls.items():
|
||||
# For some weird reason, blip.tv serves a video instead of subtitles
|
||||
# when we request with a common UA
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('User-Agent', 'youtube-dl')
|
||||
subtitles[lang] = [{
|
||||
# The extension is 'srt' but it's actually an 'ass' file
|
||||
'ext': 'ass',
|
||||
'data': self._download_webpage(req, None, note=False),
|
||||
}]
|
||||
return subtitles
|
||||
|
||||
|
||||
class BlipTVUserIE(InfoExtractor):
|
||||
|
@ -3,7 +3,7 @@
|
||||
|
||||
import re
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
@ -15,7 +15,7 @@
|
||||
)
|
||||
|
||||
|
||||
class CeskaTelevizeIE(SubtitlesInfoExtractor):
|
||||
class CeskaTelevizeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
|
||||
|
||||
_TESTS = [
|
||||
@ -107,13 +107,7 @@ def _real_extract(self, url):
|
||||
subtitles = {}
|
||||
subs = item.get('subtitles')
|
||||
if subs:
|
||||
subtitles['cs'] = subs[0]['url']
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
|
||||
subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
|
||||
subtitles = self.extract_subtitles(episode_id, subs)
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
@ -125,11 +119,20 @@ def _real_extract(self, url):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _get_subtitles(self, episode_id, subs):
|
||||
original_subtitles = self._download_webpage(
|
||||
subs[0]['url'], episode_id, 'Downloading subtitles')
|
||||
srt_subs = self._fix_subtitles(original_subtitles)
|
||||
return {
|
||||
'cs': [{
|
||||
'ext': 'srt',
|
||||
'data': srt_subs,
|
||||
}]
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _fix_subtitles(subtitles):
|
||||
""" Convert millisecond-based subtitles to SRT """
|
||||
if subtitles is None:
|
||||
return subtitles # subtitles not requested
|
||||
|
||||
def _msectotimecode(msec):
|
||||
""" Helper utility to convert milliseconds to timecode """
|
||||
@ -149,7 +152,4 @@ def _fix_subtitle(subtitle):
|
||||
else:
|
||||
yield line
|
||||
|
||||
fixed_subtitles = {}
|
||||
for k, v in subtitles.items():
|
||||
fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
|
||||
return fixed_subtitles
|
||||
return "\r\n".join(_fix_subtitle(subtitles))
|
||||
|
@ -150,8 +150,14 @@ class InfoExtractor(object):
|
||||
If not explicitly set, calculated from timestamp.
|
||||
uploader_id: Nickname or id of the video uploader.
|
||||
location: Physical location where the video was filmed.
|
||||
subtitles: The subtitle file contents as a dictionary in the format
|
||||
{language: subtitles}.
|
||||
subtitles: The available subtitles as a dictionary in the format
|
||||
{language: subformats}. "subformats" is a list sorted from
|
||||
lower to higher preference, each element is a dictionary
|
||||
with the "ext" entry and one of:
|
||||
* "data": The subtitles file contents
|
||||
* "url": A url pointing to the subtitles file
|
||||
automatic_captions: Like 'subtitles', used by the YoutubeIE for
|
||||
automatically generated captions
|
||||
duration: Length of the video in seconds, as an integer.
|
||||
view_count: How many users have watched the video on the platform.
|
||||
like_count: Number of positive ratings of the video
|
||||
@ -1011,6 +1017,24 @@ def is_suitable(self, age_limit):
|
||||
any_restricted = any_restricted or is_restricted
|
||||
return not any_restricted
|
||||
|
||||
def extract_subtitles(self, *args, **kwargs):
|
||||
if (self._downloader.params.get('writesubtitles', False) or
|
||||
self._downloader.params.get('listsubtitles')):
|
||||
return self._get_subtitles(*args, **kwargs)
|
||||
return {}
|
||||
|
||||
def _get_subtitles(self, *args, **kwargs):
|
||||
raise NotImplementedError("This method must be implemented by subclasses")
|
||||
|
||||
def extract_automatic_captions(self, *args, **kwargs):
|
||||
if (self._downloader.params.get('writeautomaticsub', False) or
|
||||
self._downloader.params.get('listsubtitles')):
|
||||
return self._get_automatic_captions(*args, **kwargs)
|
||||
return {}
|
||||
|
||||
def _get_automatic_captions(self, *args, **kwargs):
|
||||
raise NotImplementedError("This method must be implemented by subclasses")
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
@ -9,7 +9,7 @@
|
||||
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
@ -25,10 +25,9 @@
|
||||
aes_cbc_decrypt,
|
||||
inc,
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CrunchyrollIE(SubtitlesInfoExtractor):
|
||||
class CrunchyrollIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
@ -187,6 +186,38 @@ def ass_bool(strvalue):
|
||||
|
||||
return output
|
||||
|
||||
def _get_subtitles(self, video_id, webpage):
|
||||
subtitles = {}
|
||||
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
||||
sub_page = self._download_webpage(
|
||||
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
|
||||
video_id, note='Downloading subtitles for ' + sub_name)
|
||||
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
|
||||
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
|
||||
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
|
||||
if not id or not iv or not data:
|
||||
continue
|
||||
id = int(id)
|
||||
iv = base64.b64decode(iv)
|
||||
data = base64.b64decode(data)
|
||||
|
||||
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
|
||||
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
||||
if not lang_code:
|
||||
continue
|
||||
sub_root = xml.etree.ElementTree.fromstring(subtitle)
|
||||
subtitles[lang_code] = [
|
||||
{
|
||||
'ext': 'srt',
|
||||
'data': self._convert_subtitles_to_srt(sub_root),
|
||||
},
|
||||
{
|
||||
'ext': 'ass',
|
||||
'data': self._convert_subtitles_to_ass(sub_root),
|
||||
},
|
||||
]
|
||||
return subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
@ -249,34 +280,7 @@ def _real_extract(self, url):
|
||||
'format_id': video_format,
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
|
||||
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
||||
sub_page = self._download_webpage(
|
||||
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
|
||||
video_id, note='Downloading subtitles for ' + sub_name)
|
||||
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
|
||||
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
|
||||
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
|
||||
if not id or not iv or not data:
|
||||
continue
|
||||
id = int(id)
|
||||
iv = base64.b64decode(iv)
|
||||
data = base64.b64decode(data)
|
||||
|
||||
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
|
||||
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
||||
if not lang_code:
|
||||
continue
|
||||
sub_root = xml.etree.ElementTree.fromstring(subtitle)
|
||||
if sub_format == 'ass':
|
||||
subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
|
||||
else:
|
||||
subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
subtitles = self.extract_subtitles(video_id, webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -6,7 +6,6 @@
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
@ -31,7 +30,7 @@ def _build_request(url):
|
||||
return request
|
||||
|
||||
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
"""Information Extractor for Dailymotion"""
|
||||
|
||||
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
|
||||
@ -143,9 +142,6 @@ def _real_extract(self, url):
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, webpage)
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, webpage)
|
||||
return
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'video_views_count[^>]+>\s+([\d\.,]+)',
|
||||
@ -169,7 +165,7 @@ def _real_extract(self, url):
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
def _get_subtitles(self, video_id, webpage):
|
||||
try:
|
||||
sub_list = self._download_webpage(
|
||||
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
|
||||
@ -179,7 +175,7 @@ def _get_available_subtitles(self, video_id, webpage):
|
||||
return {}
|
||||
info = json.loads(sub_list)
|
||||
if (info['total'] > 0):
|
||||
sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
|
||||
sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list'])
|
||||
return sub_lang_list
|
||||
self._downloader.report_warning('video doesn\'t have subtitles')
|
||||
return {}
|
||||
|
@ -1,11 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import ExtractorError
|
||||
from .common import InfoExtractor, ExtractorError
|
||||
from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class DRTVIE(SubtitlesInfoExtractor):
|
||||
class DRTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
|
||||
_TEST = {
|
||||
@ -76,7 +75,7 @@ def _real_extract(self, url):
|
||||
}
|
||||
for subs in subtitles_list:
|
||||
lang = subs['Language']
|
||||
subtitles[LANGS.get(lang, lang)] = subs['Uri']
|
||||
subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}]
|
||||
|
||||
if not formats and restricted_to_denmark:
|
||||
raise ExtractorError(
|
||||
@ -84,10 +83,6 @@ def _real_extract(self, url):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@ -96,5 +91,5 @@ def _real_extract(self, url):
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(video_id, subtitles),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@ -3,7 +3,6 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
@ -16,7 +15,7 @@
|
||||
)
|
||||
|
||||
|
||||
class LyndaIE(SubtitlesInfoExtractor):
|
||||
class LyndaIE(InfoExtractor):
|
||||
IE_NAME = 'lynda'
|
||||
IE_DESC = 'lynda.com videos'
|
||||
_VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
|
||||
@ -88,11 +87,7 @@ def _real_extract(self, url):
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, page)
|
||||
return
|
||||
|
||||
subtitles = self._fix_subtitles(self.extract_subtitles(video_id, page))
|
||||
subtitles = self.extract_subtitles(video_id, page)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -144,38 +139,31 @@ def _login(self):
|
||||
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _fix_subtitles(self, subtitles):
|
||||
if subtitles is None:
|
||||
return subtitles # subtitles not requested
|
||||
|
||||
fixed_subtitles = {}
|
||||
for k, v in subtitles.items():
|
||||
subs = json.loads(v)
|
||||
if len(subs) == 0:
|
||||
def _fix_subtitles(self, subs):
|
||||
srt = ''
|
||||
for pos in range(0, len(subs) - 1):
|
||||
seq_current = subs[pos]
|
||||
m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
|
||||
if m_current is None:
|
||||
continue
|
||||
srt = ''
|
||||
for pos in range(0, len(subs) - 1):
|
||||
seq_current = subs[pos]
|
||||
m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
|
||||
if m_current is None:
|
||||
continue
|
||||
seq_next = subs[pos + 1]
|
||||
m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
|
||||
if m_next is None:
|
||||
continue
|
||||
appear_time = m_current.group('timecode')
|
||||
disappear_time = m_next.group('timecode')
|
||||
text = seq_current['Caption']
|
||||
srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
|
||||
if srt:
|
||||
fixed_subtitles[k] = srt
|
||||
return fixed_subtitles
|
||||
seq_next = subs[pos + 1]
|
||||
m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
|
||||
if m_next is None:
|
||||
continue
|
||||
appear_time = m_current.group('timecode')
|
||||
disappear_time = m_next.group('timecode')
|
||||
text = seq_current['Caption']
|
||||
srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
|
||||
if srt:
|
||||
return srt
|
||||
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
def _get_subtitles(self, video_id, webpage):
|
||||
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
|
||||
sub = self._download_webpage(url, None, False)
|
||||
sub_json = json.loads(sub)
|
||||
return {'en': url} if len(sub_json) > 0 else {}
|
||||
subs = self._download_json(url, None, False)
|
||||
if subs:
|
||||
return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
|
||||
else:
|
||||
return {}
|
||||
|
||||
|
||||
class LyndaCourseIE(InfoExtractor):
|
||||
|
@ -5,9 +5,6 @@
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
@ -108,7 +105,6 @@ class OCWMITIE(InfoExtractor):
|
||||
'upload_date': '20121109',
|
||||
'uploader_id': 'MIT',
|
||||
'uploader': 'MIT OpenCourseWare',
|
||||
# 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -121,7 +117,6 @@ class OCWMITIE(InfoExtractor):
|
||||
'uploader_id': 'MIT',
|
||||
'uploader': 'MIT OpenCourseWare',
|
||||
'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
|
||||
# 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
|
||||
}
|
||||
}
|
||||
]
|
||||
@ -140,7 +135,6 @@ def _real_extract(self, url):
|
||||
metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
|
||||
metadata = re.split(r', ?', metadata)
|
||||
yt = metadata[1]
|
||||
subs = compat_urlparse.urljoin(self._BASE_URL, metadata[7])
|
||||
else:
|
||||
# search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, captions_file)
|
||||
embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
|
||||
@ -148,7 +142,6 @@ def _real_extract(self, url):
|
||||
metadata = re.sub(r'[\'"]', '', embed_media.group(1))
|
||||
metadata = re.split(r', ?', metadata)
|
||||
yt = metadata[1]
|
||||
subs = compat_urlparse.urljoin(self._BASE_URL, metadata[5])
|
||||
else:
|
||||
raise ExtractorError('Unable to find embedded YouTube video.')
|
||||
video_id = YoutubeIE.extract_id(yt)
|
||||
@ -159,7 +152,5 @@ def _real_extract(self, url):
|
||||
'title': title,
|
||||
'description': description,
|
||||
'url': yt,
|
||||
'url_transparent'
|
||||
'subtitles': subs,
|
||||
'ie_key': 'Youtube',
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
import re
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
@ -23,7 +23,7 @@ def _media_xml_tag(tag):
|
||||
return '{http://search.yahoo.com/mrss/}%s' % tag
|
||||
|
||||
|
||||
class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
|
||||
class MTVServicesInfoExtractor(InfoExtractor):
|
||||
_MOBILE_TEMPLATE = None
|
||||
|
||||
@staticmethod
|
||||
@ -95,25 +95,15 @@ def _extract_video_formats(self, mdoc, mtvn_id):
|
||||
|
||||
def _extract_subtitles(self, mdoc, mtvn_id):
|
||||
subtitles = {}
|
||||
FORMATS = {
|
||||
'scc': 'cea-608',
|
||||
'eia-608': 'cea-608',
|
||||
'xml': 'ttml',
|
||||
}
|
||||
subtitles_format = FORMATS.get(
|
||||
self._downloader.params.get('subtitlesformat'), 'ttml')
|
||||
for transcript in mdoc.findall('.//transcript'):
|
||||
if transcript.get('kind') != 'captions':
|
||||
continue
|
||||
lang = transcript.get('srclang')
|
||||
for typographic in transcript.findall('./typographic'):
|
||||
captions_format = typographic.get('format')
|
||||
if captions_format == subtitles_format:
|
||||
subtitles[lang] = compat_str(typographic.get('src'))
|
||||
break
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(mtvn_id, subtitles)
|
||||
return self.extract_subtitles(mtvn_id, subtitles)
|
||||
subtitles[lang] = [{
|
||||
'url': compat_str(typographic.get('src')),
|
||||
'ext': typographic.get('format')
|
||||
} for typographic in transcript.findall('./typographic')]
|
||||
return subtitles
|
||||
|
||||
def _get_video_info(self, itemdoc):
|
||||
uri = itemdoc.find('guid').text
|
||||
@ -196,8 +186,6 @@ def _real_extract(self, url):
|
||||
webpage, 'mgid')
|
||||
|
||||
videos_info = self._get_videos_info(mgid)
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
return
|
||||
return videos_info
|
||||
|
||||
|
||||
|
@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
fix_xml_ampersands,
|
||||
@ -12,7 +11,7 @@
|
||||
)
|
||||
|
||||
|
||||
class NPOBaseIE(SubtitlesInfoExtractor):
|
||||
class NPOBaseIE(InfoExtractor):
|
||||
def _get_token(self, video_id):
|
||||
token_page = self._download_webpage(
|
||||
'http://ida.omroep.nl/npoplayer/i.js',
|
||||
@ -164,13 +163,10 @@ def _get_info(self, video_id):
|
||||
|
||||
subtitles = {}
|
||||
if metadata.get('tt888') == 'ja':
|
||||
subtitles['nl'] = 'http://e.omroep.nl/tt888/%s' % video_id
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
|
||||
subtitles = self.extract_subtitles(video_id, subtitles)
|
||||
subtitles['nl'] = [{
|
||||
'ext': 'vtt',
|
||||
'url': 'http://e.omroep.nl/tt888/%s' % video_id,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -10,7 +10,6 @@
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
)
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
|
||||
|
||||
class NRKIE(InfoExtractor):
|
||||
@ -73,7 +72,7 @@ def _real_extract(self, url):
|
||||
}
|
||||
|
||||
|
||||
class NRKTVIE(SubtitlesInfoExtractor):
|
||||
class NRKTVIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
||||
|
||||
_TESTS = [
|
||||
@ -156,7 +155,7 @@ def _debug_print(self, txt):
|
||||
if self._downloader.params.get('verbose', False):
|
||||
self.to_screen('[debug] %s' % txt)
|
||||
|
||||
def _extract_captions(self, subtitlesurl, video_id, baseurl):
|
||||
def _get_subtitles(self, subtitlesurl, video_id, baseurl):
|
||||
url = "%s%s" % (baseurl, subtitlesurl)
|
||||
self._debug_print('%s: Subtitle url: %s' % (video_id, url))
|
||||
captions = self._download_xml(url, video_id, 'Downloading subtitles')
|
||||
@ -170,7 +169,10 @@ def _extract_captions(self, subtitlesurl, video_id, baseurl):
|
||||
endtime = self._seconds2str(begin + duration)
|
||||
text = '\n'.join(p.itertext())
|
||||
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text)
|
||||
return {lang: srt}
|
||||
return {lang: [
|
||||
{'ext': 'ttml', 'url': url},
|
||||
{'ext': 'srt', 'data': srt},
|
||||
]}
|
||||
|
||||
def _extract_f4m(self, manifest_url, video_id):
|
||||
return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
|
||||
@ -243,10 +245,7 @@ def _real_extract(self, url):
|
||||
webpage, 'subtitle URL', default=None)
|
||||
subtitles = None
|
||||
if subtitles_url:
|
||||
subtitles = self._extract_captions(subtitles_url, video_id, baseurl)
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
subtitles = self.extract_subtitles(subtitles_url, video_id, baseurl)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
import re
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
@ -12,7 +12,7 @@
|
||||
)
|
||||
|
||||
|
||||
class RaiIE(SubtitlesInfoExtractor):
|
||||
class RaiIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
|
||||
_TESTS = [
|
||||
{
|
||||
@ -89,15 +89,7 @@ def _real_extract(self, url):
|
||||
'ext': 'mp4',
|
||||
})
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
page = self._download_webpage(url, video_id)
|
||||
self._list_available_subtitles(video_id, page)
|
||||
return
|
||||
|
||||
subtitles = {}
|
||||
if self._have_to_download_any_subtitles:
|
||||
page = self._download_webpage(url, video_id)
|
||||
subtitles = self.extract_subtitles(video_id, page)
|
||||
subtitles = self.extract_subtitles(video_id, url)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -111,7 +103,8 @@ def _real_extract(self, url):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
def _get_subtitles(self, video_id, url):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
subtitles = {}
|
||||
m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
|
||||
if m:
|
||||
@ -120,5 +113,8 @@ def _get_available_subtitles(self, video_id, webpage):
|
||||
SRT_EXT = '.srt'
|
||||
if captions.endswith(STL_EXT):
|
||||
captions = captions[:-len(STL_EXT)] + SRT_EXT
|
||||
subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions)
|
||||
subtitles['it'] = [{
|
||||
'ext': 'srt',
|
||||
'url': 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions),
|
||||
}]
|
||||
return subtitles
|
||||
|
@ -1,99 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class SubtitlesInfoExtractor(InfoExtractor):
|
||||
@property
|
||||
def _have_to_download_any_subtitles(self):
|
||||
return any([self._downloader.params.get('writesubtitles', False),
|
||||
self._downloader.params.get('writeautomaticsub')])
|
||||
|
||||
def _list_available_subtitles(self, video_id, webpage):
|
||||
""" outputs the available subtitles for the video """
|
||||
sub_lang_list = self._get_available_subtitles(video_id, webpage)
|
||||
auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
|
||||
sub_lang = ",".join(list(sub_lang_list.keys()))
|
||||
self.to_screen('%s: Available subtitles for video: %s' %
|
||||
(video_id, sub_lang))
|
||||
auto_lang = ",".join(auto_captions_list.keys())
|
||||
self.to_screen('%s: Available automatic captions for video: %s' %
|
||||
(video_id, auto_lang))
|
||||
|
||||
def extract_subtitles(self, video_id, webpage):
|
||||
"""
|
||||
returns {sub_lang: sub} ,{} if subtitles not found or None if the
|
||||
subtitles aren't requested.
|
||||
"""
|
||||
if not self._have_to_download_any_subtitles:
|
||||
return None
|
||||
available_subs_list = {}
|
||||
if self._downloader.params.get('writeautomaticsub', False):
|
||||
available_subs_list.update(self._get_available_automatic_caption(video_id, webpage))
|
||||
if self._downloader.params.get('writesubtitles', False):
|
||||
available_subs_list.update(self._get_available_subtitles(video_id, webpage))
|
||||
|
||||
if not available_subs_list: # error, it didn't get the available subtitles
|
||||
return {}
|
||||
if self._downloader.params.get('allsubtitles', False):
|
||||
sub_lang_list = available_subs_list
|
||||
else:
|
||||
if self._downloader.params.get('subtitleslangs', False):
|
||||
requested_langs = self._downloader.params.get('subtitleslangs')
|
||||
elif 'en' in available_subs_list:
|
||||
requested_langs = ['en']
|
||||
else:
|
||||
requested_langs = [list(available_subs_list.keys())[0]]
|
||||
|
||||
sub_lang_list = {}
|
||||
for sub_lang in requested_langs:
|
||||
if sub_lang not in available_subs_list:
|
||||
self._downloader.report_warning('no closed captions found in the specified language "%s"' % sub_lang)
|
||||
continue
|
||||
sub_lang_list[sub_lang] = available_subs_list[sub_lang]
|
||||
|
||||
subtitles = {}
|
||||
for sub_lang, url in sub_lang_list.items():
|
||||
subtitle = self._request_subtitle_url(sub_lang, url)
|
||||
if subtitle:
|
||||
subtitles[sub_lang] = subtitle
|
||||
return subtitles
|
||||
|
||||
def _download_subtitle_url(self, sub_lang, url):
|
||||
return self._download_webpage(url, None, note=False)
|
||||
|
||||
def _request_subtitle_url(self, sub_lang, url):
|
||||
""" makes the http request for the subtitle """
|
||||
try:
|
||||
sub = self._download_subtitle_url(sub_lang, url)
|
||||
except ExtractorError as err:
|
||||
self._downloader.report_warning('unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
|
||||
return
|
||||
if not sub:
|
||||
self._downloader.report_warning('Did not fetch video subtitles')
|
||||
return
|
||||
return sub
|
||||
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
"""
|
||||
returns {sub_lang: url} or {} if not available
|
||||
Must be redefined by the subclasses
|
||||
"""
|
||||
|
||||
# By default, allow implementations to simply pass in the result
|
||||
assert isinstance(webpage, dict), \
|
||||
'_get_available_subtitles not implemented'
|
||||
return webpage
|
||||
|
||||
def _get_available_automatic_caption(self, video_id, webpage):
|
||||
"""
|
||||
returns {sub_lang: url} or {} if not available
|
||||
Must be redefined by the subclasses that support automatic captions,
|
||||
otherwise it will return {}
|
||||
"""
|
||||
self._downloader.report_warning('Automatic Captions not supported by this server')
|
||||
return {}
|
@ -3,14 +3,14 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
class TEDIE(SubtitlesInfoExtractor):
|
||||
class TEDIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?P<proto>https?://)
|
||||
(?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
|
||||
@ -184,11 +184,6 @@ def _talk_info(self, url, video_name):
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_id = compat_str(talk_info['id'])
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, talk_info)
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, talk_info)
|
||||
return
|
||||
|
||||
thumbnail = talk_info['thumb']
|
||||
if not thumbnail.startswith('http'):
|
||||
@ -199,21 +194,25 @@ def _talk_info(self, url, video_name):
|
||||
'uploader': talk_info['speaker'],
|
||||
'thumbnail': thumbnail,
|
||||
'description': self._og_search_description(webpage),
|
||||
'subtitles': video_subtitles,
|
||||
'subtitles': self._get_subtitles(video_id, talk_info),
|
||||
'formats': formats,
|
||||
'duration': talk_info.get('duration'),
|
||||
}
|
||||
|
||||
def _get_available_subtitles(self, video_id, talk_info):
|
||||
def _get_subtitles(self, video_id, talk_info):
|
||||
languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
|
||||
if languages:
|
||||
sub_lang_list = {}
|
||||
for l in languages:
|
||||
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
|
||||
sub_lang_list[l] = url
|
||||
sub_lang_list[l] = [
|
||||
{
|
||||
'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext),
|
||||
'ext': ext,
|
||||
}
|
||||
for ext in ['ted', 'srt']
|
||||
]
|
||||
return sub_lang_list
|
||||
else:
|
||||
self._downloader.report_warning('video doesn\'t have subtitles')
|
||||
return {}
|
||||
|
||||
def _watch_info(self, url, name):
|
||||
|
@ -8,7 +8,7 @@
|
||||
import hashlib
|
||||
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
@ -22,7 +22,7 @@
|
||||
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
|
||||
|
||||
|
||||
class ThePlatformIE(SubtitlesInfoExtractor):
|
||||
class ThePlatformIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
|
||||
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
|
||||
@ -106,15 +106,11 @@ def _real_extract(self, url):
|
||||
captions = info.get('captions')
|
||||
if isinstance(captions, list):
|
||||
for caption in captions:
|
||||
lang, src = caption.get('lang'), caption.get('src')
|
||||
if lang and src:
|
||||
subtitles[lang] = src
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
|
||||
subtitles = self.extract_subtitles(video_id, subtitles)
|
||||
lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
|
||||
subtitles[lang] = [{
|
||||
'ext': 'srt' if mime == 'text/srt' else 'ttml',
|
||||
'url': src,
|
||||
}]
|
||||
|
||||
head = meta.find(_x('smil:head'))
|
||||
body = meta.find(_x('smil:body'))
|
||||
|
@ -2,16 +2,17 @@
|
||||
|
||||
import re
|
||||
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
US_RATINGS,
|
||||
)
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class VikiIE(SubtitlesInfoExtractor):
|
||||
class VikiIE(InfoExtractor):
|
||||
IE_NAME = 'viki'
|
||||
|
||||
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
|
||||
@ -69,9 +70,6 @@ def _real_extract(self, url):
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, info_webpage)
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, info_webpage)
|
||||
return
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -85,12 +83,15 @@ def _real_extract(self, url):
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
def _get_available_subtitles(self, video_id, info_webpage):
|
||||
def _get_subtitles(self, video_id, info_webpage):
|
||||
res = {}
|
||||
for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
|
||||
for sturl_html in re.findall(r'<track src="([^"]+)"', info_webpage):
|
||||
sturl = unescapeHTML(sturl_html)
|
||||
m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
|
||||
if not m:
|
||||
continue
|
||||
res[m.group('lang')] = sturl
|
||||
res[m.group('lang')] = [{
|
||||
'url': compat_urlparse.urljoin('http://www.viki.com', sturl),
|
||||
'ext': 'vtt',
|
||||
}]
|
||||
return res
|
||||
|
@ -7,7 +7,6 @@
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse,
|
||||
@ -53,7 +52,7 @@ def _login(self):
|
||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||
|
||||
|
||||
class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
class VimeoIE(VimeoBaseInfoExtractor):
|
||||
"""Information extractor for vimeo.com."""
|
||||
|
||||
# _VALID_URL matches Vimeo URLs
|
||||
@ -378,12 +377,10 @@ def _real_extract(self, url):
|
||||
text_tracks = config['request'].get('text_tracks')
|
||||
if text_tracks:
|
||||
for tt in text_tracks:
|
||||
subtitles[tt['lang']] = 'http://vimeo.com' + tt['url']
|
||||
|
||||
video_subtitles = self.extract_subtitles(video_id, subtitles)
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
subtitles[tt['lang']] = [{
|
||||
'ext': 'vtt',
|
||||
'url': 'http://vimeo.com' + tt['url'],
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -399,7 +396,7 @@ def _real_extract(self, url):
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'comment_count': comment_count,
|
||||
'subtitles': video_subtitles,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
|
@ -3,14 +3,14 @@
|
||||
|
||||
import re
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class WallaIE(SubtitlesInfoExtractor):
|
||||
class WallaIE(InfoExtractor):
|
||||
_VALID_URL = r'http://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
|
||||
_TEST = {
|
||||
'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
|
||||
@ -52,13 +52,10 @@ def _real_extract(self, url):
|
||||
subtitles = {}
|
||||
for subtitle in item.findall('./subtitles/subtitle'):
|
||||
lang = xpath_text(subtitle, './title')
|
||||
subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = xpath_text(subtitle, './src')
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
|
||||
subtitles = self.extract_subtitles(video_id, subtitles)
|
||||
subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
|
||||
'ext': 'srt',
|
||||
'url': xpath_text(subtitle, './src'),
|
||||
}]
|
||||
|
||||
formats = []
|
||||
for quality in item.findall('./qualities/quality'):
|
||||
|
@ -11,7 +11,6 @@
|
||||
import traceback
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..jsinterp import JSInterpreter
|
||||
from ..swfinterp import SWFInterpreter
|
||||
from ..compat import (
|
||||
@ -185,7 +184,7 @@ def _real_initialize(self):
|
||||
return
|
||||
|
||||
|
||||
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com'
|
||||
_VALID_URL = r"""(?x)^
|
||||
(
|
||||
@ -648,7 +647,7 @@ def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
|
||||
raise ExtractorError(
|
||||
'Signature extraction failed: ' + tb, cause=e)
|
||||
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
def _get_subtitles(self, video_id, webpage):
|
||||
try:
|
||||
subs_doc = self._download_xml(
|
||||
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
|
||||
@ -662,23 +661,27 @@ def _get_available_subtitles(self, video_id, webpage):
|
||||
lang = track.attrib['lang_code']
|
||||
if lang in sub_lang_list:
|
||||
continue
|
||||
params = compat_urllib_parse.urlencode({
|
||||
'lang': lang,
|
||||
'v': video_id,
|
||||
'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
|
||||
'name': track.attrib['name'].encode('utf-8'),
|
||||
})
|
||||
url = 'https://www.youtube.com/api/timedtext?' + params
|
||||
sub_lang_list[lang] = url
|
||||
sub_formats = []
|
||||
for ext in ['sbv', 'vtt', 'srt']:
|
||||
params = compat_urllib_parse.urlencode({
|
||||
'lang': lang,
|
||||
'v': video_id,
|
||||
'fmt': ext,
|
||||
'name': track.attrib['name'].encode('utf-8'),
|
||||
})
|
||||
sub_formats.append({
|
||||
'url': 'https://www.youtube.com/api/timedtext?' + params,
|
||||
'ext': ext,
|
||||
})
|
||||
sub_lang_list[lang] = sub_formats
|
||||
if not sub_lang_list:
|
||||
self._downloader.report_warning('video doesn\'t have subtitles')
|
||||
return {}
|
||||
return sub_lang_list
|
||||
|
||||
def _get_available_automatic_caption(self, video_id, webpage):
|
||||
def _get_automatic_captions(self, video_id, webpage):
|
||||
"""We need the webpage for getting the captions url, pass it as an
|
||||
argument to speed up the process."""
|
||||
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
|
||||
self.to_screen('%s: Looking for automatic captions' % video_id)
|
||||
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
|
||||
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
|
||||
@ -708,14 +711,20 @@ def _get_available_automatic_caption(self, video_id, webpage):
|
||||
sub_lang_list = {}
|
||||
for lang_node in caption_list.findall('target'):
|
||||
sub_lang = lang_node.attrib['lang_code']
|
||||
params = compat_urllib_parse.urlencode({
|
||||
'lang': original_lang,
|
||||
'tlang': sub_lang,
|
||||
'fmt': sub_format,
|
||||
'ts': timestamp,
|
||||
'kind': caption_kind,
|
||||
})
|
||||
sub_lang_list[sub_lang] = caption_url + '&' + params
|
||||
sub_formats = []
|
||||
for ext in ['sbv', 'vtt', 'srt']:
|
||||
params = compat_urllib_parse.urlencode({
|
||||
'lang': original_lang,
|
||||
'tlang': sub_lang,
|
||||
'fmt': ext,
|
||||
'ts': timestamp,
|
||||
'kind': caption_kind,
|
||||
})
|
||||
sub_formats.append({
|
||||
'url': caption_url + '&' + params,
|
||||
'ext': ext,
|
||||
})
|
||||
sub_lang_list[sub_lang] = sub_formats
|
||||
return sub_lang_list
|
||||
# An extractor error can be raise by the download process if there are
|
||||
# no automatic captions but there are subtitles
|
||||
@ -970,10 +979,7 @@ def _extract_count(count_name):
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, video_webpage)
|
||||
return
|
||||
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
|
||||
|
||||
if 'length_seconds' not in video_info:
|
||||
self._downloader.report_warning('unable to extract video duration')
|
||||
@ -1122,6 +1128,7 @@ def _map_to_format_list(urlmap):
|
||||
'description': video_description,
|
||||
'categories': video_categories,
|
||||
'subtitles': video_subtitles,
|
||||
'automatic_captions': automatic_captions,
|
||||
'duration': video_duration,
|
||||
'age_limit': 18 if age_gate else 0,
|
||||
'annotations': video_annotations,
|
||||
|
@ -387,8 +387,8 @@ def _hide_login_info(opts):
|
||||
help='lists all available subtitles for the video')
|
||||
subtitles.add_option(
|
||||
'--sub-format',
|
||||
action='store', dest='subtitlesformat', metavar='FORMAT', default='srt',
|
||||
help='subtitle format (default=srt) ([sbv/vtt] youtube only)')
|
||||
action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
|
||||
help='subtitle format, accepts formats preference, for example: "ass/srt/best"')
|
||||
subtitles.add_option(
|
||||
'--sub-lang', '--sub-langs', '--srt-lang',
|
||||
action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
|
||||
|
@ -496,10 +496,6 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
||||
'zu': 'zul',
|
||||
}
|
||||
|
||||
def __init__(self, downloader=None, subtitlesformat='srt'):
|
||||
super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
|
||||
self._subformat = subtitlesformat
|
||||
|
||||
@classmethod
|
||||
def _conver_lang_code(cls, code):
|
||||
"""Convert language code from ISO 639-1 to ISO 639-2/T"""
|
||||
@ -509,13 +505,14 @@ def run(self, information):
|
||||
if information['ext'] != 'mp4':
|
||||
self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files')
|
||||
return True, information
|
||||
if not information.get('subtitles'):
|
||||
subtitles = information.get('requested_subtitles')
|
||||
if not subtitles:
|
||||
self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
|
||||
return True, information
|
||||
|
||||
sub_langs = [key for key in information['subtitles']]
|
||||
sub_langs = list(subtitles.keys())
|
||||
filename = information['filepath']
|
||||
input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
|
||||
input_files = [filename] + [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()]
|
||||
|
||||
opts = [
|
||||
'-map', '0',
|
||||
|
Loading…
Reference in New Issue
Block a user