mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-25 12:45:51 +01:00
parent
672f1bd849
commit
bf6427d2fb
@ -58,6 +58,8 @@
|
|||||||
xpath_text,
|
xpath_text,
|
||||||
render_table,
|
render_table,
|
||||||
match_str,
|
match_str,
|
||||||
|
parse_dfxp_time_expr,
|
||||||
|
dfxp2srt,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -581,6 +583,42 @@ def test_match_str(self):
|
|||||||
'like_count > 100 & dislike_count <? 50 & description',
|
'like_count > 100 & dislike_count <? 50 & description',
|
||||||
{'like_count': 190, 'dislike_count': 10}))
|
{'like_count': 190, 'dislike_count': 10}))
|
||||||
|
|
||||||
|
def test_parse_dfxp_time_expr(self):
    # Each pair is (time expression, expected number of seconds); the cases
    # cover a missing/empty value, plain offsets and clock times.
    expectations = (
        (None, 0.0),
        ('', 0.0),
        ('0.1', 0.1),
        ('0.1s', 0.1),
        ('00:00:01', 1.0),
        ('00:00:01.100', 1.1),
    )
    for time_expr, expected_seconds in expectations:
        self.assertEqual(parse_dfxp_time_expr(time_expr), expected_seconds)
|
||||||
|
|
||||||
|
def test_dfxp2srt(self):
    # Three cues: plain ASCII text, non-ASCII text split by <br/>, and a
    # <br/> nested inside a <span>.
    ttml_source = '''<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
<body>
<div xml:lang="en">
<p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
<p begin="1" end="2">第二行<br/>♪♪</p>
<p begin="2" end="3"><span>Third<br/>Line</span></p>
</div>
</body>
</tt>'''
    # Every SRT cue is "<index>\n<begin> --> <end>\n<text>" followed by a
    # blank line.
    expected_srt = '''1
00:00:00,000 --> 00:00:01,000
The following line contains Chinese characters and special symbols

2
00:00:01,000 --> 00:00:02,000
第二行
♪♪

3
00:00:02,000 --> 00:00:03,000
Third
Line

'''
    converted = dfxp2srt(ttml_source)
    self.assertEqual(converted, expected_srt)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
prepend_extension,
|
prepend_extension,
|
||||||
shell_quote,
|
shell_quote,
|
||||||
subtitles_filename,
|
subtitles_filename,
|
||||||
|
dfxp2srt,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -651,6 +652,30 @@ def run(self, info):
|
|||||||
'format' % new_ext)
|
'format' % new_ext)
|
||||||
continue
|
continue
|
||||||
new_file = subtitles_filename(filename, lang, new_ext)
|
new_file = subtitles_filename(filename, lang, new_ext)
|
||||||
|
|
||||||
|
if ext == 'dfxp' or ext == 'ttml':
|
||||||
|
self._downloader.report_warning(
|
||||||
|
'You have requested to convert dfxp (TTML) subtitles into another format, '
|
||||||
|
'which results in style information loss')
|
||||||
|
|
||||||
|
dfxp_file = subtitles_filename(filename, lang, ext)
|
||||||
|
srt_file = subtitles_filename(filename, lang, 'srt')
|
||||||
|
|
||||||
|
with io.open(dfxp_file, 'rt', encoding='utf-8') as f:
|
||||||
|
srt_data = dfxp2srt(f.read())
|
||||||
|
|
||||||
|
with io.open(srt_file, 'wt', encoding='utf-8') as f:
|
||||||
|
f.write(srt_data)
|
||||||
|
|
||||||
|
ext = 'srt'
|
||||||
|
subs[lang] = {
|
||||||
|
'ext': 'srt',
|
||||||
|
'data': srt_data
|
||||||
|
}
|
||||||
|
|
||||||
|
if new_ext == 'srt':
|
||||||
|
continue
|
||||||
|
|
||||||
self.run_ffmpeg(
|
self.run_ffmpeg(
|
||||||
subtitles_filename(filename, lang, ext),
|
subtitles_filename(filename, lang, ext),
|
||||||
new_file, ['-f', new_format])
|
new_file, ['-f', new_format])
|
||||||
|
@ -1800,6 +1800,59 @@ def _match_func(info_dict):
|
|||||||
return _match_func
|
return _match_func
|
||||||
|
|
||||||
|
|
||||||
|
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP (TTML) time expression into a number of seconds.

    Two syntaxes are recognised:

    * a plain offset in seconds with an optional trailing ``s``,
      e.g. ``'0.1'`` or ``'0.1s'``;
    * a clock time ``H:MM:SS`` with an optional fractional part,
      e.g. ``'00:00:01.100'``.

    Returns 0.0 when ``time_expr`` is None or empty, the number of seconds
    for a recognised expression, and None when the expression matches
    neither syntax.
    """
    if not time_expr:
        return 0.0

    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match is not None:
        return float(offset_match.group('time_offset'))

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
    if clock_match is None:
        # Unsupported syntax: signal it to the caller instead of guessing.
        return None

    hours, minutes, seconds = clock_match.groups()
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds)
|
||||||
|
|
||||||
|
|
||||||
|
def format_srt_time(seconds):
    """Format a duration given in seconds as an SRT timestamp.

    ``seconds`` is a non-negative int or float. The result has the form
    ``HH:MM:SS,mmm`` (for example ``00:00:02,300``), rounded to the nearest
    millisecond.
    """
    # Convert to a whole number of milliseconds first. Taking the fractional
    # part of a float and truncating it loses a millisecond whenever the
    # value is not exactly representable (2.3 would come out as ",299").
    total_millisecs = int(round(seconds * 1000))
    (secs, millisecs) = divmod(total_millisecs, 1000)
    (mins, secs) = divmod(secs, 60)
    (hours, mins) = divmod(mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, millisecs)
|
||||||
|
|
||||||
|
|
||||||
|
def dfxp2srt(dfxp_data):
    """Convert a DFXP (TTML) subtitle document into SRT text.

    ``dfxp_data`` is the XML document as a text string. Every ``<p>``
    element in the ``http://www.w3.org/ns/ttml`` namespace becomes one SRT
    cue, numbered from 1 in document order, with its ``begin`` and ``end``
    attributes used as the cue timestamps.

    Returns the concatenated cues as a single string; each cue is followed
    by a blank line.
    """
    _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})
    str_or_empty = functools.partial(str_or_none, default='')

    # Qualified tag names are loop-invariant, so resolve them once.
    br_tag = _x('ttml:br')
    span_tag = _x('ttml:span')

    def parse_node(node):
        """Return the text of ``node`` with <br/> rendered as a newline."""
        out = str_or_empty(node.text)

        for child in node:
            if child.tag == br_tag:
                out += '\n' + str_or_empty(child.tail)
            elif child.tag == span_tag:
                # Styling is dropped, but the text after the closing
                # </span> (the element's tail) belongs to the parent and
                # must be kept, just as it is for <br/>.
                out += str_or_empty(parse_node(child)) + str_or_empty(child.tail)
            else:
                # Unknown elements are passed through as serialized markup.
                # NOTE(review): tostring() returns bytes on Python 3;
                # confirm str_or_none turns that into readable text.
                out += str_or_empty(xml.etree.ElementTree.tostring(child))

        return out

    dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
    paras = dfxp.findall(_x('.//ttml:p'))

    out = []
    for index, para in enumerate(paras, 1):
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            format_srt_time(parse_dfxp_time_expr(para.attrib.get('begin'))),
            format_srt_time(parse_dfxp_time_expr(para.attrib.get('end'))),
            parse_node(para)))

    return ''.join(out)
|
||||||
|
|
||||||
|
|
||||||
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
|
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
|
||||||
def __init__(self, proxies=None):
|
def __init__(self, proxies=None):
|
||||||
# Set default handlers
|
# Set default handlers
|
||||||
|
Loading…
Reference in New Issue
Block a user