1
1
mirror of https://github.com/ytdl-org/youtube-dl synced 2024-11-27 15:16:57 +01:00

[utils] Support ttaf1 namespace in TTML

The ttaf1 namespace is used in subtitles on bbc.co.uk. See #6038
This commit is contained in:
Yen Chi Hsuan 2015-06-21 19:16:59 +08:00
parent 607841af64
commit 4e33577173

View File

@@ -1841,7 +1841,10 @@ def srt_subtitles_timecode(seconds):
def dfxp2srt(dfxp_data): def dfxp2srt(dfxp_data):
_x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'}) _x = functools.partial(xpath_with_ns, ns_map={
'ttml': 'http://www.w3.org/ns/ttml',
'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
})
def parse_node(node): def parse_node(node):
str_or_empty = functools.partial(str_or_none, default='') str_or_empty = functools.partial(str_or_none, default='')
@@ -1849,9 +1852,9 @@ def dfxp2srt(dfxp_data):
out = str_or_empty(node.text) out = str_or_empty(node.text)
for child in node: for child in node:
if child.tag in (_x('ttml:br'), 'br'): if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
out += '\n' + str_or_empty(child.tail) out += '\n' + str_or_empty(child.tail)
elif child.tag in (_x('ttml:span'), 'span'): elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
out += str_or_empty(parse_node(child)) out += str_or_empty(parse_node(child))
else: else:
out += str_or_empty(xml.etree.ElementTree.tostring(child)) out += str_or_empty(xml.etree.ElementTree.tostring(child))
@@ -1860,7 +1863,7 @@ def dfxp2srt(dfxp_data):
dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8')) dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
out = [] out = []
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p') paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
if not paras: if not paras:
raise ValueError('Invalid dfxp/TTML subtitle') raise ValueError('Invalid dfxp/TTML subtitle')