mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-01 00:52:58 +01:00
[crunchyroll] Fix building of ASS subtitles (reported in #4019)
Parse the XML document instead of using regexes; otherwise, Unicode characters are left unescaped.
This commit is contained in:
parent
ac645ac7d0
commit
d65d628613
@ -109,19 +109,17 @@ def next_value(self):
|
|||||||
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
|
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
|
||||||
return zlib.decompress(decrypted_data)
|
return zlib.decompress(decrypted_data)
|
||||||
|
|
||||||
def _convert_subtitles_to_srt(self, subtitles):
|
def _convert_subtitles_to_srt(self, sub_root):
|
||||||
output = ''
|
output = ''
|
||||||
for i, (start, end, text) in enumerate(re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles), 1):
|
|
||||||
start = start.replace('.', ',')
|
for i, event in enumerate(sub_root.findall('./events/event'), 1):
|
||||||
end = end.replace('.', ',')
|
start = event.attrib['start'].replace('.', ',')
|
||||||
text = clean_html(text)
|
end = event.attrib['end'].replace('.', ',')
|
||||||
text = text.replace('\\N', '\n')
|
text = event.attrib['text'].replace('\\N', '\n')
|
||||||
if not text:
|
|
||||||
continue
|
|
||||||
output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
|
output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
def _convert_subtitles_to_ass(self, subtitles):
|
def _convert_subtitles_to_ass(self, sub_root):
|
||||||
output = ''
|
output = ''
|
||||||
|
|
||||||
def ass_bool(strvalue):
|
def ass_bool(strvalue):
|
||||||
@ -130,10 +128,6 @@ def ass_bool(strvalue):
|
|||||||
assvalue = '-1'
|
assvalue = '-1'
|
||||||
return assvalue
|
return assvalue
|
||||||
|
|
||||||
sub_root = xml.etree.ElementTree.fromstring(subtitles)
|
|
||||||
if not sub_root:
|
|
||||||
return output
|
|
||||||
|
|
||||||
output = '[Script Info]\n'
|
output = '[Script Info]\n'
|
||||||
output += 'Title: %s\n' % sub_root.attrib["title"]
|
output += 'Title: %s\n' % sub_root.attrib["title"]
|
||||||
output += 'ScriptType: v4.00+\n'
|
output += 'ScriptType: v4.00+\n'
|
||||||
@ -270,10 +264,13 @@ def _real_extract(self,url):
|
|||||||
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
||||||
if not lang_code:
|
if not lang_code:
|
||||||
continue
|
continue
|
||||||
|
sub_root = xml.etree.ElementTree.fromstring(subtitle)
|
||||||
|
if not sub_root:
|
||||||
|
subtitles[lang_code] = ''
|
||||||
if sub_format == 'ass':
|
if sub_format == 'ass':
|
||||||
subtitles[lang_code] = self._convert_subtitles_to_ass(subtitle)
|
subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
|
||||||
else:
|
else:
|
||||||
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
|
subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root)
|
||||||
|
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
self._list_available_subtitles(video_id, subtitles)
|
self._list_available_subtitles(video_id, subtitles)
|
||||||
|
Loading…
Reference in New Issue
Block a user