[youtube] Fix auto-translated automatic captions

The earlier fix in d49669acad71f640ffd8b78f0ea7911ae1f67720 only covered ASR (automatic speech recognition) caption tracks

Closes #2956
This commit is contained in:
pukkandan 2022-03-27 14:06:26 +05:30
parent 18e4940825
commit 1235d333ab
No known key found for this signature in database
GPG Key ID: 7EEE9E1E817D0A39

View File

@@ -3479,6 +3479,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
subtitles, automatic_captions = {}, {} subtitles, automatic_captions = {}, {}
for lang_code, caption_track in captions.items(): for lang_code, caption_track in captions.items():
base_url = caption_track.get('baseUrl') base_url = caption_track.get('baseUrl')
orig_lang = parse_qs(base_url).get('lang', [None])[-1]
if not base_url: if not base_url:
continue continue
lang_name = self._get_text(caption_track, 'name', max_runs=1) lang_name = self._get_text(caption_track, 'name', max_runs=1)
@@ -3492,6 +3493,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
for trans_code, trans_name in translation_languages.items(): for trans_code, trans_name in translation_languages.items():
if not trans_code: if not trans_code:
continue continue
orig_trans_code = trans_code
if caption_track.get('kind') != 'asr': if caption_track.get('kind') != 'asr':
if 'translated_subs' in self._configuration_arg('skip'): if 'translated_subs' in self._configuration_arg('skip'):
continue continue
@@ -3499,14 +3501,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
trans_name += format_field(lang_name, template=' from %s') trans_name += format_field(lang_name, template=' from %s')
# Add an "-orig" label to the original language so that it can be distinguished. # Add an "-orig" label to the original language so that it can be distinguished.
# The subs are returned without "-orig" as well for compatibility # The subs are returned without "-orig" as well for compatibility
if lang_code == f'a-{trans_code}': if lang_code == f'a-{orig_trans_code}':
process_language( process_language(
automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {}) automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
# Setting tlang=lang returns damaged subtitles. # Setting tlang=lang returns damaged subtitles.
# Not using lang_code == f'a-{trans_code}' here for future-proofing
orig_lang = parse_qs(base_url).get('lang', [None])[-1]
process_language(automatic_captions, base_url, trans_code, trans_name, process_language(automatic_captions, base_url, trans_code, trans_name,
{} if orig_lang == trans_code else {'tlang': trans_code}) {} if orig_lang == orig_trans_code else {'tlang': trans_code})
info['automatic_captions'] = automatic_captions info['automatic_captions'] = automatic_captions
info['subtitles'] = subtitles info['subtitles'] = subtitles