[WebVTT] Adjust parser to accommodate PBS subtitles (#922)

Closes #921
This commit is contained in:
pukkandan 2021-09-08 16:10:10 +05:30 committed by GitHub
parent eab3f867e2
commit 81a136b80f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -89,8 +89,12 @@ def __init__(self, parser):
))
# While the specification <https://www.w3.org/TR/webvtt1/#webvtt-timestamp>
# prescribes that hours must be *2 or more* digits, timestamps with a single
# digit for the hour part have been seen in the wild.
# See https://github.com/yt-dlp/yt-dlp/issues/921
_REGEX_TS = re.compile(r'''(?x)
(?:([0-9]{2,}):)?
(?:([0-9]{1,}):)?
([0-9]{2}):
([0-9]{2})\.
([0-9]{3})?
@ -172,6 +176,7 @@ class Magic(HeaderBlock):
_REGEX_TSMAP = re.compile(r'X-TIMESTAMP-MAP=')
_REGEX_TSMAP_LOCAL = re.compile(r'LOCAL:')
_REGEX_TSMAP_MPEGTS = re.compile(r'MPEGTS:([0-9]+)')
_REGEX_TSMAP_SEP = re.compile(r'[ \t]*,[ \t]*')
@classmethod
def __parse_tsmap(cls, parser):
@ -194,7 +199,7 @@ def __parse_tsmap(cls, parser):
raise ParseError(parser)
else:
raise ParseError(parser)
if parser.consume(','):
if parser.consume(cls._REGEX_TSMAP_SEP):
continue
if parser.consume(_REGEX_NL):
break