From c73fae1e2e4421df664aefd1d14a72596caf9e2f Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Tue, 10 Feb 2015 01:39:43 +0100
Subject: [PATCH] [commonmistakes] Detect BOMs at the beginning of URLs

Reported at https://bugzilla.redhat.com/show_bug.cgi?id=1093517 .
---
 youtube_dl/extractor/__init__.py       |  2 +-
 youtube_dl/extractor/commonmistakes.py | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 55ca0d6e4..fb1e7f325 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -74,7 +74,7 @@
 from .collegerama import CollegeRamaIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
 from .comcarcoff import ComCarCoffIE
-from .commonmistakes import CommonMistakesIE
+from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .criterion import CriterionIE
diff --git a/youtube_dl/extractor/commonmistakes.py b/youtube_dl/extractor/commonmistakes.py
index dbbf27a74..2f86e2381 100644
--- a/youtube_dl/extractor/commonmistakes.py
+++ b/youtube_dl/extractor/commonmistakes.py
@@ -27,3 +27,20 @@ def _real_extract(self, url):
         if not self._downloader.params.get('verbose'):
             msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
         raise ExtractorError(msg, expected=True)
+
+
+class UnicodeBOMIE(InfoExtractor):
+    IE_DESC = False
+    _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
+
+    _TESTS = [{
+        'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        real_url = self._match_id(url)
+        self.report_warning(
+            'Your URL starts with a Byte Order Mark (BOM). '
+            'Removing the BOM and looking for "%s" ...' % real_url)
+        return self.url_result(real_url)