From efdc45a6ea1dad1000d0478928cd4576975b9b3f Mon Sep 17 00:00:00 2001 From: MMM Date: Wed, 9 Nov 2022 10:05:08 +0100 Subject: [PATCH] [extractor/bitchute] Better error for geo-restricted videos (#5474) Authored by: flashdagger --- yt_dlp/extractor/bitchute.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py index f4b6a9a0e..9e3d6337a 100644 --- a/yt_dlp/extractor/bitchute.py +++ b/yt_dlp/extractor/bitchute.py @@ -8,6 +8,7 @@ OnDemandPagedList, clean_html, get_element_by_class, + get_element_by_id, get_elements_html_by_class, int_or_none, orderedSet, @@ -49,6 +50,16 @@ class BitChuteIE(InfoExtractor): 'upload_date': '20181113', }, 'params': {'check_formats': None}, + }, { + # restricted video + 'url': 'https://www.bitchute.com/video/WEnQU7XGcTdl/', + 'info_dict': { + 'id': 'WEnQU7XGcTdl', + 'ext': 'mp4', + 'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft', + }, + 'params': {'skip_download': True}, + 'skip': 'Georestricted in DE', }, { 'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/', 'only_matching': True, @@ -56,6 +67,7 @@ class BitChuteIE(InfoExtractor): 'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent', 'only_matching': True, }] + _GEO_BYPASS = False _HEADERS = { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36', @@ -78,11 +90,18 @@ def _check_format(self, video_url, video_id): 'filesize': int_or_none(response.headers.get('Content-Length')) } + def _raise_if_restricted(self, webpage): + page_title = clean_html(get_element_by_class('page-title', webpage)) or '' + if re.fullmatch(r'(?:Channel|Video) Restricted', page_title): + reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title + self.raise_geo_restricted(reason) + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS) + self._raise_if_restricted(webpage) publish_date = clean_html(get_element_by_class('video-publish-date', webpage)) entries = self._parse_html5_media_entries(url, webpage, video_id)