From 4070c00ba77a1dabbb1ea7dc1fdae47b0bc90034 Mon Sep 17 00:00:00 2001 From: EGA-SUPREMO Date: Fri, 17 May 2024 09:09:46 -0400 Subject: [PATCH 1/8] add cablecasttv extractor --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/cablecasttv.py | 55 +++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 yt_dlp/extractor/cablecasttv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 42034275b..c0c780103 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -289,6 +289,7 @@ from .byutv import BYUtvIE from .c56 import C56IE from .cableav import CableAVIE +from .cablecasttv import CableCastTVIE from .callin import CallinIE from .caltrans import CaltransIE from .cam4 import CAM4IE diff --git a/yt_dlp/extractor/cablecasttv.py b/yt_dlp/extractor/cablecasttv.py new file mode 100644 index 000000000..6f35d2e10 --- /dev/null +++ b/yt_dlp/extractor/cablecasttv.py @@ -0,0 +1,55 @@ +from .common import InfoExtractor + +import re + + +class CableCastTVIE(InfoExtractor): + _VALID_URL = r'https://wctv\.wilmette\.com/CablecastPublicSite/show/(?P\d+)' + _TESTS = [{ + 'url': 'https://wctv.wilmette.com/CablecastPublicSite/show/532', + 'md5': 'fc12bce4a9c1335f153500c8fea6e1a8', + 'info_dict': { + 'id': '532', + 'ext': 'mp4', + 'title': 'Village Board Meeting 4/24/24', + }, + }, { + 'url': 'https://fyptt.to/10382/beautiful-livestream-tits-and-nipples-slip-from-girls-who-loves-talking-with-their-viewers/', + 'only_matching': True, + }, { + 'url': 'https://fyptt.to/120/small-tits-fit-blonde-dancing-naked-at-the-front-door-on-tiktok', + 'only_matching': True, + }, { + 'url': 'https://fkbae.to/18', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + formats = [] + format_url = self._html_search_regex(r'"embedURL":"([^"]+)"', webpage, 'video URL') + format_url = re.sub(r'\\', '', format_url) 
+ webpage_video = self._download_webpage(format_url, video_id) + + match = re.search(r'(https:\/\/[^"]+\.mp4)', webpage_video) + format_url = match.group(1) + formats.append({ + 'url': format_url, + 'format_id': 'default', + }) + + title = self._html_search_regex(r'(.+?)', webpage, 'title') + +# base_url = re.search(r'^(https?://[a-zA-Z0-9_-]+\.to)', url).group(1) +# http_headers = {'Referer': base_url} + + return { + 'id': video_id, + 'title': title, + 'formats': formats, +# 'http_headers': http_headers + } + From 151b383fe1137e0de1d0308004a1446ad46a1ff9 Mon Sep 17 00:00:00 2001 From: EGA-SUPREMO Date: Sun, 19 May 2024 11:53:01 -0400 Subject: [PATCH 2/8] add regex for video webpage url --- yt_dlp/extractor/cablecasttv.py | 35 +++++++++++++-------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/yt_dlp/extractor/cablecasttv.py b/yt_dlp/extractor/cablecasttv.py index 6f35d2e10..0dd682844 100644 --- a/yt_dlp/extractor/cablecasttv.py +++ b/yt_dlp/extractor/cablecasttv.py @@ -13,35 +13,28 @@ class CableCastTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Village Board Meeting 4/24/24', }, - }, { - 'url': 'https://fyptt.to/10382/beautiful-livestream-tits-and-nipples-slip-from-girls-who-loves-talking-with-their-viewers/', - 'only_matching': True, - }, { - 'url': 'https://fyptt.to/120/small-tits-fit-blonde-dancing-naked-at-the-front-door-on-tiktok', - 'only_matching': True, - }, { - 'url': 'https://fkbae.to/18', - 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + with open('output.txt', 'w', encoding='utf-8') as file: + file.write(webpage) + + + url_video_webpage = self._html_search_regex(r']*class=\"trms-player\"[^>]*src=\"([^\"]+)\"', webpage, 'url_video_webpage') + webpage_video = self._download_webpage(url_video_webpage, video_id) + + video_url = self._html_search_regex(r'"([^\"]*\.m3u8)"', webpage_video, 'video URL') + print(video_url) + formats = [] - 
format_url = self._html_search_regex(r'"embedURL":"([^"]+)"', webpage, 'video URL') - format_url = re.sub(r'\\', '', format_url) + formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls')) + print(formats) + #format_url = re.sub(r'\\', '', format_url) - webpage_video = self._download_webpage(format_url, video_id) - - match = re.search(r'(https:\/\/[^"]+\.mp4)', webpage_video) - format_url = match.group(1) - formats.append({ - 'url': format_url, - 'format_id': 'default', - }) - - title = self._html_search_regex(r'(.+?)', webpage, 'title') + title = self._html_search_regex(r'(.+?)', webpage, 'title') # base_url = re.search(r'^(https?://[a-zA-Z0-9_-]+\.to)', url).group(1) # http_headers = {'Referer': base_url} From ec017102246f2321472ef18edaeb53e2bc343df6 Mon Sep 17 00:00:00 2001 From: EGA-SUPREMO Date: Sun, 19 May 2024 12:09:27 -0400 Subject: [PATCH 3/8] clean code --- yt_dlp/extractor/cablecasttv.py | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/cablecasttv.py b/yt_dlp/extractor/cablecasttv.py index 0dd682844..cac8eef40 100644 --- a/yt_dlp/extractor/cablecasttv.py +++ b/yt_dlp/extractor/cablecasttv.py @@ -1,48 +1,36 @@ from .common import InfoExtractor -import re - class CableCastTVIE(InfoExtractor): _VALID_URL = r'https://wctv\.wilmette\.com/CablecastPublicSite/show/(?P\d+)' _TESTS = [{ 'url': 'https://wctv.wilmette.com/CablecastPublicSite/show/532', - 'md5': 'fc12bce4a9c1335f153500c8fea6e1a8', + 'md5': '17e7ed129582babf6d1ae5c3b9d70d18', 'info_dict': { 'id': '532', 'ext': 'mp4', 'title': 'Village Board Meeting 4/24/24', }, + }, { + 'url': 'https://wctv.wilmette.com/CablecastPublicSite/show/53/', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - with open('output.txt', 'w', encoding='utf-8') as file: - file.write(webpage) - - - url_video_webpage = 
self._html_search_regex(r']*class=\"trms-player\"[^>]*src=\"([^\"]+)\"', webpage, 'url_video_webpage') - webpage_video = self._download_webpage(url_video_webpage, video_id) - + url_video_page = self._html_search_regex(r']*class=\"trms-player\"[^>]*src=\"([^\"]+)\"', webpage, 'url_video_webpage') + webpage_video = self._download_webpage(url_video_page, video_id) video_url = self._html_search_regex(r'"([^\"]*\.m3u8)"', webpage_video, 'video URL') - print(video_url) formats = [] formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls')) - print(formats) - #format_url = re.sub(r'\\', '', format_url) - title = self._html_search_regex(r'(.+?)', webpage, 'title') - -# base_url = re.search(r'^(https?://[a-zA-Z0-9_-]+\.to)', url).group(1) -# http_headers = {'Referer': base_url} + title = self._og_search_title(webpage) or self._html_search_regex(r'(.+?)', webpage, 'title') return { 'id': video_id, 'title': title, 'formats': formats, -# 'http_headers': http_headers } - From 52de89b43c3b0e776459f1b7147d1e3fdada54f9 Mon Sep 17 00:00:00 2001 From: EGA-SUPREMO Date: Sun, 19 May 2024 12:14:46 -0400 Subject: [PATCH 4/8] clean code --- yt_dlp/extractor/cablecasttv.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/cablecasttv.py b/yt_dlp/extractor/cablecasttv.py index cac8eef40..b55ad5b6d 100644 --- a/yt_dlp/extractor/cablecasttv.py +++ b/yt_dlp/extractor/cablecasttv.py @@ -25,7 +25,9 @@ def _real_extract(self, url): video_url = self._html_search_regex(r'"([^\"]*\.m3u8)"', webpage_video, 'video URL') formats = [] - formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls')) + formats.extend(self._extract_m3u8_formats( + video_url, video_id, ext='mp4', m3u8_id='hls'), + note='Downloading HD m3u8 information', errnote='Unable to download HD m3u8 information') title = self._og_search_title(webpage) or self._html_search_regex(r'(.+?)', webpage, 'title') From 
f86f46a010dd3873453e72e11bd6f1c187a01068 Mon Sep 17 00:00:00 2001 From: EGA-SUPREMO Date: Sun, 19 May 2024 15:12:08 -0400 Subject: [PATCH 5/8] merge conflict --- yt_dlp/extractor/_extractors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index c0c780103..d7a9ac60d 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -288,7 +288,6 @@ from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE -from .cableav import CableAVIE from .cablecasttv import CableCastTVIE from .callin import CallinIE from .caltrans import CaltransIE From c640231330c626208fc9579c4c2b978de24626ff Mon Sep 17 00:00:00 2001 From: EGA-SUPREMO Date: Sun, 19 May 2024 16:35:53 -0400 Subject: [PATCH 6/8] add support for subtitles --- yt_dlp/extractor/cablecasttv.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/cablecasttv.py b/yt_dlp/extractor/cablecasttv.py index b55ad5b6d..f3aeb9a6e 100644 --- a/yt_dlp/extractor/cablecasttv.py +++ b/yt_dlp/extractor/cablecasttv.py @@ -25,9 +25,11 @@ def _real_extract(self, url): video_url = self._html_search_regex(r'"([^\"]*\.m3u8)"', webpage_video, 'video URL') formats = [] - formats.extend(self._extract_m3u8_formats( - video_url, video_id, ext='mp4', m3u8_id='hls'), - note='Downloading HD m3u8 information', errnote='Unable to download HD m3u8 information') + subtitles = {} + + fmts, subs = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) title = self._og_search_title(webpage) or self._html_search_regex(r'(.+?)', webpage, 'title') @@ -35,4 +37,5 @@ def _real_extract(self, url): 'id': video_id, 'title': title, 'formats': formats, + 'subtitles': subtitles, } From 8ebdf57c9c3759cd37e0591b0062c0368154f40c Mon Sep 17 00:00:00 2001 From: EGA-SUPREMO Date: Sun, 19 May 2024 19:15:19 -0400 Subject: 
[PATCH 7/8] change regex to match most cableCastTV websites --- yt_dlp/extractor/cablecasttv.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/cablecasttv.py b/yt_dlp/extractor/cablecasttv.py index f3aeb9a6e..6c51c1014 100644 --- a/yt_dlp/extractor/cablecasttv.py +++ b/yt_dlp/extractor/cablecasttv.py @@ -2,7 +2,7 @@ class CableCastTVIE(InfoExtractor): - _VALID_URL = r'https://wctv\.wilmette\.com/CablecastPublicSite/show/(?P\d+)' + _VALID_URL = r'https:\/\/[a-zA-Z0-9-]+\.cablecast\.tv\/show\/(?P\d+)' _TESTS = [{ 'url': 'https://wctv.wilmette.com/CablecastPublicSite/show/532', 'md5': '17e7ed129582babf6d1ae5c3b9d70d18', @@ -11,6 +11,12 @@ class CableCastTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Village Board Meeting 4/24/24', }, + }, { + 'url': 'https://capitoltvri.cablecast.tv/show/9199?site=1', + 'only_matching': True, + }, { + 'url': 'https://king-county-tv.cablecast.tv/show/504', + 'only_matching': True, }, { 'url': 'https://wctv.wilmette.com/CablecastPublicSite/show/53/', 'only_matching': True, From 301d51e362f3eeaed2662727660d8b9448010f03 Mon Sep 17 00:00:00 2001 From: EGA-SUPREMO Date: Thu, 13 Jun 2024 07:37:10 -0400 Subject: [PATCH 8/8] fix regex for url --- yt_dlp/extractor/cablecasttv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/cablecasttv.py b/yt_dlp/extractor/cablecasttv.py index 6c51c1014..a8ca5bcdd 100644 --- a/yt_dlp/extractor/cablecasttv.py +++ b/yt_dlp/extractor/cablecasttv.py @@ -2,7 +2,7 @@ class CableCastTVIE(InfoExtractor): - _VALID_URL = r'https:\/\/[a-zA-Z0-9-]+\.cablecast\.tv\/show\/(?P\d+)' + _VALID_URL = r'https?://(?:[a-z\-\.]+\.cablecast\.tv/show/|[a-z\-\.]+/CablecastPublicSite/show/)(?P\d+)' _TESTS = [{ 'url': 'https://wctv.wilmette.com/CablecastPublicSite/show/532', 'md5': '17e7ed129582babf6d1ae5c3b9d70d18',