From 0eaec13ba6abe18d6ddf35f2ebffdcaf3937e485 Mon Sep 17 00:00:00 2001
From: Aleri Kaisattera <73682764+alerikaisattera@users.noreply.github.com>
Date: Sat, 2 Oct 2021 00:45:15 +0600
Subject: [PATCH] [Theta] Add video extractor (#1137)

Authored by: alerikaisattera
---
 yt_dlp/extractor/extractors.py |  5 ++++-
 yt_dlp/extractor/theta.py      | 40 ++++++++++++++++++++++++++++++++--
 2 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 1776a4d26..8e8d269ce 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1433,7 +1433,10 @@
 from .thescene import TheSceneIE
 from .thestar import TheStarIE
 from .thesun import TheSunIE
-from .theta import ThetaIE
+from .theta import (
+    ThetaVideoIE,
+    ThetaStreamIE,
+)
 from .theweatherchannel import TheWeatherChannelIE
 from .thisamericanlife import ThisAmericanLifeIE
 from .thisav import ThisAVIE
diff --git a/yt_dlp/extractor/theta.py b/yt_dlp/extractor/theta.py
index 34c0da815..3b6543629 100644
--- a/yt_dlp/extractor/theta.py
+++ b/yt_dlp/extractor/theta.py
@@ -5,8 +5,8 @@
 from ..utils import try_get
 
 
-class ThetaIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?theta\.tv/(?P<id>[a-z0-9]+)'
+class ThetaStreamIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?theta\.tv/(?!video/)(?P<id>[a-z0-9]+)'
     _TESTS = [{
         'url': 'https://www.theta.tv/davirus',
         'skip': 'The live may have ended',
@@ -49,3 +49,39 @@ def _real_extract(self, url):
             'formats': formats,
             'thumbnail': try_get(info, lambda x: x['live_stream']['thumbnail_url']),
         }
+
+
+class ThetaVideoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?theta\.tv/video/(?P<id>vid[a-z0-9]+)'
+    _TEST = {
+        'url': 'https://www.theta.tv/video/vidiq6aaet3kzf799p0',
+        'md5': '633d8c29eb276bb38a111dbd591c677f',
+        'info_dict': {
+            'id': 'vidiq6aaet3kzf799p0',
+            'ext': 'mp4',
+            'title': 'Theta EdgeCast Tutorial',
+            'uploader': 'Pixiekittie',
+            'description': 'md5:e316253f5bdced8b5a46bb50ae60a09f',
+            'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+/vod_thumb/.+.jpg',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        info = self._download_json(f'https://api.theta.tv/v1/video/{video_id}/raw', video_id)['body']
+
+        m3u8_playlist = try_get(info, lambda x: x['video_urls'][0]['url'])
+
+        formats = self._extract_m3u8_formats(m3u8_playlist, video_id, 'mp4', m3u8_id='hls')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': info.get('title'),
+            'uploader': try_get(info, lambda x: x['user']['username']),
+            'description': info.get('description'),
+            'view_count': info.get('view_count'),
+            'like_count': info.get('like_count'),
+            'formats': formats,
+            'thumbnail': info.get('thumbnail_url'),
+        }