1
1
mirror of https://github.com/ytdl-org/youtube-dl synced 2024-06-21 13:40:03 +02:00

[screenwavemedia] Simplify (#3766)

This commit is contained in:
Philipp Hagemeister 2014-12-12 02:11:58 +01:00
parent 807962f4a1
commit f17e4c9c28
3 changed files with 113 additions and 133 deletions

View File

@ -622,23 +622,17 @@ class YoutubeDL(object):
ie_result['url'], ie_key=ie_result.get('ie_key'), ie_result['url'], ie_key=ie_result.get('ie_key'),
extra_info=extra_info, download=False, process=False) extra_info=extra_info, download=False, process=False)
def make_result(embedded_info): new_result = ie_result.copy()
new_result = ie_result.copy() for f in ('_type', 'id', 'url', 'ext', 'player_url', 'formats',
for f in ('_type', 'url', 'ext', 'player_url', 'formats', 'entries', 'ie_key', 'duration',
'entries', 'ie_key', 'duration', 'subtitles', 'annotations', 'format',
'subtitles', 'annotations', 'format', 'thumbnail', 'thumbnails'):
'thumbnail', 'thumbnails'): if f in new_result:
if f in new_result: del new_result[f]
del new_result[f] if f in info:
if f in embedded_info: new_result[f] = info[f]
new_result[f] = embedded_info[f]
return new_result
new_result = make_result(info)
assert new_result.get('_type') != 'url_transparent' assert new_result.get('_type') != 'url_transparent'
if new_result.get('_type') == 'compat_list':
new_result['entries'] = [
make_result(e) for e in new_result['entries']]
return self.process_ie_result( return self.process_ie_result(
new_result, download=download, extra_info=extra_info) new_result, download=download, extra_info=extra_info)

View File

@ -335,7 +335,7 @@ from .savefrom import SaveFromIE
from .sbs import SBSIE from .sbs import SBSIE
from .scivee import SciVeeIE from .scivee import SciVeeIE
from .screencast import ScreencastIE from .screencast import ScreencastIE
from .screenwavemedia import ScreenwaveMediaIE from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
from .servingsys import ServingSysIE from .servingsys import ServingSysIE
from .sexu import SexuIE from .sexu import SexuIE
from .sexykarma import SexyKarmaIE from .sexykarma import SexyKarmaIE

View File

@ -6,109 +6,28 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
month_by_name,
int_or_none, int_or_none,
month_by_name,
unified_strdate,
) )
class ScreenwaveMediaIE(InfoExtractor): class ScreenwaveMediaIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?' \ _VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
r':(?P<generic>player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<video_id>.+))' \
r'|(?P<cinemassacre>(?:www\.)?cinemassacre\.com/(?P<cm_date_Y>[0-9]{4})/(?P<cm_date_m>[0-9]{2})/(?P<cm_date_d>[0-9]{2})/(?P<cm_display_id>[^?#/]+))' \
r'|(?P<teamfourstar>(?:www\.)?teamfourstar\.com/video/(?P<tfs_display_id>[a-z0-9\-]+)/?)' \
r')'
_TESTS = [ _TESTS = [{
{ 'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', 'only_matching': True,
'md5': 'fde81fbafaee331785f58cd6c0d46190', }]
'info_dict': {
'id': 'Cinemasssacre-19911',
'ext': 'mp4',
'upload_date': '20121110',
'title': '“Angry Video Game Nerd: The Movie” Trailer',
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
},
},
{
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
'md5': 'd72f10cd39eac4215048f62ab477a511',
'info_dict': {
'id': 'Cinemasssacre-521be8ef82b16',
'ext': 'mp4',
'upload_date': '20131002',
'title': 'The Mummys Hand (1940)',
},
}
]
def _cinemassacre_get_info(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('cm_display_id')
webpage = self._download_webpage(url, display_id)
video_date = mobj.group('cm_date_Y') + mobj.group('cm_date_m') + mobj.group('cm_date_d')
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', webpage)
if not mobj:
raise ExtractorError('Can\'t extract embed url and video id')
playerdata_url = mobj.group('embed_url')
video_title = self._html_search_regex(
r'<title>(?P<title>.+?)\|', webpage, 'title')
video_description = self._html_search_regex(
r'<div class="entry-content">(?P<description>.+?)</div>',
webpage, 'description', flags=re.DOTALL, fatal=False)
video_thumbnail = self._og_search_thumbnail(webpage)
return {
'title': video_title,
'description': video_description,
'upload_date': video_date,
'thumbnail': video_thumbnail,
'_embed_url': playerdata_url,
}
def _teamfourstar_get_info(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('tfs_display_id')
webpage = self._download_webpage(url, display_id)
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', webpage)
if not mobj:
raise ExtractorError('Can\'t extract embed url and video id')
playerdata_url = mobj.group('embed_url')
video_title = self._html_search_regex(
r'<div class="heroheadingtitle">(?P<title>.+?)</div>', webpage, 'title')
video_date = self._html_search_regex(
r'<div class="heroheadingdate">(?P<date>.+?)</div>', webpage, 'date')
mobj = re.match('(?P<month>[A-Z][a-z]+) (?P<day>\d+), (?P<year>\d+)', video_date)
video_date = '%04u%02u%02u' % (int(mobj.group('year')), month_by_name(mobj.group('month')), int(mobj.group('day')))
video_description = self._html_search_regex(
r'<div class="postcontent">(?P<description>.+?)</div>', webpage, 'description', flags=re.DOTALL)
video_thumbnail = self._og_search_thumbnail(webpage)
return {
'title': video_title,
'description': video_description,
'upload_date': video_date,
'thumbnail': video_thumbnail,
'_embed_url': playerdata_url,
}
def _screenwavemedia_get_info(self, url):
mobj = re.match(self._VALID_URL, url)
if not mobj:
raise ExtractorError('Can\'t extract embed url and video id')
video_id = mobj.group('video_id')
def _real_extract(self, url):
video_id = self._match_id(url)
playerdata = self._download_webpage(url, video_id, 'Downloading player webpage') playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
vidtitle = self._search_regex( vidtitle = self._search_regex(
r'\'vidtitle\'\s*:\s*"([^\']+)"', playerdata, 'vidtitle').replace('\\/', '/') r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
vidurl = self._search_regex( vidurl = self._search_regex(
r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/') r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 'vidurl').replace('\\/', '/')
pageurl = self._search_regex(
r'\'pageurl\'\s*:\s*"([^\']+)"', playerdata, 'pageurl', fatal=False).replace('\\/', '/')
videolist_url = None videolist_url = None
@ -134,61 +53,128 @@ class ScreenwaveMediaIE(InfoExtractor):
file_ = src.partition(':')[-1] file_ = src.partition(':')[-1]
width = int_or_none(video.get('width')) width = int_or_none(video.get('width'))
height = int_or_none(video.get('height')) height = int_or_none(video.get('height'))
bitrate = int_or_none(video.get('system-bitrate')) bitrate = int_or_none(video.get('system-bitrate'), scale=1000)
format = { format = {
'url': baseurl + file_, 'url': baseurl + file_,
'format_id': src.rpartition('.')[0].rpartition('_')[-1], 'format_id': src.rpartition('.')[0].rpartition('_')[-1],
} }
if width or height: if width or height:
format.update({ format.update({
'tbr': bitrate // 1000 if bitrate else None, 'tbr': bitrate,
'width': width, 'width': width,
'height': height, 'height': height,
}) })
else: else:
format.update({ format.update({
'abr': bitrate // 1000 if bitrate else None, 'abr': bitrate,
'vcodec': 'none', 'vcodec': 'none',
}) })
formats.append(format) formats.append(format)
self._sort_formats(formats)
else: else:
formats = [{ formats = [{
'url': vidurl, 'url': vidurl,
}] }]
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'title': vidtitle, 'title': vidtitle,
'formats': formats, 'formats': formats,
'_episode_page': pageurl,
} }
class CinemassacreIE(InfoExtractor):
_VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
_TESTS = [
{
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
'md5': 'fde81fbafaee331785f58cd6c0d46190',
'info_dict': {
'id': 'Cinemassacre-19911',
'ext': 'mp4',
'upload_date': '20121110',
'title': '“Angry Video Game Nerd: The Movie” Trailer',
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
},
},
{
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
'md5': 'd72f10cd39eac4215048f62ab477a511',
'info_dict': {
'id': 'Cinemassacre-521be8ef82b16',
'ext': 'mp4',
'upload_date': '20131002',
'title': 'The Mummys Hand (1940)',
},
}
]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id')
video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')
swm_info = None webpage = self._download_webpage(url, display_id)
site_info = None
if mobj.group('generic'): playerdata_url = self._search_regex(
swm_info = self._screenwavemedia_get_info(url) r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
url = swm_info['_episode_page'] webpage, 'player data URL')
mobj = re.match(self._VALID_URL, url) video_title = self._html_search_regex(
r'<title>(?P<title>.+?)\|', webpage, 'title')
video_description = self._html_search_regex(
r'<div class="entry-content">(?P<description>.+?)</div>',
webpage, 'description', flags=re.DOTALL, fatal=False)
video_thumbnail = self._og_search_thumbnail(webpage)
if mobj: return {
if mobj.group('cinemassacre'): '_type': 'url_transparent',
site_info = self._cinemassacre_get_info(url) 'display_id': display_id,
elif mobj.group('teamfourstar'): 'title': video_title,
site_info = self._teamfourstar_get_info(url) 'description': video_description,
'upload_date': video_date,
'thumbnail': video_thumbnail,
'url': playerdata_url,
}
if not swm_info:
if site_info:
swm_info = self._screenwavemedia_get_info(site_info['_embed_url'])
if not swm_info: class TeamFourIE(InfoExtractor):
raise ExtractorError("Failed to extract metadata for this URL") _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
_TEST = {
'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
'info_dict': {
'id': 'TeamFourStar-5292a02f20bfa',
'ext': 'mp4',
'upload_date': '20130401',
'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
'title': 'A Moment With TFS Episode 4',
}
}
if site_info: def _real_extract(self, url):
swm_info.update(site_info) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
return swm_info playerdata_url = self._search_regex(
r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
webpage, 'player data URL')
video_title = self._html_search_regex(
r'<div class="heroheadingtitle">(?P<title>.+?)</div>',
webpage, 'title')
video_date = unified_strdate(self._html_search_regex(
r'<div class="heroheadingdate">(?P<date>.+?)</div>',
webpage, 'date', fatal=False))
video_description = self._html_search_regex(
r'(?s)<div class="postcontent">(?P<description>.+?)</div>',
webpage, 'description', fatal=False)
video_thumbnail = self._og_search_thumbnail(webpage)
return {
'_type': 'url_transparent',
'display_id': display_id,
'title': video_title,
'description': video_description,
'upload_date': video_date,
'thumbnail': video_thumbnail,
'url': playerdata_url,
}