[crackle] Add new extractor

This commit is contained in:
remitamine 2016-02-10 22:16:21 +01:00
parent f817d9bec1
commit 80f772c28a
3 changed files with 102 additions and 7 deletions

View File

@ -126,6 +126,7 @@
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .condenast import CondeNastIE from .condenast import CondeNastIE
from .cracked import CrackedIE from .cracked import CrackedIE
from .crackle import CrackleIE
from .criterion import CriterionIE from .criterion import CriterionIE
from .crooksandliars import CrooksAndLiarsIE from .crooksandliars import CrooksAndLiarsIE
from .crunchyroll import ( from .crunchyroll import (

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_duration, parse_duration,
@ -14,14 +15,13 @@ class ComCarCoffIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/', 'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
'info_dict': { 'info_dict': {
'id': 'miranda-sings-happy-thanksgiving-miranda', 'id': '2494164',
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20141127', 'upload_date': '20141127',
'timestamp': 1417107600, 'timestamp': 1417107600,
'duration': 1232, 'duration': 1232,
'title': 'Happy Thanksgiving Miranda', 'title': 'Happy Thanksgiving Miranda',
'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.', 'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
}, },
'params': { 'params': {
'skip_download': 'requires ffmpeg', 'skip_download': 'requires ffmpeg',
@ -39,15 +39,14 @@ def _real_extract(self, url):
r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'), r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
display_id)['videoData'] display_id)['videoData']
video_id = full_data['activeVideo']['video'] display_id = full_data['activeVideo']['video']
video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id] video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
video_id = compat_str(video_data['mediaId'])
thumbnails = [{ thumbnails = [{
'url': video_data['images']['thumb'], 'url': video_data['images']['thumb'],
}, { }, {
'url': video_data['images']['poster'], 'url': video_data['images']['poster'],
}] }]
formats = self._extract_m3u8_formats(
video_data['mediaUrl'], video_id, ext='mp4')
timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601( timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
video_data.get('pubDate')) video_data.get('pubDate'))
@ -55,6 +54,8 @@ def _real_extract(self, url):
video_data.get('duration')) video_data.get('duration'))
return { return {
'_type': 'url_transparent',
'url': 'crackle:%s' % video_id,
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': video_data['title'], 'title': video_data['title'],
@ -62,6 +63,7 @@ def _real_extract(self, url):
'timestamp': timestamp, 'timestamp': timestamp,
'duration': duration, 'duration': duration,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'formats': formats, 'season_number': int_or_none(video_data.get('season')),
'episode_number': int_or_none(video_data.get('episode')),
'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))), 'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
} }

View File

@ -0,0 +1,92 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import int_or_none
class CrackleIE(InfoExtractor):
_VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
_TEST = {
'url': 'http://www.crackle.com/the-art-of-more/2496419',
'info_dict': {
'id': '2496419',
'ext': 'mp4',
'title': 'Heavy Lies the Head',
'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca',
},
'params': {
# m3u8 download
'skip_download': True,
}
}
# extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx
_SUBTITLE_SERVER = 'http://web-us-az.crackle.com'
_UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b'
_THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
# extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
_MEDIA_FILE_SLOTS = {
'c544.flv': {
'width': 544,
'height': 306,
},
'360p.mp4': {
'width': 640,
'height': 360,
},
'480p.mp4': {
'width': 852,
'height': 478,
},
'480p_1mbps.mp4': {
'width': 852,
'height': 478,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
item = self._download_xml(
'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id, video_id).find('i')
title = item.attrib['t']
thumbnail = None
subtitles = {}
formats = self._extract_m3u8_formats('http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id), video_id, 'mp4', fatal=None)
path = item.attrib.get('p')
if path:
thumbnail = self._THUMBNAIL_TEMPLATE % path
http_base_url = 'http://ahttp.crackle.com/' + path
for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
formats.append({
'url': http_base_url + mfs_path,
'format_id': mfs_path.split('.')[0],
'width': mfs_info['width'],
'height': mfs_info['height'],
})
for cc in item.findall('cc'):
locale = cc.attrib.get('l')
v = cc.attrib.get('v')
if locale and v:
if locale not in subtitles:
subtitles[locale] = []
subtitles[locale] = [{
'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v),
'ext': 'ttml',
}]
self._sort_formats(formats, ('width', 'height', 'tbr'))
return {
'id': video_id,
'title': title,
'description': item.attrib.get('d'),
'duration': int(item.attrib.get('r'), 16) if item.attrib.get('r') else None,
'series': item.attrib.get('sn'),
'season_number': int_or_none(item.attrib.get('se')),
'episode_number': int_or_none(item.attrib.get('ep')),
'thumbnail': thumbnail,
'subtitles': subtitles,
'formats': formats,
}