Merge branch 'bleacherreport' of github.com:remitamine/youtube-dl into remitamine-bleacherreport

This commit is contained in:
remitamine 2015-12-21 11:18:32 +01:00
commit 2c28da8e05
5 changed files with 218 additions and 118 deletions

View File

@ -61,6 +61,10 @@
from .bet import BetIE
from .bild import BildIE
from .bilibili import BiliBiliIE
from .bleacherreport import (
BleacherReportIE,
BleacherReportCMSIE,
)
from .blinkx import BlinkxIE
from .bloomberg import BloombergIE
from .bpb import BpbIE

View File

@ -0,0 +1,84 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
)
class AMPIE(InfoExtractor):
# parse Akamai Adaptive Media Player feed
def _extract_feed_info(self, url):
item = self._download_json(
url, None, 'Downloading Akamai AMP feed',
'Unable to download Akamai AMP feed')['channel']['item']
video_id = item['guid']
def get_media_node(name, default=None):
media_name = 'media-%s' % name
media_group = item.get('media-group') or item
return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
thumbnails = []
media_thumbnail = get_media_node('thumbnail')
if media_thumbnail:
if isinstance(media_thumbnail, dict):
media_thumbnail = [media_thumbnail]
for thumbnail_data in media_thumbnail:
thumbnail = thumbnail_data['@attributes']
thumbnails.append({
'url': self._proto_relative_url(thumbnail['url'], 'http:'),
'width': int_or_none(thumbnail.get('width')),
'height': int_or_none(thumbnail.get('height')),
})
subtitles = {}
media_subtitle = get_media_node('subTitle')
if media_subtitle:
if isinstance(media_subtitle, dict):
media_subtitle = [media_subtitle]
for subtitle_data in media_subtitle:
subtitle = subtitle_data['@attributes']
lang = subtitle.get('lang') or 'en'
subtitles[lang] = [{'url': subtitle['href']}]
formats = []
media_content = get_media_node('content')
if isinstance(media_content, dict):
media_content = [media_content]
for media_data in media_content:
media = media_data['@attributes']
media_type = media['type']
if media_type == 'video/f4m':
f4m_formats = self._extract_f4m_formats(
media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
video_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
elif media_type == 'application/x-mpegURL':
m3u8_formats = self._extract_m3u8_formats(
media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
else:
formats.append({
'format_id': media_data['media-category']['@attributes']['label'],
'url': media['url'],
'tbr': int_or_none(media.get('bitrate')),
'filesize': int_or_none(media.get('fileSize')),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': get_media_node('title'),
'description': get_media_node('description'),
'thumbnails': thumbnails,
'timestamp': parse_iso8601(item.get('pubDate'), ' '),
'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')),
'formats': formats,
}

View File

@ -0,0 +1,106 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .amp import AMPIE
from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
)
class BleacherReportIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
_TESTS = [{
'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
'md5': 'a3ffc3dc73afdbc2010f02d98f990f20',
'info_dict': {
'id': '2496438',
'ext': 'mp4',
'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
'uploader_id': 3992341,
'description': 'CFB, ACC, Florida State',
'timestamp': 1434380212,
'upload_date': '20150615',
'uploader': 'Team Stream Now ',
},
'add_ie': ['Ooyala'],
}, {
'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50',
'info_dict': {
'id': '2586817',
'ext': 'mp4',
'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
'timestamp': 1446839961,
'uploader': 'Sean Fay',
'description': 'md5:825e94e0f3521df52fa83b2ed198fa20',
'uploader_id': 6466954,
'upload_date': '20151011',
},
'add_ie': ['Youtube'],
}]
def _real_extract(self, url):
article_id = self._match_id(url)
article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article']
thumbnails = []
primary_photo = article_data.get('primaryPhoto')
if primary_photo:
thumbnails = [{
'url': primary_photo['url'],
'width': primary_photo.get('width'),
'height': primary_photo.get('height'),
}]
info = {
'_type': 'url_transparent',
'id': article_id,
'title': article_data['title'],
'uploader': article_data.get('author', {}).get('name'),
'uploader_id': article_data.get('authorId'),
'timestamp': parse_iso8601(article_data.get('createdAt')),
'thumbnails': thumbnails,
'comment_count': int_or_none(article_data.get('commentsCount')),
'view_count': int_or_none(article_data.get('hitCount')),
}
video = article_data.get('video')
if video:
video_type = video['type']
if video_type == 'cms.bleacherreport.com':
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
elif video_type == 'ooyala.com':
info['url'] = 'ooyala:%s' % video['id']
elif video_type == 'youtube.com':
info['url'] = video['id']
elif video_type == 'vine.co':
info['url'] = 'https://vine.co/v/%s' % video['id']
else:
info['url'] = video_type + video['id']
return info
else:
raise ExtractorError('no video in the article', expected=True)
class BleacherReportCMSIE(AMPIE):
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
_TESTS = [{
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
'md5': 'f0ca220af012d4df857b54f792c586bb',
'info_dict': {
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
'ext': 'flv',
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id)
info['id'] = video_id
return info

View File

@ -3,7 +3,7 @@
import itertools
from .common import InfoExtractor
from .amp import AMPIE
from ..compat import (
compat_HTTPError,
compat_urllib_parse,
@ -12,14 +12,11 @@
from ..utils import (
ExtractorError,
clean_html,
determine_ext,
int_or_none,
parse_iso8601,
sanitized_Request,
)
class DramaFeverBaseIE(InfoExtractor):
class DramaFeverBaseIE(AMPIE):
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
_NETRC_MACHINE = 'dramafever'
@ -80,60 +77,25 @@ class DramaFeverIE(DramaFeverBaseIE):
'timestamp': 1404336058,
'upload_date': '20140702',
'duration': 343,
}
},
'params': {
# m3u8 download
'skip_download': True,
},
}
def _real_extract(self, url):
video_id = self._match_id(url).replace('/', '.')
try:
feed = self._download_json(
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id,
video_id, 'Downloading episode JSON')['channel']['item']
info = self._extract_feed_info(
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
raise ExtractorError(
'Currently unavailable in your country.', expected=True)
raise
media_group = feed.get('media-group', {})
formats = []
for media_content in media_group['media-content']:
src = media_content.get('@attributes', {}).get('url')
if not src:
continue
ext = determine_ext(src)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
src, video_id, f4m_id='hds'))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', m3u8_id='hls'))
else:
formats.append({
'url': src,
})
self._sort_formats(formats)
title = media_group.get('media-title')
description = media_group.get('media-description')
duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration'))
thumbnail = self._proto_relative_url(
media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
timestamp = parse_iso8601(feed.get('pubDate'), ' ')
subtitles = {}
for media_subtitle in media_group.get('media-subTitle', []):
lang = media_subtitle.get('@attributes', {}).get('lang')
href = media_subtitle.get('@attributes', {}).get('href')
if not lang or not href:
continue
subtitles[lang] = [{
'ext': 'ttml',
'url': href,
}]
series_id, episode_number = video_id.split('.')
episode_info = self._download_json(
# We only need a single episode info, so restricting page size to one episode
@ -146,21 +108,12 @@ def _real_extract(self, url):
if value:
subfile = value[0].get('subfile') or value[0].get('new_subfile')
if subfile and subfile != 'http://www.dramafever.com/st/':
subtitles.setdefault('English', []).append({
info['subtitiles'].setdefault('English', []).append({
'ext': 'srt',
'url': subfile,
})
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
'subtitles': subtitles,
}
return info
class DramaFeverSeriesIE(DramaFeverBaseIE):

View File

@ -2,14 +2,10 @@
import re
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
int_or_none,
)
from .amp import AMPIE
class FoxNewsIE(InfoExtractor):
class FoxNewsIE(AMPIE):
IE_DESC = 'Fox News and Fox Business Video'
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
_TESTS = [
@ -20,10 +16,10 @@ class FoxNewsIE(InfoExtractor):
'id': '3937480',
'ext': 'flv',
'title': 'Frozen in Time',
'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
'description': '16-year-old girl is size of toddler',
'duration': 265,
'timestamp': 1304411491,
'upload_date': '20110503',
# 'timestamp': 1304411491,
# 'upload_date': '20110503',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
@ -34,10 +30,10 @@ class FoxNewsIE(InfoExtractor):
'id': '3922535568001',
'ext': 'mp4',
'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
'description': "Congressman discusses the president's executive action",
'description': "Congressman discusses president's plan",
'duration': 292,
'timestamp': 1417662047,
'upload_date': '20141204',
# 'timestamp': 1417662047,
# 'upload_date': '20141204',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
@ -52,52 +48,9 @@ class FoxNewsIE(InfoExtractor):
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
host = mobj.group('host')
host, video_id = re.match(self._VALID_URL, url).groups()
video = self._download_json(
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id)
item = video['channel']['item']
title = item['title']
description = item['description']
timestamp = parse_iso8601(item['dc-date'])
media_group = item['media-group']
duration = None
formats = []
for media in media_group['media-content']:
attributes = media['@attributes']
video_url = attributes['url']
if video_url.endswith('.f4m'):
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
elif video_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
elif not video_url.endswith('.smil'):
duration = int_or_none(attributes.get('duration'))
formats.append({
'url': video_url,
'format_id': media['media-category']['@attributes']['label'],
'preference': 1,
'vbr': int_or_none(attributes.get('bitrate')),
'filesize': int_or_none(attributes.get('fileSize'))
})
self._sort_formats(formats)
media_thumbnail = media_group['media-thumbnail']['@attributes']
thumbnails = [{
'url': media_thumbnail['url'],
'width': int_or_none(media_thumbnail.get('width')),
'height': int_or_none(media_thumbnail.get('height')),
}] if media_thumbnail else []
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'timestamp': timestamp,
'formats': formats,
'thumbnails': thumbnails,
}
info = self._extract_feed_info(
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
info['id'] = video_id
return info