Merge remote-tracking branch 'origin/master'

This commit is contained in:
Philipp Hagemeister 2014-09-22 12:53:41 +02:00
commit 45c85d7ba1
6 changed files with 73 additions and 26 deletions

View File

@ -16,6 +16,7 @@
format_bytes, format_bytes,
encodeFilename, encodeFilename,
sanitize_open, sanitize_open,
xpath_text,
) )
@ -251,6 +252,8 @@ def real_download(self, filename, info_dict):
# We only download the first fragment # We only download the first fragment
fragments_list = fragments_list[:1] fragments_list = fragments_list[:1]
total_frags = len(fragments_list) total_frags = len(fragments_list)
# For some akamai manifests we'll need to add a query to the fragment url
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
@ -290,6 +293,8 @@ def frag_progress_hook(status):
for (seg_i, frag_i) in fragments_list: for (seg_i, frag_i) in fragments_list:
name = 'Seg%d-Frag%d' % (seg_i, frag_i) name = 'Seg%d-Frag%d' % (seg_i, frag_i)
url = base_url + name url = base_url + name
if akamai_pv:
url += '?' + akamai_pv.strip(';')
frag_filename = '%s-%s' % (tmpfilename, name) frag_filename = '%s-%s' % (tmpfilename, name)
success = http_dl.download(frag_filename, {'url': url}) success = http_dl.download(frag_filename, {'url': url})
if not success: if not success:

View File

@ -249,7 +249,10 @@
from .novamov import NovaMovIE from .novamov import NovaMovIE
from .nowness import NownessIE from .nowness import NownessIE
from .nowvideo import NowVideoIE from .nowvideo import NowVideoIE
from .npo import NPOIE from .npo import (
NPOIE,
TegenlichtVproIE,
)
from .nrk import ( from .nrk import (
NRKIE, NRKIE,
NRKTVIE, NRKTVIE,

View File

@ -16,9 +16,9 @@ class NBCIE(InfoExtractor):
_TEST = { _TEST = {
'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', 'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
'md5': '54d0fbc33e0b853a65d7b4de5c06d64e', # md5 checksum is not stable
'info_dict': { 'info_dict': {
'id': 'u1RInQZRN7QJ', 'id': 'bTmnLCvIbaaH',
'ext': 'flv', 'ext': 'flv',
'title': 'I Am a Firefighter', 'title': 'I Am a Firefighter',
'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', 'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',

View File

@ -7,6 +7,7 @@
unified_strdate, unified_strdate,
parse_duration, parse_duration,
qualities, qualities,
url_basename,
) )
@ -55,7 +56,9 @@ class NPOIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
return self._get_info(video_id)
def _get_info(self, video_id):
metadata = self._download_json( metadata = self._download_json(
'http://e.omroep.nl/metadata/aflevering/%s' % video_id, 'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
video_id, video_id,
@ -106,3 +109,30 @@ def _real_extract(self, url):
'duration': parse_duration(metadata.get('tijdsduur')), 'duration': parse_duration(metadata.get('tijdsduur')),
'formats': formats, 'formats': formats,
} }
class TegenlichtVproIE(NPOIE):
    """Extractor for episode pages on tegenlicht.vpro.nl.

    The episode page only carries a ``mediaurn`` meta tag; that URN is
    looked up through the rs.vpro.nl media API to obtain the ``mid``,
    which is then handed to the ``_get_info`` method inherited from
    ``NPOIE``.
    """

    IE_NAME = 'tegenlicht.vpro.nl'
    _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'

    _TESTS = [
        {
            'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html',
            'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
            'info_dict': {
                'id': 'VPWON_1169289',
                'ext': 'm4v',
                'title': 'Tegenlicht',
                'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
                'upload_date': '20130225',
            },
        },
    ]

    def _real_extract(self, url):
        """Resolve the episode page at *url* to a media id and extract it.

        The last path component of the URL is used as the identifier
        passed to the download helpers, because the real media id is
        not known until the API response has been read.
        """
        page_name = url_basename(url)
        page_html = self._download_webpage(url, page_name)

        # The page exposes its media URN in a <meta name="mediaurn"> tag.
        media_urn = self._html_search_meta('mediaurn', page_html)

        api_url = 'http://rs.vpro.nl/v2/api/media/%s.json' % media_urn
        media_info = self._download_json(api_url, page_name)

        return self._get_info(media_info['mid'])

View File

@ -21,7 +21,7 @@ class SBSIE(InfoExtractor):
'md5': '3150cf278965eeabb5b4cea1c963fe0a', 'md5': '3150cf278965eeabb5b4cea1c963fe0a',
'info_dict': { 'info_dict': {
'id': '320403011771', 'id': '320403011771',
'ext': 'flv', 'ext': 'mp4',
'title': 'Dingo Conservation', 'title': 'Dingo Conservation',
'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction', 'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
'thumbnail': 're:http://.*\.jpg', 'thumbnail': 're:http://.*\.jpg',

View File

@ -5,6 +5,7 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_str,
ExtractorError, ExtractorError,
xpath_with_ns, xpath_with_ns,
) )
@ -55,21 +56,19 @@ def _get_info(self, video_id, smil_url):
body = meta.find(_x('smil:body')) body = meta.find(_x('smil:body'))
f4m_node = body.find(_x('smil:seq//smil:video')) f4m_node = body.find(_x('smil:seq//smil:video'))
if f4m_node is not None: if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
f4m_url = f4m_node.attrib['src'] f4m_url = f4m_node.attrib['src']
if 'manifest.f4m?' not in f4m_url: if 'manifest.f4m?' not in f4m_url:
f4m_url += '?' f4m_url += '?'
# the parameters are from syfy.com, other sites may use others, # the parameters are from syfy.com, other sites may use others,
# they also work for nbc.com # they also work for nbc.com
f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
formats = [{ formats = self._extract_f4m_formats(f4m_url, video_id)
'ext': 'flv',
'url': f4m_url,
}]
else: else:
base_url = head.find(_x('smil:meta')).attrib['base']
switch = body.find(_x('smil:switch'))
formats = [] formats = []
switch = body.find(_x('smil:switch'))
if switch is not None:
base_url = head.find(_x('smil:meta')).attrib['base']
for f in switch.findall(_x('smil:video')): for f in switch.findall(_x('smil:video')):
attr = f.attrib attr = f.attrib
width = int(attr['width']) width = int(attr['width'])
@ -85,6 +84,16 @@ def _get_info(self, video_id, smil_url):
'height': height, 'height': height,
'vbr': vbr, 'vbr': vbr,
}) })
else:
switch = body.find(_x('smil:seq//smil:switch'))
for f in switch.findall(_x('smil:video')):
attr = f.attrib
vbr = int(attr['system-bitrate']) // 1000
formats.append({
'format_id': compat_str(vbr),
'url': attr['src'],
'vbr': vbr,
})
self._sort_formats(formats) self._sort_formats(formats)
return { return {