1
1
mirror of https://github.com/ytdl-org/youtube-dl synced 2024-11-27 15:16:57 +01:00

Merge remote-tracking branch 'origin/master'

This commit is contained in:
Philipp Hagemeister 2015-02-16 04:09:10 +01:00
commit 5bfd430f81
16 changed files with 163 additions and 48 deletions

View File

@ -110,3 +110,4 @@ Shaya Goldberg
Paul Hartmann
Frans de Jonge
Robin de Rooij
Ryan Schmidt

View File

@ -138,7 +138,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 5)
self.assertTrue(len(subtitles.keys()) >= 6)
def test_list_subtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
@ -247,7 +247,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
@ -334,7 +334,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['cs']))
self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4')
self.assertTrue(len(subtitles['cs']) > 20000)
def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')

View File

@ -189,6 +189,7 @@ from .hellporno import HellPornoIE
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
from .historicfilms import HistoricFilmsIE
from .history import HistoryIE
from .hitbox import HitboxIE, HitboxLiveIE
from .hornbunny import HornBunnyIE
from .hostingbulk import HostingBulkIE

View File

@ -50,7 +50,7 @@ class BambuserIE(InfoExtractor):
'duration': int(info['length']),
'view_count': int(info['views_total']),
'uploader': info['username'],
'uploader_id': info['uid'],
'uploader_id': info['owner']['uid'],
}

View File

@ -273,7 +273,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
formats, subtitles = self._download_media_selector(programme_id)
return programme_id, title, description, duration, formats, subtitles
except ExtractorError as ee:
if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
raise
# fallback to legacy playlist

View File

@ -9,7 +9,7 @@ class BeegIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
_TEST = {
'url': 'http://beeg.com/5416503',
'md5': '634526ae978711f6b748fe0dd6c11f57',
'md5': '1bff67111adb785c51d1b42959ec10e5',
'info_dict': {
'id': '5416503',
'ext': 'mp4',

View File

@ -16,7 +16,7 @@ from ..utils import (
class CamdemyIE(InfoExtractor):
_VALID_URL = r'http://www.camdemy.com/media/(?P<id>\d+)'
_VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
_TESTS = [{
# single file
'url': 'http://www.camdemy.com/media/5181/',

View File

@ -665,7 +665,7 @@ class InfoExtractor(object):
return RATING_TABLE.get(rating.lower(), None)
def _family_friendly_search(self, html):
# See http://schema.org/VideoObj
# See http://schema.org/VideoObject
family_friendly = self._html_search_meta('isFamilyFriendly', html)
if not family_friendly:

View File

@ -15,7 +15,7 @@ class DrTuberIE(InfoExtractor):
'id': '1740434',
'display_id': 'hot-perky-blonde-naked-golf',
'ext': 'mp4',
'title': 'Hot Perky Blonde Naked Golf',
'title': 'hot perky blonde naked golf',
'like_count': int,
'dislike_count': int,
'comment_count': int,
@ -36,7 +36,8 @@ class DrTuberIE(InfoExtractor):
r'<source src="([^"]+)"', webpage, 'video URL')
title = self._html_search_regex(
r'<title>([^<]+)\s*-\s*Free', webpage, 'title')
[r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'],
webpage, 'title')
thumbnail = self._html_search_regex(
r'poster="([^"]+)"',

View File

@ -1,52 +1,71 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
class FirstTVIE(InfoExtractor):
IE_NAME = 'firsttv'
IE_DESC = 'Видеоархив - Первый канал'
_VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)'
IE_NAME = '1tv'
IE_DESC = 'Первый канал'
_VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
_TEST = {
_TESTS = [{
'url': 'http://www.1tv.ru/videoarchive/73390',
'md5': '3de6390cf0cca4a5eae1d1d83895e5ad',
'md5': '777f525feeec4806130f4f764bc18a4f',
'info_dict': {
'id': '73390',
'ext': 'mp4',
'title': 'Олимпийские канатные дороги',
'description': 'md5:cc730d2bf4215463e37fff6a1e277b13',
'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
'duration': 149,
'like_count': int,
'dislike_count': int,
},
'skip': 'Only works from Russia',
}
}, {
'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
'info_dict': {
'id': '35930',
'ext': 'mp4',
'title': 'Наедине со всеми. Людмила Сенчина',
'description': 'md5:89553aed1d641416001fe8d450f06cb9',
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
'duration': 2694,
},
'skip': 'Only works from Russia',
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id, 'Downloading page')
video_url = self._html_search_regex(
r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL')
r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''',
webpage, 'video URL')
title = self._html_search_regex(
r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title')
[r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
r"'title'\s*:\s*'([^']+)'"], webpage, 'title')
description = self._html_search_regex(
r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>', webpage, 'description', fatal=False)
r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
webpage, 'description', default=None) or self._html_search_meta(
'description', webpage, 'description')
thumbnail = self._og_search_thumbnail(webpage)
duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)
duration = self._og_search_property(
'video:duration', webpage,
'video duration', fatal=False)
like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
webpage, 'like count', fatal=False)
dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
webpage, 'dislike count', fatal=False)
like_count = self._html_search_regex(
r'title="Понравилось".*?/></label> \[(\d+)\]',
webpage, 'like count', default=None)
dislike_count = self._html_search_regex(
r'title="Не понравилось".*?/></label> \[(\d+)\]',
webpage, 'dislike count', default=None)
return {
'id': video_id,

View File

@ -0,0 +1,31 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import smuggle_url
class HistoryIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?history\.com/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
_TESTS = [{
'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
'md5': '6fe632d033c92aa10b8d4a9be047a7c5',
'info_dict': {
'id': 'bLx5Dv5Aka1G',
'ext': 'mp4',
'title': "Bet You Didn't Know: Valentine's Day",
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
},
'add_ie': ['ThePlatform'],
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
webpage, 'video url')
return self.url_result(smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}))

View File

@ -1,7 +1,6 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..compat import (
@ -52,9 +51,9 @@ class NBCIE(InfoExtractor):
class NBCNewsIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://www\.nbcnews\.com/
((video/.+?/(?P<id>\d+))|
(feature/[^/]+/(?P<title>.+)))
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
(?:video/.+?/(?P<id>\d+)|
(?:feature|nightly-news)/[^/]+/(?P<title>.+))
'''
_TESTS = [
@ -89,6 +88,16 @@ class NBCNewsIE(InfoExtractor):
'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
},
},
{
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
'md5': 'b5dda8cddd8650baa0dcb616dd2cf60d',
'info_dict': {
'id': 'sekXqyTVnmN3',
'ext': 'mp4',
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
},
},
]
def _real_extract(self, url):
@ -107,13 +116,13 @@ class NBCNewsIE(InfoExtractor):
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
}
else:
# "feature" pages use theplatform.com
# "feature" and "nightly-news" pages use theplatform.com
title = mobj.group('title')
webpage = self._download_webpage(url, title)
bootstrap_json = self._search_regex(
r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json',
flags=re.MULTILINE)
bootstrap = json.loads(bootstrap_json)
r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
webpage, 'bootstrap json', flags=re.MULTILINE)
bootstrap = self._parse_json(bootstrap_json, video_id)
info = bootstrap['results'][0]['video']
mpxid = info['mpxId']

View File

@ -1,14 +1,30 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import hashlib
import time
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
int_or_none,
)
def _get_api_key(api_path):
if api_path.endswith('?'):
api_path = api_path[:-1]
api_key = 'fb5f58a820353bd7095de526253c14fd'
a = '{0:}{1:}{2:}'.format(api_key, api_path, int(round(time.time() / 24 / 3600)))
return hashlib.md5(a.encode('ascii')).hexdigest()
class StreamCZIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)'
_API_URL = 'http://www.stream.cz/API'
_TESTS = [{
'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
@ -36,8 +52,11 @@ class StreamCZIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'http://www.stream.cz/API/episode/%s' % video_id, video_id)
api_path = '/episode/%s' % video_id
req = compat_urllib_request.Request(self._API_URL + api_path)
req.add_header('Api-Password', _get_api_key(api_path))
data = self._download_json(req, video_id)
formats = []
for quality, video in enumerate(data['video_qualities']):

View File

@ -52,7 +52,7 @@ class SunPornoIE(InfoExtractor):
formats = []
quality = qualities(['mp4', 'flv'])
for video_url in re.findall(r'<source src="([^"]+)"', webpage):
for video_url in re.findall(r'<(?:source|video) src="([^"]+)"', webpage):
video_ext = determine_ext(video_url)
formats.append({
'url': video_url,

View File

@ -2,6 +2,11 @@ from __future__ import unicode_literals
import re
import json
import time
import hmac
import binascii
import hashlib
from .subtitles import SubtitlesInfoExtractor
from ..compat import (
@ -11,6 +16,7 @@ from ..utils import (
determine_ext,
ExtractorError,
xpath_with_ns,
unsmuggle_url,
)
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
@ -18,7 +24,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language
class ThePlatformIE(SubtitlesInfoExtractor):
_VALID_URL = r'''(?x)
(?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
|theplatform:)(?P<id>[^/\?&]+)'''
@ -38,9 +44,33 @@ class ThePlatformIE(SubtitlesInfoExtractor):
},
}
@staticmethod
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
flags = '10' if include_qs else '00'
expiration_date = '%x' % (int(time.time()) + life)
def str_to_hex(str):
return binascii.b2a_hex(str.encode('ascii')).decode('ascii')
def hex_to_str(hex):
return binascii.a2b_hex(hex)
relative_path = url.split('http://link.theplatform.com/s/')[1].split('?')[0]
clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path))
checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
return '%s&sig=%s' % (url, sig)
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
mobj = re.match(self._VALID_URL, url)
provider_id = mobj.group('provider_id')
video_id = mobj.group('id')
if not provider_id:
provider_id = 'dJ5BDC'
if mobj.group('config'):
config_url = url + '&form=json'
config_url = config_url.replace('swf/', 'config/')
@ -48,8 +78,12 @@ class ThePlatformIE(SubtitlesInfoExtractor):
config = self._download_json(config_url, video_id, 'Downloading config')
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
else:
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
'format=smil&mbr=true'.format(video_id))
smil_url = ('http://link.theplatform.com/s/{0}/{1}/meta.smil?'
'format=smil&mbr=true'.format(provider_id, video_id))
sig = smuggled_data.get('sig')
if sig:
smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
meta = self._download_xml(smil_url, video_id)
try:
@ -62,7 +96,7 @@ class ThePlatformIE(SubtitlesInfoExtractor):
else:
raise ExtractorError(error_msg, expected=True)
info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
info_url = 'http://link.theplatform.com/s/{0}/{1}?format=preview'.format(provider_id, video_id)
info_json = self._download_webpage(info_url, video_id)
info = json.loads(info_json)

View File

@ -138,7 +138,7 @@ class FFmpegPostProcessor(PostProcessor):
if self._downloader.params.get('verbose', False):
self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
stdout, stderr = p.communicate()
if p.returncode != 0:
stderr = stderr.decode('utf-8', 'replace')
@ -178,8 +178,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
encodeArgument('-show_streams'),
encodeFilename(self._ffmpeg_filename_argument(path), True)]
if self._downloader.params.get('verbose', False):
self._downloader.to_screen('[debug] ffprobe command line: %s' % shell_quote(cmd))
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
output = handle.communicate()[0]
if handle.wait() != 0:
return None