Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Allan Zhou 2013-09-03 12:22:29 -07:00
commit c3dd69eab4
8 changed files with 131 additions and 26 deletions

View File

@ -0,0 +1,33 @@
#!/usr/bin/env python3
import sys
import os
import textwrap
# We must be able to import youtube_dl
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
import youtube_dl
def main():
with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
template = tmplf.read()
ie_htmls = []
for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME):
ie_html = '<b>{}</b>'.format(ie.IE_NAME)
try:
ie_html += ': {}'.format(ie.IE_DESC)
except AttributeError:
pass
if ie.working() == False:
ie_html += ' (Currently broken)'
ie_htmls.append('<li>{}</li>'.format(ie_html))
template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))
with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
sitesf.write(template)
if __name__ == '__main__':
main()

View File

@ -85,6 +85,7 @@ ROOT=$(pwd)
"$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem" "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
"$ROOT/devscripts/gh-pages/generate-download.py" "$ROOT/devscripts/gh-pages/generate-download.py"
"$ROOT/devscripts/gh-pages/update-copyright.py" "$ROOT/devscripts/gh-pages/update-copyright.py"
"$ROOT/devscripts/gh-pages/update-sites.py"
git add *.html *.html.in update git add *.html *.html.in update
git commit -m "release $version" git commit -m "release $version"
git show HEAD git show HEAD

View File

@ -89,6 +89,7 @@
from .unistra import UnistraIE from .unistra import UnistraIE
from .ustream import UstreamIE from .ustream import UstreamIE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
from .veoh import VeohIE from .veoh import VeohIE
from .vevo import VevoIE from .vevo import VevoIE
from .videofyme import VideofyMeIE from .videofyme import VideofyMeIE

View File

@ -55,7 +55,8 @@ def _real_extract(self, url):
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
embed_page = self._download_webpage(embed_url, video_id, embed_page = self._download_webpage(embed_url, video_id,
u'Downloading embed page') u'Downloading embed page')
info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info') info = self._search_regex(r'var info = ({.*?}),$', embed_page,
'video info', flags=re.MULTILINE)
info = json.loads(info) info = json.loads(info)
# TODO: support choosing qualities # TODO: support choosing qualities

View File

@ -122,7 +122,7 @@ def _real_extract(self, url):
video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title') video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
description = self._og_search_description(webpage) description = self._og_search_description(webpage)
video_uploader = self._html_search_regex( video_uploader = self._html_search_regex(
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("channel","([^"]+)"\);', r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
webpage, u'uploader nickname', fatal=False) webpage, u'uploader nickname', fatal=False)
return { return {

View File

@ -14,19 +14,6 @@
class ORFIE(InfoExtractor): class ORFIE(InfoExtractor):
_VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)' _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
_TEST = {
u'url': u'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter',
u'file': u'6566957.flv',
u'info_dict': {
u'title': u'Wetter',
u'description': u'Christa Kummer, Marcus Wadsak und Kollegen präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich.\r \r Mehr Wetter unter wetter.ORF.at',
},
u'params': {
# It uses rtmp
u'skip_download': True,
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id') playlist_id = mobj.group('id')

View File

@ -0,0 +1,56 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
get_element_by_id,
clean_html,
)
class VeeHDIE(InfoExtractor):
_VALID_URL = r'https?://veehd.com/video/(?P<id>\d+)'
_TEST = {
u'url': u'http://veehd.com/video/4686958',
u'file': u'4686958.mp4',
u'info_dict': {
u'title': u'Time Lapse View from Space ( ISS)',
u'uploader_id': u'spotted',
u'description': u'md5:f0094c4cf3a72e22bc4e4239ef767ad7',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
player_path = self._search_regex(r'\$\("#playeriframe"\).attr\({src : "(.+?)"',
webpage, u'player path')
player_url = compat_urlparse.urljoin(url, player_path)
player_page = self._download_webpage(player_url, video_id,
u'Downloading player page')
config_json = self._search_regex(r'value=\'config=({.+?})\'',
player_page, u'config json')
config = json.loads(config_json)
video_url = compat_urlparse.unquote(config['clip']['url'])
title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
webpage, u'uploader')
thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
webpage, u'thumbnail')
description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
webpage, u'description', flags=re.DOTALL)
return {
'_type': 'video',
'id': video_id,
'title': title,
'url': video_url,
'ext': 'mp4',
'uploader_id': uploader_id,
'thumbnail': thumbnail,
'description': description,
}

View File

@ -44,6 +44,16 @@ class VimeoIE(InfoExtractor):
u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography', u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
}, },
}, },
{
u'url': u'http://player.vimeo.com/video/54469442',
u'file': u'54469442.mp4',
u'md5': u'619b811a4417aa4abe78dc653becf511',
u'note': u'Videos that embed the url in the player page',
u'info_dict': {
u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
u'uploader': u'The BLN & Business of Software',
},
},
] ]
def _login(self): def _login(self):
@ -112,7 +122,8 @@ def _real_extract(self, url, new_video=True):
# Extract the config JSON # Extract the config JSON
try: try:
config = webpage.split(' = {config:')[1].split(',assets:')[0] config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
webpage, u'info section', flags=re.DOTALL)
config = json.loads(config) config = json.loads(config)
except: except:
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
@ -132,12 +143,22 @@ def _real_extract(self, url, new_video=True):
video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None
# Extract video thumbnail # Extract video thumbnail
video_thumbnail = config["video"]["thumbnail"] video_thumbnail = config["video"].get("thumbnail")
if video_thumbnail is None:
_, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1]
# Extract video description # Extract video description
video_description = get_element_by_attribute("itemprop", "description", webpage) video_description = None
if video_description: video_description = clean_html(video_description) try:
else: video_description = u'' video_description = get_element_by_attribute("itemprop", "description", webpage)
if video_description: video_description = clean_html(video_description)
except AssertionError as err:
# On some pages like (http://player.vimeo.com/video/54469442) the
# html tags are not closed, python 2.6 cannot handle it
if err.args[0] == 'we should not get here!':
pass
else:
raise
# Extract upload date # Extract upload date
video_upload_date = None video_upload_date = None
@ -154,14 +175,15 @@ def _real_extract(self, url, new_video=True):
# TODO bind to format param # TODO bind to format param
codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')] codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
files = { 'hd': [], 'sd': [], 'other': []} files = { 'hd': [], 'sd': [], 'other': []}
config_files = config["video"].get("files") or config["request"].get("files")
for codec_name, codec_extension in codecs: for codec_name, codec_extension in codecs:
if codec_name in config["video"]["files"]: if codec_name in config_files:
if 'hd' in config["video"]["files"][codec_name]: if 'hd' in config_files[codec_name]:
files['hd'].append((codec_name, codec_extension, 'hd')) files['hd'].append((codec_name, codec_extension, 'hd'))
elif 'sd' in config["video"]["files"][codec_name]: elif 'sd' in config_files[codec_name]:
files['sd'].append((codec_name, codec_extension, 'sd')) files['sd'].append((codec_name, codec_extension, 'sd'))
else: else:
files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0])) files['other'].append((codec_name, codec_extension, config_files[codec_name][0]))
for quality in ('hd', 'sd', 'other'): for quality in ('hd', 'sd', 'other'):
if len(files[quality]) > 0: if len(files[quality]) > 0:
@ -173,8 +195,12 @@ def _real_extract(self, url, new_video=True):
else: else:
raise ExtractorError(u'No known codec found') raise ExtractorError(u'No known codec found')
video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \ video_url = None
%(video_id, sig, timestamp, video_quality, video_codec.upper()) if isinstance(config_files[video_codec], dict):
video_url = config_files[video_codec][video_quality].get("url")
if video_url is None:
video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
%(video_id, sig, timestamp, video_quality, video_codec.upper())
return [{ return [{
'id': video_id, 'id': video_id,