Merge remote-tracking branch 'upstream/master'

2024-12-01 00:52:58 +01:00 · 2013-09-03 12:22:29 -07:00 · 2013-09-03 12:22:29 -07:00 · c3dd69eab4
commit c3dd69eab4
parent 85f03346eb c8dbccde30
8 changed files with 131 additions and 26 deletions
--- a/devscripts/gh-pages/update-sites.py
+++ b/devscripts/gh-pages/update-sites.py
@ -0,0 +1,33 @@
 #!/usr/bin/env python3
 import sys
 import os
 import textwrap
 # We must be able to import youtube_dl
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 import youtube_dl
 def main():
    """Regenerate supportedsites.html from its template.

    Reads 'supportedsites.html.in' from the current working directory,
    builds one ``<li>`` entry per extractor returned by
    ``youtube_dl.gen_extractors()`` (sorted by ``IE_NAME``), and writes the
    template with the ``@SITES@`` placeholder replaced by the tab-indented
    list to 'supportedsites.html'.
    """
    with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
        template = tmplf.read()

    ie_htmls = []
    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME):
        ie_html = '<b>{}</b>'.format(ie.IE_NAME)
        # IE_DESC is optional: extractors without it are listed by name only.
        try:
            ie_html += ': {}'.format(ie.IE_DESC)
        except AttributeError:
            pass
        # Truthiness test instead of comparing against the False literal.
        if not ie.working():
            ie_html += ' (Currently broken)'
        ie_htmls.append('<li>{}</li>'.format(ie_html))

    # Indent every list item by one tab so it lines up inside the template.
    template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))

    with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
        sitesf.write(template)
 # Only regenerate the page when this file is executed as a script,
 # not when it is imported as a module.
 if __name__ == '__main__':
    main()
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@ -85,6 +85,7 @@ ROOT=$(pwd)
    "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
    "$ROOT/devscripts/gh-pages/generate-download.py"
    "$ROOT/devscripts/gh-pages/update-copyright.py"
    "$ROOT/devscripts/gh-pages/update-sites.py"
    git add *.html *.html.in update
    git commit -m "release $version"
    git show HEAD
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -89,6 +89,7 @@
 from .unistra import UnistraIE
 from .ustream import UstreamIE
 from .vbox7 import Vbox7IE
 from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .vevo import VevoIE
 from .videofyme import VideofyMeIE
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@ -55,7 +55,8 @@ def _real_extract(self, url):
        embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
        embed_page = self._download_webpage(embed_url, video_id,
                                            u'Downloading embed page')
-        info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
+        info = self._search_regex(r'var info = ({.*?}),$', embed_page,
            'video info', flags=re.MULTILINE)
        info = json.loads(info)
        # TODO: support choosing qualities
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@ -122,7 +122,7 @@ def _real_extract(self, url):
        video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
        description = self._og_search_description(webpage)
        video_uploader = self._html_search_regex(
-                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("channel","([^"]+)"\);',
+                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
                webpage, u'uploader nickname', fatal=False)
        return {
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@ -14,19 +14,6 @@
 class ORFIE(InfoExtractor):
    _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
    _TEST = {
        u'url': u'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter',
        u'file': u'6566957.flv',
        u'info_dict': {
            u'title': u'Wetter',
            u'description': u'Christa Kummer, Marcus Wadsak und Kollegen  präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich.\r \r Mehr Wetter unter wetter.ORF.at',
        },
        u'params': {
            # It uses rtmp
            u'skip_download': True,
        }
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
--- a/youtube_dl/extractor/veehd.py
+++ b/youtube_dl/extractor/veehd.py
@ -0,0 +1,56 @@
 import re
 import json
 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
    get_element_by_id,
    clean_html,
 )
 class VeeHDIE(InfoExtractor):
    """Information extractor for veehd.com video pages."""

    # The dot in the host name is escaped so that only the literal
    # "veehd.com" is accepted (an unescaped '.' matches any character).
    _VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'
    _TEST = {
        u'url': u'http://veehd.com/video/4686958',
        u'file': u'4686958.mp4',
        u'info_dict': {
            u'title': u'Time Lapse View from Space ( ISS)',
            u'uploader_id': u'spotted',
            u'description': u'md5:f0094c4cf3a72e22bc4e4239ef767ad7',
        },
    }

    def _real_extract(self, url):
        """Build the info dictionary for the video at ``url``.

        Downloads the video page and the player page it references, reads
        the media URL from the player's JSON config, and scrapes title,
        uploader, thumbnail and description from the video page.

        Returns a dict with the keys '_type', 'id', 'title', 'url', 'ext',
        'uploader_id', 'thumbnail' and 'description'.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # The video page sets the player iframe's src from a script; that
        # path may be relative, so resolve it against the page URL.
        player_path = self._search_regex(r'\$\("#playeriframe"\).attr\({src : "(.+?)"',
            webpage, u'player path')
        player_url = compat_urlparse.urljoin(url, player_path)
        player_page = self._download_webpage(player_url, video_id,
            u'Downloading player page')

        # The player page carries its configuration as JSON inside a
        # value='config={...}' attribute.
        config_json = self._search_regex(r'value=\'config=({.+?})\'',
            player_page, u'config json')
        config = json.loads(config_json)

        # The clip URL is unquoted before being returned.
        video_url = compat_urlparse.unquote(config['clip']['url'])

        # Keep only the part of the 'videoName' element before its last '|'.
        title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
        uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
            webpage, u'uploader')
        thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
            webpage, u'thumbnail')
        # DOTALL lets the description span several lines of markup.
        description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
            webpage, u'description', flags=re.DOTALL)

        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'url': video_url,
            'ext': 'mp4',
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
            'description': description,
        }
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@ -44,6 +44,16 @@ class VimeoIE(InfoExtractor):
                u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
            },
        },
        {
            u'url': u'http://player.vimeo.com/video/54469442',
            u'file': u'54469442.mp4',
            u'md5': u'619b811a4417aa4abe78dc653becf511',
            u'note': u'Videos that embed the url in the player page',
            u'info_dict': {
                u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
                u'uploader': u'The BLN & Business of Software',
            },
        },
    ]
    def _login(self):
@ -112,7 +122,8 @@ def _real_extract(self, url, new_video=True):
        # Extract the config JSON
        try:
-            config = webpage.split(' = {config:')[1].split(',assets:')[0]
+            config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
                webpage, u'info section', flags=re.DOTALL)
            config = json.loads(config)
        except:
            if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
@ -132,12 +143,22 @@ def _real_extract(self, url, new_video=True):
        video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None
        # Extract video thumbnail
-        video_thumbnail = config["video"]["thumbnail"]
+        video_thumbnail = config["video"].get("thumbnail")
        if video_thumbnail is None:
            _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1]
        # Extract video description
        video_description = None
        try:
            video_description = get_element_by_attribute("itemprop", "description", webpage)
            if video_description: video_description = clean_html(video_description)
-        else: video_description = u''
+        except AssertionError as err:
            # On some pages like (http://player.vimeo.com/video/54469442) the
            # html tags are not closed, python 2.6 cannot handle it
            if err.args[0] == 'we should not get here!':
                pass
            else:
                raise
        # Extract upload date
        video_upload_date = None
@ -154,14 +175,15 @@ def _real_extract(self, url, new_video=True):
        # TODO bind to format param
        codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
        files = { 'hd': [], 'sd': [], 'other': []}
        config_files = config["video"].get("files") or config["request"].get("files")
        for codec_name, codec_extension in codecs:
-            if codec_name in config["video"]["files"]:
+            if codec_name in config_files:
-                if 'hd' in config["video"]["files"][codec_name]:
+                if 'hd' in config_files[codec_name]:
                    files['hd'].append((codec_name, codec_extension, 'hd'))
-                elif 'sd' in config["video"]["files"][codec_name]:
+                elif 'sd' in config_files[codec_name]:
                    files['sd'].append((codec_name, codec_extension, 'sd'))
                else:
-                    files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0]))
+                    files['other'].append((codec_name, codec_extension, config_files[codec_name][0]))
        for quality in ('hd', 'sd', 'other'):
            if len(files[quality]) > 0:
@ -173,6 +195,10 @@ def _real_extract(self, url, new_video=True):
        else:
            raise ExtractorError(u'No known codec found')
        video_url = None
        if isinstance(config_files[video_codec], dict):
            video_url = config_files[video_codec][video_quality].get("url")
        if video_url is None:
            video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
                        %(video_id, sig, timestamp, video_quality, video_codec.upper())