Merge remote-tracking branch 'upstream/master'

2025-02-18 21:36:49 +01:00 · 2014-08-13 04:22:45 -07:00 · 2014-08-13 04:22:45 -07:00 · f96252b913
commit f96252b913
parent 04b89c9026 6f600ff5d6
15 changed files with 165 additions and 81 deletions
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@ -99,6 +99,7 @@ class TestAllURLsMatching(unittest.TestCase):

    def test_facebook_matching(self):
        self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
+        self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))

    def test_no_duplicates(self):
        ies = gen_extractors()
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -280,7 +280,7 @@ class TestUtil(unittest.TestCase):
        d = json.loads(stripped)
        self.assertEqual(d, [{"id": "532cb", "x": 3}])

-    def test_uppercase_escpae(self):
+    def test_uppercase_escape(self):
        self.assertEqual(uppercase_escape(u'aä'), u'aä')
        self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')

--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -68,6 +68,7 @@ __authors__  = (
    'Hassaan Ali',
    'Dobrosław Żybort',
    'David Fabijan',
+    'Sebastian Haas',
 )

 __license__ = 'Public Domain'
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@ -295,7 +295,7 @@ class FileDownloader(object):

    def real_download(self, filename, info_dict):
        """Real download process. Redefine in subclasses."""
-        raise NotImplementedError(u'This method must be implemented by sublcasses')
+        raise NotImplementedError(u'This method must be implemented by subclasses')

    def _hook_progress(self, status):
        for ph in self._progress_hooks:
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -225,9 +225,12 @@ from .nrk import (
 from .ntv import NTVIE
 from .nytimes import NYTimesIE
 from .nuvid import NuvidIE
-from .oe1 import OE1IE
 from .ooyala import OoyalaIE
-from .orf import ORFIE
+from .orf import (
+    ORFTVthekIE,
+    ORFOE1IE,
+    ORFFM4IE,
+)
 from .parliamentliveuk import ParliamentLiveUKIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@ -6,6 +6,7 @@ import json
 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
+    int_or_none,
 )


@ -110,8 +111,8 @@ class AppleTrailersIE(InfoExtractor):
                formats.append({
                    'url': format_url,
                    'format': format['type'],
-                    'width': format['width'],
-                    'height': int(format['height']),
+                    'width': int_or_none(format['width']),
+                    'height': int_or_none(format['height']),
                })

            self._sort_formats(formats)
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@ -51,6 +51,9 @@ class ARDIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

+        if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
+            raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
+
        title = self._html_search_regex(
            [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
             r'<meta name="dcterms.title" content="(.*?)"/>',
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@ -109,15 +109,19 @@ class ArteTVPlus7IE(InfoExtractor):
            regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
            return any(re.match(r, f['versionCode']) for r in regexes)
        # Some formats may not be in the same language as the url
+        # TODO: Might not want to drop videos that do not match the requested
+        # language, but rather process those formats with lower precedence
        formats = filter(_match_lang, all_formats)
        formats = list(formats)  # in python3 filter returns an iterator
        if not formats:
            # Some videos are only available in the 'Originalversion'
            # they aren't tagged as being in French or German
-            if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats):
+            # Sometimes there are neither videos of requested lang code
+            # nor original version videos available
+            # For such cases we just take all_formats as is
            formats = all_formats
-            else:
-                raise ExtractorError(u'The formats list is empty')
+            if not formats:
+                raise ExtractorError('The formats list is empty')

        if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
            def sort_key(f):
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@ -20,7 +20,7 @@ from ..utils import (
 class FacebookIE(InfoExtractor):
    _VALID_URL = r'''(?x)
        https?://(?:\w+\.)?facebook\.com/
-        (?:[^#?]*\#!/)?
+        (?:[^#]*?\#!/)?
        (?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
        (?:v|video_id)=(?P<id>[0-9]+)
        (?:.*)'''
--- a/youtube_dl/extractor/oe1.py
+++ b/youtube_dl/extractor/oe1.py
@ -1,40 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import calendar
-import datetime
-import re
-
-from .common import InfoExtractor
-
-# audios on oe1.orf.at are only available for 7 days, so we can't
-# add tests.
-
-
-class OE1IE(InfoExtractor):
-    IE_DESC = 'oe1.orf.at'
-    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        show_id = mobj.group('id')
-
-        data = self._download_json(
-            'http://oe1.orf.at/programm/%s/konsole' % show_id,
-            show_id
-        )
-
-        timestamp = datetime.datetime.strptime('%s %s' % (
-            data['item']['day_label'],
-            data['item']['time']
-        ), '%d.%m.%Y %H:%M')
-        unix_timestamp = calendar.timegm(timestamp.utctimetuple())
-
-        return {
-            'id': show_id,
-            'title': data['item']['title'],
-            'url': data['item']['url_stream'],
-            'ext': 'mp3',
-            'description': data['item'].get('info'),
-            'timestamp': unix_timestamp
-        }
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@ -3,13 +3,17 @@ import re
 import json

 from .common import InfoExtractor
-from ..utils import unescapeHTML
+from ..utils import (
+    unescapeHTML,
+    ExtractorError,
+)


 class OoyalaIE(InfoExtractor):
    _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'

-    _TEST = {
+    _TESTS = [
+        {
            # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
            'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
            'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
@ -19,7 +23,18 @@ class OoyalaIE(InfoExtractor):
                'title': 'Explaining Data Recovery from Hard Drives and SSDs',
                'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
            },
-    }
+        }, {
+            # Only available for ipad
+            'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
+            'md5': '4b9754921fddb68106e48c142e2a01e6',
+            'info_dict': {
+                'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
+                'ext': 'mp4',
+                'title': 'Simulation Overview - Levels of Simulation',
+                'description': '',
+            },
+        },
+    ]

    @staticmethod
    def _url_for_embed_code(embed_code):
@ -47,11 +62,28 @@ class OoyalaIE(InfoExtractor):
        player = self._download_webpage(player_url, embedCode)
        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
                                        player, 'mobile player url')
-        mobile_player = self._download_webpage(mobile_url, embedCode)
+        # Looks like some videos are only available for particular devices
+        # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
+        # is only available for ipad)
+        # Working around with fetching URLs for all the devices found starting with 'unknown'
+        # until we succeed or eventually fail for each device.
+        devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
+        devices.remove('unknown')
+        devices.insert(0, 'unknown')
+        for device in devices:
+            mobile_player = self._download_webpage(
+                '%s&device=%s' % (mobile_url, device), embedCode,
+                'Downloading mobile player JS for %s device' % device)
            videos_info = self._search_regex(
                r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
-            mobile_player, 'info').replace('\\"','"')
-        videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"')
+                mobile_player, 'info', fatal=False, default=None)
+            if videos_info:
+                break
+        if not videos_info:
+            raise ExtractorError('Unable to extract info')
+        videos_info = videos_info.replace('\\"', '"')
+        videos_more_info = self._search_regex(
+            r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
        videos_info = json.loads(videos_info)
        videos_more_info = json.loads(videos_more_info)

--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@ -3,6 +3,8 @@ from __future__ import unicode_literals

 import json
 import re
+import calendar
+import datetime

 from .common import InfoExtractor
 from ..utils import (
@ -12,7 +14,9 @@ from ..utils import (
 )


-class ORFIE(InfoExtractor):
+class ORFTVthekIE(InfoExtractor):
+    IE_NAME = 'orf:tvthek'
+    IE_DESC = 'ORF TVthek'
    _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'

    _TEST = {
@ -105,3 +109,73 @@ class ORFIE(InfoExtractor):
            'entries': entries,
            'id': playlist_id,
        }
+
+
+# Audios on ORF radio are only available for 7 days, so we can't add tests.
+
+
+class ORFOE1IE(InfoExtractor):
+    """Extractor for single programme pages on oe1.orf.at."""
+    IE_NAME = 'orf:oe1'
+    IE_DESC = 'Radio Österreich 1'
+    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
+
+    def _real_extract(self, url):
+        """Fetch the programme's "konsole" JSON and build the info dict."""
+        show_id = re.match(self._VALID_URL, url).group('id')
+
+        konsole_url = 'http://oe1.orf.at/programm/%s/konsole' % show_id
+        item = self._download_json(konsole_url, show_id)['item']
+
+        # day_label and time are joined and parsed as "DD.MM.YYYY HH:MM";
+        # the resulting naive datetime is converted with calendar.timegm.
+        broadcast_time = datetime.datetime.strptime(
+            '%s %s' % (item['day_label'], item['time']),
+            '%d.%m.%Y %H:%M')
+
+        return {
+            'id': show_id,
+            'title': item['title'],
+            'url': item['url_stream'],
+            'ext': 'mp3',
+            'description': item.get('info'),
+            'timestamp': calendar.timegm(broadcast_time.utctimetuple())
+        }
+
+
+class ORFFM4IE(InfoExtractor):
+    """Extractor for FM4 "7tage" show URLs, returned as a playlist.
+
+    Every element of the API response's ``streams`` list becomes one mp3
+    entry; all entries share the show's title and subtitle.
+    """
+    IE_NAME = 'orf:fm4'
+    IE_DESC = 'radio FM4'
+    _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'
+
+    def _real_extract(self, url):
+        """Download the broadcast JSON for the URL and return a playlist dict."""
+        mobj = re.match(self._VALID_URL, url)
+        show_date = mobj.group('date')
+        show_id = mobj.group('show')
+
+        data = self._download_json(
+            'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id),
+            show_id
+        )
+
+        def extract_entry_dict(info, title, subtitle):
+            """Build the info dict for one element of data['streams']."""
+            return {
+                'id': info['loopStreamId'].replace('.mp3', ''),
+                'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'],
+                'title': title,
+                'description': subtitle,
+                # NOTE(review): start/end are presumably epoch milliseconds -
+                # confirm against the API. Floor division keeps the results
+                # identical on Python 2 and Python 3 ('/' yields a float on 3).
+                'duration': (info['end'] - info['start']) // 1000,
+                'timestamp': info['start'] // 1000,
+                'ext': 'mp3'
+            }
+
+        entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]
+
+        return {
+            '_type': 'playlist',
+            'id': show_id,
+            'title': data['title'],
+            'description': data['subtitle'],
+            'entries': entries
+        }
--- a/youtube_dl/extractor/reverbnation.py
+++ b/youtube_dl/extractor/reverbnation.py
@ -1,23 +1,23 @@
 from __future__ import unicode_literals

 import re
-import time

 from .common import InfoExtractor
-from ..utils import strip_jsonp
+from ..utils import str_or_none


 class ReverbNationIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
    _TESTS = [{
        'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
-        'file': '16965047.mp3',
        'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
        'info_dict': {
+            "id": "16965047",
+            "ext": "mp3",
            "title": "MONA LISA",
            "uploader": "ALKILADOS",
-            "uploader_id": 216429,
-            "thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg"
+            "uploader_id": "216429",
+            "thumbnail": "re:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$"
        },
    }]

@ -26,10 +26,8 @@ class ReverbNationIE(InfoExtractor):
        song_id = mobj.group('id')

        api_res = self._download_json(
-            'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d'
-                % (song_id, int(time.time() * 1000)),
+            'https://api.reverbnation.com/song/%s' % song_id,
            song_id,
-            transform_source=strip_jsonp,
            note='Downloading information of song %s' % song_id
        )

@ -38,8 +36,9 @@ class ReverbNationIE(InfoExtractor):
            'title': api_res.get('name'),
            'url': api_res.get('url'),
            'uploader': api_res.get('artist', {}).get('name'),
-            'uploader_id': api_res.get('artist', {}).get('id'),
-            'thumbnail': api_res.get('image', api_res.get('thumbnail')),
+            'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
+            'thumbnail': self._proto_relative_url(
+                api_res.get('image', api_res.get('thumbnail'))),
            'ext': 'mp3',
            'vcodec': 'none',
        }
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -1273,9 +1273,15 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
+    if v == '':
+        v = None
    return default if v is None else (int(v) * invscale // scale)


+def str_or_none(v, default=None):
+    """Return compat_str(v), or default when v is None."""
+    if v is None:
+        return default
+    return compat_str(v)
+
+
 def str_to_int(int_str):
    if int_str is None:
        return None
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,2 +1,2 @@

-__version__ = '2014.08.05'
+__version__ = '2014.08.10'