youtube-dl/youtube_dl/extractor/chilloutzone.py

from __future__ import unicode_literals

import re
import base64
import json

from .common import InfoExtractor
from ..utils import (
    clean_html,
    ExtractorError
)


class ChilloutzoneIE(InfoExtractor):
    """Extractor for video pages on chilloutzone.net.

    Each video page carries a base64-encoded JSON object in the JavaScript
    variable ``cozVidData``.  Depending on its contents the video is either
    delegated to the YouTube/Vimeo extractors or downloaded directly from
    the ``mediaUrl`` given in that object.
    """

    # NOTE(review): inside the character class ``|`` is a literal pipe, not
    # an alternation; kept as-is so the set of accepted URLs does not change.
    _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html'
    _TESTS = [{
        'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
        'md5': 'a76f3457e813ea0037e5244f509e66d1',
        'info_dict': {
            'id': 'enemene-meck-alle-katzen-weg',
            'ext': 'mp4',
            'title': 'Enemene Meck - Alle Katzen weg',
            'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
        },
    }, {
        'note': 'Video hosted at YouTube',
        'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
        'info_dict': {
            'id': '1YVQaAgHyRU',
            'ext': 'mp4',
            'title': '16 Photos Taken 1 Second Before Disaster',
            'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
            'uploader': 'BuzzFeedVideo',
            'uploader_id': 'BuzzFeedVideo',
            'upload_date': '20131105',
        },
    }, {
        'note': 'Video hosted at Vimeo',
        'url': 'http://www.chilloutzone.net/video/icon-blending.html',
        'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
        'info_dict': {
            'id': '85523671',
            'ext': 'mp4',
            'title': 'The Sunday Times - Icons',
            'description': 're:(?s)^Watch the making of - makingoficons.com.{300,}',
            'uploader': 'Us',
            'uploader_id': 'usfilms',
            'upload_date': '20140131'
        },
    }]

    def _real_extract(self, url):
        """Extract the video described by a chilloutzone.net page.

        Resolution order:
          1. If the embedded data names no native platform, look for a
             YouTube ``<iframe>`` in the page and delegate to it.
          2. If the data asks for the native source and provides an id for
             YouTube or Vimeo, delegate to that extractor.
          3. Otherwise use ``mediaUrl`` as a direct MP4 download.

        Returns either a ``url_result`` (cases 1 and 2) or an info dict
        with ``id``, ``url``, ``ext``, ``title`` and ``description``.

        Raises ExtractorError when none of the three sources yields a
        video.  A missing ``title`` in the embedded data still raises
        KeyError, since the title is mandatory.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        base64_video_info = self._html_search_regex(
            r'var cozVidData = "(.+?)";', webpage, 'video data')
        # Hand b64decode bytes rather than text: Python 3 only accepts
        # (ASCII) text input from 3.3 onwards, bytes work everywhere.
        decoded_video_info = base64.b64decode(
            base64_video_info.encode('ascii')).decode('utf-8')
        video_info_dict = json.loads(decoded_video_info)

        # The title is mandatory; everything else is optional and read with
        # .get() so that one absent key does not abort the extraction while
        # another source could still provide the video.
        title = video_info_dict['title']
        video_url = video_info_dict.get('mediaUrl')
        description = clean_html(video_info_dict.get('description'))
        native_platform = video_info_dict.get('nativePlatform')
        native_video_id = video_info_dict.get('nativeVideoId')
        source_priority = video_info_dict.get('sourcePriority')

        # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
        if native_platform is None:
            youtube_url = self._html_search_regex(
                r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
                webpage, 'fallback video URL', default=None)
            if youtube_url is not None:
                return self.url_result(youtube_url, ie='Youtube')

        # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
        # the own CDN.  Only delegate when an id is actually present;
        # otherwise fall through to the CDN URL below.
        if source_priority == 'native' and native_video_id:
            if native_platform == 'youtube':
                return self.url_result(native_video_id, ie='Youtube')
            if native_platform == 'vimeo':
                return self.url_result(
                    'http://vimeo.com/%s' % native_video_id, ie='Vimeo')

        if not video_url:
            raise ExtractorError('No video found')

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
        }
[chilloutzone] Simplify (#2338) 2014-02-07 12:00:25 +01:00			`from __future__ import unicode_literals`

[chilloutzone] Added support for chilloutzone.net Added support for chilloutzone.net videos including embedded youtube and vimeo movies. In case you find a not working movie, drop me an email. 2014-02-06 11:44:44 +01:00			`import re`
			`import base64`
			`import json`

			`from .common import InfoExtractor`
[chilloutzone] Add import 2014-02-07 12:03:19 +01:00			`from ..utils import (`
			`clean_html,`
			`ExtractorError`
			`)`
[chilloutzone] Added support for chilloutzone.net Added support for chilloutzone.net videos including embedded youtube and vimeo movies. In case you find a not working movie, drop me an email. 2014-02-06 11:44:44 +01:00

			`class ChilloutzoneIE(InfoExtractor):`
[chilloutzone] Simplify (#2338) 2014-02-07 12:00:25 +01:00			`_VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w\|-]+)\.html'`
[chilloutzone] Add additional tests (#2340) 2014-02-07 15:42:31 +01:00			`_TESTS = [{`
[chilloutzone] Bug fix, runs against tests Fixes a bug with python3.3 and made the extractor run successfully against tox 2014-02-06 21:31:04 +01:00			`'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',`
			`'md5': 'a76f3457e813ea0037e5244f509e66d1',`
			`'info_dict': {`
			`'id': 'enemene-meck-alle-katzen-weg',`
			`'ext': 'mp4',`
			`'title': 'Enemene Meck - Alle Katzen weg',`
[chilloutzone] Simplify (#2338) 2014-02-07 12:00:25 +01:00			`'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',`
[chilloutzone] Bug fix, runs against tests Fixes a bug with python3.3 and made the extractor run successfully against tox 2014-02-06 21:31:04 +01:00			`},`
[chilloutzone] Add additional tests (#2340) 2014-02-07 15:42:31 +01:00			`}, {`
			`'note': 'Video hosted at YouTube',`
			`'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',`
			`'info_dict': {`
			`'id': '1YVQaAgHyRU',`
			`'ext': 'mp4',`
			`'title': '16 Photos Taken 1 Second Before Disaster',`
			`'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',`
			`'uploader': 'BuzzFeedVideo',`
			`'uploader_id': 'BuzzFeedVideo',`
			`'upload_date': '20131105',`
			`},`
			`}, {`
			`'note': 'Video hosted at Vimeo',`
			`'url': 'http://www.chilloutzone.net/video/icon-blending.html',`
			`'md5': '2645c678b8dc4fefcc0e1b60db18dac1',`
			`'info_dict': {`
			`'id': '85523671',`
			`'ext': 'mp4',`
			`'title': 'The Sunday Times - Icons',`
[chilloutzone] Make test case more flexible 2014-09-13 09:04:03 +02:00			`'description': 're:(?s)^Watch the making of - makingoficons.com.{300,}',`
[chilloutzone] Add additional tests (#2340) 2014-02-07 15:42:31 +01:00			`'uploader': 'Us',`
			`'uploader_id': 'usfilms',`
			`'upload_date': '20140131'`
			`},`
			`}]`
[chilloutzone] Added support for chilloutzone.net Added support for chilloutzone.net videos including embedded youtube and vimeo movies. In case you find a not working movie, drop me an email. 2014-02-06 11:44:44 +01:00
			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
[chilloutzone] Bug fix, runs against tests Fixes a bug with python3.3 and made the extractor run successfully against tox 2014-02-06 21:31:04 +01:00			`video_id = mobj.group('id')`

[chilloutzone] Simplify (#2338) 2014-02-07 12:00:25 +01:00			`webpage = self._download_webpage(url, video_id)`
[chilloutzone] Bug fix, runs against tests Fixes a bug with python3.3 and made the extractor run successfully against tox 2014-02-06 21:31:04 +01:00
[chilloutzone] Simplify (#2338) 2014-02-07 12:00:25 +01:00			`base64_video_info = self._html_search_regex(`
			`r'var cozVidData = "(.+?)";', webpage, 'video data')`
[chilloutzone] Bug fix, runs against tests Fixes a bug with python3.3 and made the extractor run successfully against tox 2014-02-06 21:31:04 +01:00			`decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8")`
			`video_info_dict = json.loads(decoded_video_info)`
[chilloutzone] Simplify (#2338) 2014-02-07 12:00:25 +01:00
[chilloutzone] Bug fix, runs against tests Fixes a bug with python3.3 and made the extractor run successfully against tox 2014-02-06 21:31:04 +01:00			`# get video information from dict`
[chilloutzone] Simplify (#2338) 2014-02-07 12:00:25 +01:00			`video_url = video_info_dict['mediaUrl']`
			`description = clean_html(video_info_dict.get('description'))`
[chilloutzone] Bug fix, runs against tests Fixes a bug with python3.3 and made the extractor run successfully against tox 2014-02-06 21:31:04 +01:00			`title = video_info_dict['title']`
			`native_platform = video_info_dict['nativePlatform']`
			`native_video_id = video_info_dict['nativeVideoId']`
			`source_priority = video_info_dict['sourcePriority']`

			`# If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)`
[chilloutzone] Simplify (#2338) 2014-02-07 12:00:25 +01:00			`if native_platform is None:`
			`youtube_url = self._html_search_regex(`
			`r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',`
			`webpage, 'fallback video URL', default=None)`
			`if youtube_url is not None:`
			`return self.url_result(youtube_url, ie='Youtube')`
[chilloutzone] Bug fix, runs against tests Fixes a bug with python3.3 and made the extractor run successfully against tox 2014-02-06 21:31:04 +01:00
			`# Non Fallback: Decide to use native source (e.g. youtube or vimeo) or`
			`# the own CDN`
			`if source_priority == 'native':`
			`if native_platform == 'youtube':`
[chilloutzone] fixes bug with youtube extraction the id used for extracting the video from youtube is stored in native_video_id not video_id. This id is only used on chilloutzone.net 2014-02-07 12:29:58 +01:00			`return self.url_result(native_video_id, ie='Youtube')`
[chilloutzone] Added support for chilloutzone.net Added support for chilloutzone.net videos including embedded youtube and vimeo movies. In case you find a not working movie, drop me an email. 2014-02-06 11:44:44 +01:00			`if native_platform == 'vimeo':`
[chilloutzone] Simplify (#2338) 2014-02-07 12:00:25 +01:00			`return self.url_result(`
			`'http://vimeo.com/' + native_video_id, ie='Vimeo')`
[chilloutzone] Added support for chilloutzone.net Added support for chilloutzone.net videos including embedded youtube and vimeo movies. In case you find a not working movie, drop me an email. 2014-02-06 11:44:44 +01:00
[chilloutzone] Simplify (#2338) 2014-02-07 12:00:25 +01:00			`if not video_url:`
			`raise ExtractorError('No video found')`
[chilloutzone] Added support for chilloutzone.net Added support for chilloutzone.net videos including embedded youtube and vimeo movies. In case you find a not working movie, drop me an email. 2014-02-06 11:44:44 +01:00
[chilloutzone] Simplify (#2338) 2014-02-07 12:00:25 +01:00			`return {`
			`'id': video_id,`
			`'url': video_url,`
			`'ext': 'mp4',`
			`'title': title,`
[chilloutzone] Bug fix, runs against tests Fixes a bug with python3.3 and made the extractor run successfully against tox 2014-02-06 21:31:04 +01:00			`'description': description,`
[chilloutzone] Simplify (#2338) 2014-02-07 12:00:25 +01:00			`}`