youtube-dl/youtube_dl/extractor/worldstarhiphop.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor


class WorldStarHipHopIE(InfoExtractor):
    """Extractor for worldstarhiphop.com / worldstarcandy.com video pages.

    Accepts both the ``www`` (``/videos/``) and ``m`` (``/android/``) URL
    forms.  Pages whose media turns out to be a Vevo or YouTube embed are
    handed off through ``url_result`` instead of being extracted here.
    """

    # The id stops at the first '&' or '#' so that additional query
    # parameters or a fragment never leak into the video id.
    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?v=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO',
        'md5': '9d04de741161603bf7071bbf4e883186',
        'info_dict': {
            'id': 'wshh6a7q1ny0G34ZwuIO',
            'ext': 'mp4',
            'title': 'KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!'
        }
    }, {
        'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',
        'md5': 'dc1c76c83ecc4190bb1eb143899b87d3',
        'info_dict': {
            'id': 'wshh6a7q1ny0G34ZwuIO',
            'ext': 'mp4',
            'title': 'KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!'
        }
    }]

    def _real_extract(self, url):
        """Extract the video described by ``url``.

        Returns either a ``url_result`` hand-off (Vevo / YouTube embeds) or
        an info dict with ``id``, ``url``, ``title`` and ``thumbnail``
        (``thumbnail`` is ``None`` when the page does not advertise one).
        The video URL and the title are looked up without a default, so the
        search helpers report a failure when neither pattern matches.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # NOTE(review): '&amp?' means '&am' followed by an optional 'p';
        # presumably '&amp;' was intended.  Left unchanged until it can be
        # confirmed against real pages, since widening it could produce
        # false Vevo hand-offs.
        m_vevo_id = re.search(r'videoId=(.*?)&amp?', webpage)
        if m_vevo_id is not None:
            return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')

        # Two known page layouts; the first pattern that matches wins.
        video_url = self._search_regex(
            [r'so\.addVariable\("file","(.*?)"\)',
             r'<div class="artlist">\s*<a[^>]+href="([^"]+)">'],
            webpage, 'video URL')

        if 'youtube' in video_url:
            return self.url_result(video_url, ie='Youtube')

        # Bounded by the closing quote so the match cannot spill into
        # whatever else follows on the same line.
        thumbnail = self._html_search_regex(
            r'rel="image_src" href="([^"]+)"', webpage, 'thumbnail',
            default=None)

        # A page without a thumbnail is treated as a WSHH candy video, whose
        # correct title lives in the "candytitles" element.  Try that first
        # and fall back to the regular (mandatory) title patterns, so a
        # missing or empty candy title never hides a usable regular one and
        # a usable candy title is not lost to a failing regular lookup.
        video_title = None
        if not thumbnail:
            video_title = self._html_search_regex(
                r'candytitles.*>(.*)</span>', webpage, 'candy title',
                default=None)
        if not video_title:
            video_title = self._html_search_regex(
                [r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
                 r'<span[^>]+class="tc-sp-pinned-title">(.*)</span>'],
                webpage, 'title')

        return {
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': thumbnail,
        }
[worldstarhiphop] Modernize 2014-03-23 13:49:15 +01:00			`from __future__ import unicode_literals`

Move WorldStarHipHop into its own file 2013-06-23 22:04:08 +02:00			`import re`

			`from .common import InfoExtractor`


			`class WorldStarHipHopIE(InfoExtractor):`
[worldstarhiphop] Support Android URLs (fixes #5629) 2015-05-14 12:00:57 +02:00			`_VALID_URL = r'https?://(?:www\|m)\.worldstar(?:candy\|hiphop)\.com/(?:videos\|android)/video\.php\?v=(?P<id>.*)'`
			`_TESTS = [{`
Allow moving tests into IE files Allow adding download tests right in the IE file. This will cut down on merge conflicts and make it more likely that new IE authors will add tests right away. 2013-06-27 18:28:45 +02:00			`"url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO",`
			`"md5": "9d04de741161603bf7071bbf4e883186",`
			`"info_dict": {`
[worldstarhiphop] Modernize 2014-03-23 13:49:15 +01:00			`"id": "wshh6a7q1ny0G34ZwuIO",`
			`"ext": "mp4",`
[worldstarhiphop] Correct title extraction 2014-09-29 05:02:58 +02:00			`"title": "KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"`
Allow moving tests into IE files Allow adding download tests right in the IE file. This will cut down on merge conflicts and make it more likely that new IE authors will add tests right away. 2013-06-27 18:28:45 +02:00			`}`
[worldstarhiphop] Support Android URLs (fixes #5629) 2015-05-14 12:00:57 +02:00			`}, {`
			`'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',`
			`'md5': 'dc1c76c83ecc4190bb1eb143899b87d3',`
			`'info_dict': {`
			`'id': 'wshh6a7q1ny0G34ZwuIO',`
			`'ext': 'mp4',`
			`"title": "KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"`
			`}`
			`}]`
Allow moving tests into IE files Allow adding download tests right in the IE file. This will cut down on merge conflicts and make it more likely that new IE authors will add tests right away. 2013-06-27 18:28:45 +02:00
Move WorldStarHipHop into its own file 2013-06-23 22:04:08 +02:00			`def _real_extract(self, url):`
[worldstarhiphop] Correct title extraction 2014-09-29 05:02:58 +02:00			`video_id = self._match_id(url)`
			`webpage = self._download_webpage(url, video_id)`
Move WorldStarHipHop into its own file 2013-06-23 22:04:08 +02:00
[worldstarhiphop] Correct title extraction 2014-09-29 05:02:58 +02:00			`m_vevo_id = re.search(r'videoId=(.*?)&amp?', webpage)`
[worldstarhiphop] Small cleanup The second check for the Vevo id is not necessary. 2013-07-30 11:10:17 +02:00			`if m_vevo_id is not None:`
			`return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')`
detect vevo embed 2013-07-29 21:11:57 +02:00
[worldstarhiphop] Modernize 2014-03-23 13:49:15 +01:00			`video_url = self._search_regex(`
[worldstarhiphop] Support Android URLs (fixes #5629) 2015-05-14 12:00:57 +02:00			`[r'so\.addVariable\("file","(.*?)"\)',`
			`r'<div class="artlist">\s*<a[^>]+href="([^"]+)">'],`
			`webpage, 'video URL')`
Move WorldStarHipHop into its own file 2013-06-23 22:04:08 +02:00
added Youtube embed detection to WorldstarIE 2013-06-25 03:58:49 +02:00			`if 'youtube' in video_url:`
Merge pull request #922 from JohnyMoSwag/master Added embedded youtube detection to WorldstarIE 2013-06-25 22:07:31 +02:00			`return self.url_result(video_url, ie='Youtube')`
added Youtube embed detection to WorldstarIE 2013-06-25 03:58:49 +02:00
[worldstarhiphop] Modernize 2014-03-23 13:49:15 +01:00			`video_title = self._html_search_regex(`
[worldstarhiphop] Support Android URLs (fixes #5629) 2015-05-14 12:00:57 +02:00			`[r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',`
			`r'<span[^>]+class="tc-sp-pinned-title">(.*)</span>'],`
[worldstarhiphop] Correct title extraction 2014-09-29 05:02:58 +02:00			`webpage, 'title')`
Move WorldStarHipHop into its own file 2013-06-23 22:04:08 +02:00
			`# Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.`
[worldstarhiphop] Modernize 2014-03-23 13:49:15 +01:00			`thumbnail = self._html_search_regex(`
[worldstarhiphop] Correct title extraction 2014-09-29 05:02:58 +02:00			`r'rel="image_src" href="(.*)" />', webpage, 'thumbnail',`
[worldstarhiphop] Support Android URLs (fixes #5629) 2015-05-14 12:00:57 +02:00			`default=None)`
Move WorldStarHipHop into its own file 2013-06-23 22:04:08 +02:00			`if not thumbnail:`
[worldstarhiphop] Correct title extraction 2014-09-29 05:02:58 +02:00			`_title = r'candytitles.*>(.*)</span>'`
			`mobj = re.search(_title, webpage)`
Move WorldStarHipHop into its own file 2013-06-23 22:04:08 +02:00			`if mobj is not None:`
			`video_title = mobj.group(1)`

[worldstarhiphop] Modernize 2014-03-23 13:49:15 +01:00			`return {`
			`'id': video_id,`
			`'url': video_url,`
			`'title': video_title,`
			`'thumbnail': thumbnail,`
			`}`