[thisav] Add IE (Fixes #1056)

2024-11-23 20:16:54 +01:00 · 2013-07-17 11:16:53 +02:00 · 2013-07-17 11:16:53 +02:00 · 466de68801
commit 466de68801
parent 88d4111cfa
2 changed files with 47 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -58,6 +58,7 @@ from .steam import SteamIE
 from .teamcoco import TeamcocoIE
 from .ted import TEDIE
 from .tf1 import TF1IE
 from .thisav import ThisAVIE
 from .traileraddict import TrailerAddictIE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
--- a/youtube_dl/extractor/thisav.py
+++ b/youtube_dl/extractor/thisav.py
@@ -0,0 +1,46 @@
 #coding: utf-8
 import re
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
 )
 class ThisAVIE(InfoExtractor):
    """Information extractor for single-video pages on thisav.com.

    The numeric video id is taken from the page URL; the title, media
    URL and uploader details are scraped from the downloaded page with
    regular expressions.
    """
    _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
    # Common prefix of the uploader profile link.  The dots are escaped so
    # they only match a literal '.', and the scheme/host part accepts the
    # same variants as _VALID_URL (http or https, optional "www.").
    _USER_LINK_PREFIX = (
        r': <a href="https?://(?:www\.)?thisav\.com/user/[0-9]+/')
    _TEST = {
        u"url": u"http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html",
        u"file": u"47734.flv",
        u"info_dict": {
            u"title": u"高樹マリア - Just fit",
            u"uploader": u"dj7970",
            u"uploader_id": u"dj7970"
        }
    }
    def _real_extract(self, url):
        """Download the page at *url* and return its video info dict.

        The returned dict has the keys '_type', 'id', 'url', 'uploader',
        'uploader_id', 'title' and 'ext'.
        """
        # NOTE(review): assumes the caller only passes URLs that match
        # _VALID_URL; otherwise `mobj` is None and .group() raises.
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, u'title')
        # The media URL is passed to the flash player via addVariable('file', ...).
        video_url = self._html_search_regex(
            r"addVariable\('file','([^']+)'\);", webpage, u'video url')
        # Both uploader fields come from the same profile link: the link
        # text is the display name, the last path component of the href is
        # the id.  They are looked up with fatal=False -- presumably so a
        # missing link does not abort extraction (confirm against
        # InfoExtractor._html_search_regex).
        uploader = self._html_search_regex(
            self._USER_LINK_PREFIX + r'(?:[^"]+)">([^<]+)</a>',
            webpage, u'uploader name', fatal=False)
        uploader_id = self._html_search_regex(
            self._USER_LINK_PREFIX + r'([^"]+)">(?:[^<]+)</a>',
            webpage, u'uploader id', fatal=False)
        ext = determine_ext(video_url)
        return {
            '_type':       'video',
            'id':          video_id,
            'url':         video_url,
            'uploader':    uploader,
            'uploader_id': uploader_id,
            'title':       title,
            'ext':         ext,
        }