1
1
mirror of https://github.com/ytdl-org/youtube-dl synced 2024-11-30 19:02:55 +01:00

[Tumblr] move into own file

This commit is contained in:
Philipp Hagemeister 2013-06-23 22:24:07 +02:00
parent a37f27ae99
commit ae287755b7
2 changed files with 42 additions and 33 deletions

View File

@ -52,6 +52,7 @@ from .extractor.spiegel import SpiegelIE
from .extractor.stanfordoc import StanfordOpenClassroomIE from .extractor.stanfordoc import StanfordOpenClassroomIE
from .extractor.steam import SteamIE from .extractor.steam import SteamIE
from .extractor.ted import TEDIE from .extractor.ted import TEDIE
from .extractor.tumblr import TumblrIE
from .extractor.ustream import UstreamIE from .extractor.ustream import UstreamIE
from .extractor.vimeo import VimeoIE from .extractor.vimeo import VimeoIE
from .extractor.worldstarhiphop import WorldStarHipHopIE from .extractor.worldstarhiphop import WorldStarHipHopIE
@ -96,39 +97,6 @@ from .extractor.zdf import ZDFIE
class TumblrIE(InfoExtractor):
    """Information extractor for videos embedded in Tumblr posts."""

    # The post id may end the URL or be followed by a slash (and an optional
    # slug); requiring the slash would reject URLs such as
    # http://blog.tumblr.com/post/123
    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)($|/)'

    def _real_extract(self, url):
        """Extract the video referenced by a Tumblr post or video URL.

        Downloads the canonical post page for the blog/id found in ``url``
        and scrapes the video file URL, its extension, a thumbnail and the
        page title from it.

        Returns a one-element list holding a dict with the keys ``id``,
        ``url``, ``title``, ``thumbnail`` and ``ext``.

        Raises ExtractorError if ``url`` does not match ``_VALID_URL`` or if
        no video source can be found in the page.
        """
        m_url = re.match(self._VALID_URL, url)
        if m_url is None:
            # Fail with an extractor-level error instead of an
            # AttributeError on None.
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = m_url.group('id')
        blog = m_url.group('blog_name')

        # Always fetch the "post" flavour of the URL, whatever was passed in.
        url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
        webpage = self._download_webpage(url, video_id)

        # The pattern looks for literal "\x22" sequences (escaped double
        # quotes) around the source URL and MIME type.  The blog name and id
        # come from the user-supplied URL, so escape them before embedding
        # them in the pattern.
        re_video = (
            r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))'
            r'\\x22 type=\\x22video/(?P<ext>.*?)\\x22'
        ) % (re.escape(blog), re.escape(video_id))
        video = re.search(re_video, webpage)
        if video is None:
            raise ExtractorError(u'Unable to extract video')
        video_url = video.group('video_url')
        ext = video.group('ext')

        # We pick the first poster.  The filler between "posters" and the
        # opening bracket is non-capturing so that "thumb" is the only (and
        # therefore first) group; NOTE(review): _search_regex presumably
        # returns the first matched group -- confirm against the helper.
        video_thumbnail = self._search_regex(
            r'posters(?:.*?)\[\\x22(?P<thumb>.*?)\\x22',
            webpage, u'thumbnail', fatal=False)
        if video_thumbnail:
            # Drop the backslashes left over from the escaped markup.
            video_thumbnail = video_thumbnail.replace('\\', '')

        # The only place where you can get a title, it's not complete,
        # but searching in other places doesn't work for all videos
        video_title = self._html_search_regex(
            r'<title>(?P<title>.*?)</title>',
            webpage, u'title', flags=re.DOTALL)

        return [{
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'ext': ext,
        }]
class BandcampIE(InfoExtractor): class BandcampIE(InfoExtractor):
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)' _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'

View File

@ -0,0 +1,41 @@
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class TumblrIE(InfoExtractor):
    """Information extractor for videos embedded in Tumblr posts."""

    # The post id may end the URL or be followed by a slash (and an optional
    # slug); requiring the slash would reject URLs such as
    # http://blog.tumblr.com/post/123
    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)($|/)'

    def _real_extract(self, url):
        """Extract the video referenced by a Tumblr post or video URL.

        Downloads the canonical post page for the blog/id found in ``url``
        and scrapes the video file URL, its extension, a thumbnail and the
        page title from it.

        Returns a one-element list holding a dict with the keys ``id``,
        ``url``, ``title``, ``thumbnail`` and ``ext``.

        Raises ExtractorError if ``url`` does not match ``_VALID_URL`` or if
        no video source can be found in the page.
        """
        m_url = re.match(self._VALID_URL, url)
        if m_url is None:
            # Fail with an extractor-level error instead of an
            # AttributeError on None.
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = m_url.group('id')
        blog = m_url.group('blog_name')

        # Always fetch the "post" flavour of the URL, whatever was passed in.
        url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
        webpage = self._download_webpage(url, video_id)

        # The pattern looks for literal "\x22" sequences (escaped double
        # quotes) around the source URL and MIME type.  The blog name and id
        # come from the user-supplied URL, so escape them before embedding
        # them in the pattern.
        re_video = (
            r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))'
            r'\\x22 type=\\x22video/(?P<ext>.*?)\\x22'
        ) % (re.escape(blog), re.escape(video_id))
        video = re.search(re_video, webpage)
        if video is None:
            raise ExtractorError(u'Unable to extract video')
        video_url = video.group('video_url')
        ext = video.group('ext')

        # We pick the first poster.  The filler between "posters" and the
        # opening bracket is non-capturing so that "thumb" is the only (and
        # therefore first) group; NOTE(review): _search_regex presumably
        # returns the first matched group -- confirm against the helper.
        video_thumbnail = self._search_regex(
            r'posters(?:.*?)\[\\x22(?P<thumb>.*?)\\x22',
            webpage, u'thumbnail', fatal=False)
        if video_thumbnail:
            # Drop the backslashes left over from the escaped markup.
            video_thumbnail = video_thumbnail.replace('\\', '')

        # The only place where you can get a title, it's not complete,
        # but searching in other places doesn't work for all videos
        video_title = self._html_search_regex(
            r'<title>(?P<title>.*?)</title>',
            webpage, u'title', flags=re.DOTALL)

        return [{
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'ext': ext,
        }]