class VidtoIE(InfoExtractor):
    """Information extractor for videos hosted on vidto.me.

    The watch page contains a form with hidden ``hash``, ``fname`` and
    ``id`` fields.  Extraction downloads that page, waits a fixed number
    of seconds, posts the form back (together with the cookies stored for
    the page) and reads the media URL from a ``file_link = '...'``
    assignment in the response.
    """

    IE_NAME = 'vidto'
    IE_DESC = 'VidTo.me'
    # The named group ``id`` is what _real_extract() reads back.
    _VALID_URL = r'https?://(?:www\.)?vidto\.me/(?P<id>[0-9a-zA-Z]+)\.html'
    _HOST = 'vidto.me'
    _TEST = {
        'url': 'http://vidto.me/ku5glz52nqe1.html',
        'info_dict': {
            'id': 'ku5glz52nqe1',
            'ext': 'mp4',
            'title': 'test.mp4',
        },
    }

    # Seconds to sleep before submitting the form.
    # NOTE(review): presumably mirrors a countdown enforced by the site --
    # confirm the value against the live page.
    _COUNTDOWN_SECONDS = 7

    def _hidden_input(self, page, field, label):
        """Return the value of the hidden form input called *field*.

        *label* is the human-readable name handed to ``_search_regex`` for
        its messages.  The lookup is fatal: every field read here is needed
        to build the POST request.
        """
        # NOTE(review): the hidden-input patterns were empty strings in the
        # submitted text (the markup inside the literals looks stripped).
        # This pattern is reconstructed from the field names posted by
        # _real_extract() -- confirm the attribute order on the live page.
        pattern = r'<input type="hidden" name="%s" value="([^"]+)">' % re.escape(field)
        return self._search_regex(pattern, page, label, fatal=True)

    def _real_extract(self, url):
        """Resolve *url* to an info dict with ``id``, ``url`` and ``title``."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page_url = 'http://%s/%s.html' % (self._HOST, video_id)

        page = self._download_webpage(
            page_url, video_id, 'Downloading video page')

        hash_value = self._hidden_input(page, 'hash', 'hash')
        # The title is both posted back as ``fname`` and returned as the
        # result title, so it is looked up in fatal mode like the others.
        title = self._hidden_input(page, 'fname', 'title')
        id_value = self._hidden_input(page, 'id', 'id')

        form_fields = {
            'op': 'download1',
            'imhuman': 'Proceed to video',
            'usr_login': '',
            'id': id_value,
            'fname': title,
            'referer': '',
            'hash': hash_value,
        }
        post_data = compat_urllib_parse.urlencode(
            encode_dict(form_fields)).encode('ascii')
        req = compat_urllib_request.Request(url, post_data)
        req.add_header('Content-type', 'application/x-www-form-urlencoded')

        # Send every cookie in ONE header: add_header() replaces an existing
        # header of the same name, so adding them one by one keeps only the
        # last cookie.  .values() works on both Python 2 and 3.
        cookies = self._get_cookies(page_url)
        cookie_header = '; '.join(
            '%s=%s' % (morsel.key, morsel.value)
            for morsel in cookies.values())
        if cookie_header:
            req.add_header('Cookie', cookie_header)

        self.to_screen('Waiting for countdown...')
        time.sleep(self._COUNTDOWN_SECONDS)
        post_result = self._download_webpage(
            req, video_id,
            note='Proceed to video...', errnote='unable to proceed',
            fatal=True)

        # ``A-Z`` (not ``A-z``): the wider range also admits [ \ ] ^ and `.
        file_link = self._search_regex(
            r'file_link ?= ?\'(https?://[0-9a-zA-Z./\-_]+)',
            post_result, 'file_link', fatal=True)

        return {
            'id': video_id,
            'url': file_link,
            'title': title,
        }