From 72c3d02d294b04b35a19417b31ad497e7540caa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Sep 2016 23:39:54 +0700 Subject: [PATCH] [promptfile] Improve and modernize --- youtube_dl/extractor/promptfile.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index 54c4aee13..d40cca06f 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -7,7 +7,6 @@ from ..utils import ( determine_ext, ExtractorError, - sanitized_Request, urlencode_postdata, ) @@ -33,20 +32,23 @@ def _real_extract(self, url): raise ExtractorError('Video %s does not exist' % video_id, expected=True) - chash_pattern = r'\$\("#chash"\)\.val\("(.+)"\+\$\("#chash"\)' - chash = self._html_search_regex(chash_pattern, webpage, "chash") + chash = self._search_regex( + r'val\("([^"]*)"\s*\+\s*\$\("#chash"\)', webpage, 'chash') fields = self._hidden_inputs(webpage) - k = list(fields)[0] - fields[k] = chash + fields[k] + keys = list(fields.keys()) + chash_key = keys[0] if len(keys) == 1 else next( + key for key in keys if key.startswith('cha')) + fields[chash_key] = chash + fields[chash_key] - post = urlencode_postdata(fields) - req = sanitized_Request(url, post) - req.add_header('Content-type', 'application/x-www-form-urlencoded') webpage = self._download_webpage( - req, video_id, 'Downloading video page') + url, video_id, 'Downloading video page', + data=urlencode_postdata(fields), + headers={'Content-type': 'application/x-www-form-urlencoded'}) - url_pattern = r']+href=(["\'])(?P(?:(?!\1).)+)\1[^>]*>\s*Download File', + r']+href=(["\'])(?Phttps?://(?:www\.)?promptfile\.com/file/(?:(?!\1).)+)\1'), + webpage, 'video url', group='url') title = self._html_search_regex( r'', webpage, 'title') thumbnail = self._html_search_regex( @@ -55,7 +57,7 @@ def _real_extract(self, url): formats = [{ 'format_id': 'sd', - 'url': url, + 'url': video_url, 'ext': determine_ext(title), }] self._sort_formats(formats)