From 7862ad88b725daae957ad27ae60993e360c01e13 Mon Sep 17 00:00:00 2001
From: HanYOLO
Date: Fri, 27 Feb 2015 15:41:58 +0100
Subject: [PATCH] puls4 Add new extractor

---
NOTE(review): puls4.py was edited by hand during review, so the blob id on
its "index" line no longer describes the content below.

 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/puls4.py    | 84 ++++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+)
 create mode 100644 youtube_dl/extractor/puls4.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index e3b2cb54f..d137e1104 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -373,6 +373,7 @@
 from .pornoxo import PornoXOIE
 from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
+from .puls4 import Puls4IE
 from .pyvideo import PyvideoIE
 from .quickvid import QuickVidIE
 from .r7 import R7IE
diff --git a/youtube_dl/extractor/puls4.py b/youtube_dl/extractor/puls4.py
new file mode 100644
index 000000000..70dedbff3
--- /dev/null
+++ b/youtube_dl/extractor/puls4.py
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class Puls4IE(InfoExtractor):
+    """Information extractor for video play pages on www.puls4.com."""
+
+    # The numeric part after /play/ is captured as the video id; the dots in
+    # the host name are escaped so that they only match a literal '.'.
+    _VALID_URL = r'https?://www\.puls4\.com/video/.+?/play/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://www.puls4.com/video/pro-und-contra/play/2716816',
+        'md5': '49f6a6629747eeec43cef6a46b5df81d',
+        'info_dict': {
+            'id': '2716816',
+            'ext': 'mp4',
+            'title': 'Pro und Contra vom 23.02.2015',
+        },
+    }, {
+        'url': 'http://www.puls4.com/video/kult-spielfilme/play/1298106',
+        'md5': '6a48316c8903ece8dab9b9a7bf7a59ec',
+        'info_dict': {
+            'id': '1298106',
+            'ext': 'mp4',
+            'title': 'Lucky Fritz',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Extract the video shown on a puls4.com play page.
+
+        url is a page URL matching _VALID_URL. Returns a dict with the keys
+        'id', 'title', 'formats' and 'ext'. The title and SD lookups are
+        made without a default; the fsk-button and HD lookups fall back to
+        None when nothing matches.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # If the page contains an "fsk-button" link, follow it and run the
+        # rest of the extraction on the linked page instead.
+        real_url = self._html_search_regex(
+            r'\"fsk-button\".+?href=\"([^"]+)',
+            webpage, 'fsk_button', default=None)
+        if real_url:
+            webpage = self._download_webpage(real_url, video_id)
+
+        # NOTE(review): the HTML tags of this pattern were missing from the
+        # copy of the patch under review; the markup below is a
+        # reconstruction and must be confirmed against the original commit.
+        title = self._html_search_regex(
+            r'<div id="bg_brandableContent">.+?<h1>(.+?)</h1>',
+            webpage, 'title', flags=re.DOTALL)
+
+        # Every backslash is removed from the matched URLs (presumably
+        # JSON-escaped slashes -- TODO confirm against a live page).
+        sd_url = self._html_search_regex(
+            r'{\"url\":\"([^"]+?)\",\"hd\":false',
+            webpage, 'sd_url').replace('\\', '')
+
+        formats = [{'format_id': 'sd', 'url': sd_url, 'quality': -2}]
+
+        # The HD entry is only appended when the page offers one.
+        hd_url = self._html_search_regex(
+            r'{\"url\":\"([^"]+?)\",\"hd\":true',
+            webpage, 'hd_url', default=None)
+        if hd_url:
+            formats.append({
+                'format_id': 'hd',
+                'url': hd_url.replace('\\', ''),
+                'quality': -1,
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'ext': 'mp4',
+        }