From 80b8b72cb847bd286eea01819bf43fd8bbe6aa92 Mon Sep 17 00:00:00 2001
From: remitamine
Date: Thu, 31 Dec 2015 13:36:07 +0100
Subject: [PATCH] [animalplanet] Add new extractor(closes #5303)

---
 youtube_dl/extractor/__init__.py     |  1 +
 youtube_dl/extractor/animalplanet.py | 53 ++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)
 create mode 100644 youtube_dl/extractor/animalplanet.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 2063ef6337..abfabc7da9 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -19,6 +19,7 @@
 from .airmozilla import AirMozillaIE
 from .aljazeera import AlJazeeraIE
 from .alphaporno import AlphaPornoIE
+from .animalplanet import AnimalPlanetIE
 from .anitube import AnitubeIE
 from .anysex import AnySexIE
 from .aol import AolIE
diff --git a/youtube_dl/extractor/animalplanet.py b/youtube_dl/extractor/animalplanet.py
new file mode 100644
index 0000000000..0cebc81855
--- /dev/null
+++ b/youtube_dl/extractor/animalplanet.py
@@ -0,0 +1,53 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    parse_duration,
+    parse_iso8601,
+)
+
+
+class AnimalPlanetIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/([^/]+/)*(?P<id>[^/\?#]+)'
+    _TESTS = [{
+        'url': 'http://www.animalplanet.com/tv-shows/i-shouldnt-be-alive/videos/dog-saves-injured-owner/',
+        'info_dict': {
+            'id': '10608',
+            'ext': 'mp4',
+            'title': 'Dog Saves Injured Owner',
+            'description': 'A world class athlete is put to the test when she falls into a canyon and breaks her hip. Her only companion is her dog, Taz, who is on a mission to save her!',
+            'upload_date': '20100410',
+            'timestamp': 1270857727,
+            'duration': 220,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://www.animalplanet.com/longfin-eels-maneaters/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        video_data = self._parse_json(self._search_regex(
+            r'initialVideoData\s*=\s*({.+?});',
+            webpage, 'initialVideoData'), display_id)['playlist'][0]
+
+        return {
+            'id': compat_str(video_data['id']),
+            'display_id': display_id,
+            'title': video_data['title'],
+            'description': video_data.get('description'),
+            'thumbnail': video_data.get('thumbnailURL'),
+            'duration': parse_duration(video_data.get('video_length')),
+            'timestamp': parse_iso8601(video_data.get('publishedDate')),
+            'formats': self._extract_m3u8_formats(
+                video_data['src'], display_id, 'mp4',
+                'm3u8_native', m3u8_id='hls')
+        }