diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a038f298a7..a680973de8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -406,6 +406,7 @@ from .tube8 import Tube8IE from .tudou import TudouIE from .tumblr import TumblrIE +from .tunein import TuneInIE from .turbo import TurboIE from .tutv import TutvIE from .tvigle import TvigleIE diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index d7e9aef90d..9bc959a9b5 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -26,6 +26,21 @@ def _extract_video(self, video_id, catalogue): if info.get('status') == 'NOK': raise ExtractorError( '%s returned error: %s' % (self.IE_NAME, info['message']), expected=True) + allowed_countries = info['videos'][0].get('geoblocage') + if allowed_countries: + georestricted = True + geo_info = self._download_json( + 'http://geo.francetv.fr/ws/edgescape.json', video_id, + 'Downloading geo restriction info') + country = geo_info['reponse']['geo_info']['country_code'] + if country not in allowed_countries: + raise ExtractorError( + 'The video is not available from your location', + expected=True) + else: + georestricted = False + + formats = [] for video in info['videos']: @@ -36,6 +51,10 @@ def _extract_video(self, video_id, catalogue): continue format_id = video['format'] if video_url.endswith('.f4m'): + if georestricted: + # See https://github.com/rg3/youtube-dl/issues/3963 + # m3u8 urls work fine + continue video_url_parsed = compat_urllib_parse_urlparse(video_url) f4m_url = self._download_webpage( 'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path, diff --git a/youtube_dl/extractor/tunein.py b/youtube_dl/extractor/tunein.py new file mode 100644 index 0000000000..8c29f16343 --- /dev/null +++ b/youtube_dl/extractor/tunein.py @@ -0,0 +1,101 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re 
from .common import InfoExtractor
from ..utils import ExtractorError


class TuneInIE(InfoExtractor):
    """Information extractor for TuneIn radio stations.

    Accepts station pages on tunein.com (``radio/...-s<id>`` and
    ``station/...StationId=<id>``) and ``tun.in`` short links. Short links
    are resolved by requesting them and matching the final URL again.
    """

    # ``id`` captures the numeric station id, ``redirect_id`` the token of a
    # tun.in short link. _real_extract() reads both groups by name, so the
    # group names must stay in sync with it.
    _VALID_URL = r'''(?x)https?://(?:www\.)?
        (?:
            tunein\.com/
            (?:
                radio/.*?-s|
                station/.*?StationId\=
            )(?P<id>[0-9]+)
            |tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
        )
        '''

    # Expected metadata shared by both test URLs below (the short link
    # redirects to the same station).
    _INFO_DICT = {
        'id': '34682',
        'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
        'ext': 'AAC',
        # Raw string so that ``\.`` is not treated as a string escape.
        'thumbnail': r're:^https?://.*\.png$',
        'location': 'Tacoma, WA',
    }
    _TESTS = [
        {
            'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
            'info_dict': _INFO_DICT,
            'params': {
                'skip_download': True,  # live stream
            },
        },
        {  # test redirection
            'url': 'http://tun.in/ser7s',
            'info_dict': _INFO_DICT,
            'params': {
                'skip_download': True,  # live stream
            },
        },
    ]

    def _real_extract(self, url):
        """Extract station metadata and audio formats for a TuneIn URL.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``thumbnail``, ``location`` and ``is_live``.

        Raises ExtractorError if a short link redirects to a URL without a
        station id, or if the station payload has no ``StreamUrl``.
        """
        mobj = re.match(self._VALID_URL, url)
        redirect_id = mobj.group('redirect_id')
        if redirect_id:
            # The server doesn't support HEAD requests
            urlh = self._request_webpage(
                url, redirect_id, note='Downloading redirect page')
            url = urlh.geturl()
            self.to_screen('Following redirect: %s' % url)
            mobj = re.match(self._VALID_URL, url)
            # The redirect target may be an unsupported page or another
            # short link; neither yields a station id.
            if mobj is None or not mobj.group('id'):
                raise ExtractorError(
                    'Redirect target is not a TuneIn station page: %s' % url)
        station_id = mobj.group('id')

        webpage = self._download_webpage(
            url, station_id, note='Downloading station webpage')

        # The station description is embedded in the page as a JavaScript
        # assignment: TuneIn.payload = {...}
        payload = self._html_search_regex(
            r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data')
        json_data = json.loads(payload)
        station_info = json_data['Station']['broadcast']
        title = station_info['Title']
        thumbnail = station_info.get('Logo')
        location = station_info.get('Location')
        streams_url = station_info.get('StreamUrl')
        if not streams_url:
            raise ExtractorError('No downloadable streams found',
                                 expected=True)

        # The stream list is wrapped in a call, ``(...);`` - strip the
        # wrapper before parsing the JSON inside.
        stream_data = self._download_webpage(
            streams_url, station_id, note='Downloading stream data')
        streams = json.loads(self._search_regex(
            r'\((.*)\);', stream_data, 'stream info'))['Streams']

        is_live = None
        formats = []
        for stream in streams:
            if stream.get('Type') == 'Live':
                is_live = True
            stream_url = stream.get('Url')
            if not stream_url:
                # An entry without a URL cannot be downloaded.
                continue
            media_type = stream.get('MediaType')
            formats.append({
                'abr': stream.get('Bandwidth'),
                'ext': media_type,
                'acodec': media_type,
                'vcodec': 'none',
                'url': stream_url,
                # Sometimes streams with the highest quality do not exist
                'preference': stream.get('Reliability'),
            })
        self._sort_formats(formats)

        return {
            'id': station_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'location': location,
            'is_live': is_live,
        }