# coding: utf-8
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import ExtractorError


# Registration: youtube_dl/extractor/__init__.py must import this class with
#     from .tunein import TuneInIE
# (placed between the ``.tumblr`` and ``.turbo`` imports).
class TuneInIE(InfoExtractor):
    """Information extractor for TuneIn radio stations.

    Accepted URL shapes (see ``_VALID_URL``):

    * ``tunein.com/radio/<slug>-s<id>``
    * ``tunein.com/station/...StationId=<id>``
    * ``tun.in/<code>`` short links, which are resolved by following the
      HTTP redirect and re-matching the final URL.
    """

    # Group ``id`` is the numeric station id; group ``redirect_id`` is the
    # short-link code.  Exactly one of the two is set for a matching URL.
    _VALID_URL = r'''(?x)https?://(?:www\.)?
        (?:
            tunein\.com/
            (?:
                radio/.*?-s|
                station/.*?StationId\=
            )(?P<id>[0-9]+)
            |tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
        )
    '''

    # Expected metadata shared by both test cases below (the short link
    # redirects to the same station).
    _INFO_DICT = {
        'id': '34682',
        'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
        'ext': 'AAC',
        'thumbnail': r're:^https?://.*\.png$',
        'location': 'Tacoma, WA',
    }
    _TESTS = [
        {
            'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
            'info_dict': _INFO_DICT,
            'params': {
                'skip_download': True,  # live stream
            },
        },
        {  # test redirection
            'url': 'http://tun.in/ser7s',
            'info_dict': _INFO_DICT,
            'params': {
                'skip_download': True,  # live stream
            },
        },
    ]

    def _real_extract(self, url):
        """Extract station metadata and stream formats for ``url``.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``thumbnail``, ``location`` and ``is_live``.

        Raises ExtractorError when a short link redirects to a URL that does
        not identify a station, or when the station payload carries no
        ``StreamUrl``.
        """
        mobj = re.match(self._VALID_URL, url)
        redirect_id = mobj.group('redirect_id')
        if redirect_id:
            # The server doesn't support HEAD requests
            urlh = self._request_webpage(
                url, redirect_id, note='Downloading redirect page')
            url = urlh.geturl()
            self.to_screen('Following redirect: %s' % url)
            mobj = re.match(self._VALID_URL, url)
            # The redirect target is outside our control: it may not match
            # at all, or may match only the short-link alternative again.
            if mobj is None or not mobj.group('id'):
                raise ExtractorError(
                    'Redirect target is not a TuneIn station URL: %s' % url)
        station_id = mobj.group('id')

        webpage = self._download_webpage(
            url, station_id, note='Downloading station webpage')

        # The station page embeds its data as a JSON object assigned to
        # ``TuneIn.payload``; the object is taken up to the end of that line.
        payload = self._html_search_regex(
            r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data')
        json_data = json.loads(payload)
        station_info = json_data['Station']['broadcast']
        title = station_info['Title']
        thumbnail = station_info.get('Logo')
        location = station_info.get('Location')
        streams_url = station_info.get('StreamUrl')
        if not streams_url:
            raise ExtractorError('No downloadable streams found',
                                 expected=True)

        # The stream list is wrapped in a call, ``(...);`` -- unwrap it
        # before parsing the JSON inside the parentheses.
        stream_data = self._download_webpage(
            streams_url, station_id, note='Downloading stream data')
        streams = json.loads(self._search_regex(
            r'\((.*)\);', stream_data, 'stream info'))['Streams']

        is_live = None
        formats = []
        for stream in streams:
            if stream.get('Type') == 'Live':
                is_live = True
            stream_url = stream.get('Url')
            if not stream_url:
                # A format entry without a URL cannot be downloaded.
                continue
            formats.append({
                'abr': stream.get('Bandwidth'),
                'ext': stream.get('MediaType'),
                'acodec': stream.get('MediaType'),
                'vcodec': 'none',
                'url': stream_url,
                # Sometimes streams with the highest quality do not exist
                'preference': stream.get('Reliability'),
            })
        self._sort_formats(formats)

        return {
            'id': station_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'location': location,
            'is_live': is_live,
        }