[showroomlive] Improve (closes #11458)

2025-02-18 21:36:49 +01:00 · 2016-12-30 00:12:35 +07:00 · 2016-12-30 00:12:35 +07:00 · df086e74e2
commit df086e74e2
parent 963bd5ecfc
1 changed files with 62 additions and 58 deletions
--- a/youtube_dl/extractor/showroomlive.py
+++ b/youtube_dl/extractor/showroomlive.py
@@ -2,79 +2,83 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..utils import ExtractorError, compat_urlparse
+from ..compat import compat_str
 from ..utils import (
    ExtractorError,
    int_or_none,
    urljoin,
 )
-class ShowroomLiveIE(InfoExtractor):
+class ShowRoomLiveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?showroom-live\.com/(?P<id>[0-9a-zA-Z_]+)'
+    _VALID_URL = r'https?://(?:www\.)?showroom-live\.com/(?!onlive|timetable|event|campaign|news|ranking|room)(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.showroom-live.com/48_Nana_Okada',
-        'skip': 'Only live broadcasts, can\'t predict test case.',
+        'only_matching': True,
        'info_dict': {
            'id': '48_Nana_Okada',
            'ext': 'mp4',
            'uploader_id': '48_Nana_Okada',
        }
    }
    def _real_extract(self, url):
        broadcaster_id = self._match_id(url)
        # There is no showroom on these pages.
        if broadcaster_id in ['onlive', 'timetable', 'event', 'campaign', 'news', 'ranking']:
            raise ExtractorError('URL %s does not contain a showroom' % url)
        # Retrieve the information we need
        webpage = self._download_webpage(url, broadcaster_id)
-        room_id = self._search_regex(r'profile\?room_id\=(\d+)', webpage, 'room_id')
+
-        room_url = compat_urlparse.urljoin(url, "/api/room/profile?room_id=%s") % room_id
+        room_id = self._search_regex(
-        room = self._download_json(room_url, broadcaster_id)
+            (r'SrGlobal\.roomId\s*=\s*(\d+)',
             r'(?:profile|room)\?room_id\=(\d+)'), webpage, 'room_id')
        room = self._download_json(
            urljoin(url, '/api/room/profile?room_id=%s' % room_id),
            broadcaster_id)
        is_live = room.get('is_onlive')
-        if not is_live:
+        if is_live is not True:
-            raise ExtractorError('%s their showroom is not live' % broadcaster_id)
+            raise ExtractorError('%s is offline' % broadcaster_id, expected=True)
-        # Prepare and return the information
+        uploader = room.get('performer_name') or broadcaster_id
-        uploader = room.get('performer_name') or broadcaster_id  # performer_name can be an empty string.
+        title = room.get('room_name') or room.get('main_name') or uploader
-        title = room.get('room_name', room.get('main_name', "%s's Showroom" % uploader))
+
        streaming_url_list = self._download_json(
            urljoin(url, '/api/live/streaming_url?room_id=%s' % room_id),
            broadcaster_id)['streaming_url_list']
        formats = []
        for stream in streaming_url_list:
            stream_url = stream.get('url')
            if not stream_url:
                continue
            stream_type = stream.get('type')
            if stream_type == 'hls':
                m3u8_formats = self._extract_m3u8_formats(
                    stream_url, broadcaster_id, ext='mp4', m3u8_id='hls',
                    live=True)
                for f in m3u8_formats:
                    f['quality'] = int_or_none(stream.get('quality', 100))
                formats.extend(m3u8_formats)
            elif stream_type == 'rtmp':
                stream_name = stream.get('stream_name')
                if not stream_name:
                    continue
                formats.append({
                    'url': stream_url,
                    'play_path': stream_name,
                    'page_url': url,
                    'player_url': 'https://www.showroom-live.com/assets/swf/v3/ShowRoomLive.swf',
                    'rtmp_live': True,
                    'ext': 'flv',
                    'format_id': 'rtmp',
                    'format_note': stream.get('label'),
                    'quality': int_or_none(stream.get('quality', 100)),
                })
        self._sort_formats(formats)
        return {
-            'is_live': is_live,
+            'id': compat_str(room.get('live_id') or broadcaster_id),
-            'id': str(room.get('live_id')),
+            'title': self._live_title(title),
-            'timestamp': room.get('current_live_started_at'),
+            'description': room.get('description'),
            'timestamp': int_or_none(room.get('current_live_started_at')),
            'uploader': uploader,
            'uploader_id': broadcaster_id,
-            'title': title,
+            'view_count': int_or_none(room.get('view_num')),
-            'description': room.get('description'),
+            'formats': formats,
-            'formats': self._extract_formats(url, broadcaster_id, room_id)
+            'is_live': True,
        }
    def _extract_formats(self, url, broadcaster_id, room_id):
        formats = []
        stream_url = compat_urlparse.urljoin(url, "/api/live/streaming_url?room_id=%s") % room_id
        streaming_url_list = self._download_json(stream_url, broadcaster_id).get('streaming_url_list', [])
        for stream in streaming_url_list:
            if stream.get('type') == "hls":
                formats.extend(self._extract_m3u8_formats(
                    stream.get('url'),
                    broadcaster_id,
                    ext='mp4',
                    m3u8_id='hls',
                    preference=stream.get('quality', 100),
                    live=True
                ))
            elif stream.get('type') == 'rtmp':
                url = stream.get('url') + '/' + stream.get('stream_name')
                formats.append({
                    'url': url,
                    'format_id': 'rtmp',
                    'protocol': 'rtmp',
                    'ext': 'flv',
                    'preference': stream.get('quality', 100),
                    'format_note': stream.get('label')
                })
        self._sort_formats(formats)
        return formats