[extractor/mediastream] Improve WinSports support (#6401)

Closes #6360
Authored by: bashonly
This commit is contained in:
bashonly 2023-03-03 03:37:23 -06:00 committed by GitHub
parent 77d6d13646
commit 2d5a8c5db2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,7 +1,13 @@
import re
from .common import InfoExtractor
from ..utils import clean_html, get_element_html_by_class
from ..utils import (
remove_end,
str_or_none,
strip_or_none,
traverse_obj,
urljoin,
)
class MediaStreamIE(InfoExtractor):
@ -117,39 +123,56 @@ def _real_extract(self, url):
class WinSportsVideoIE(InfoExtractor):
_VALID_URL = r'https?://www\.winsports\.co/videos/(?P<display_id>[\w-]+)-(?P<id>\d+)'
_VALID_URL = r'https?://www\.winsports\.co/videos/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://www.winsports.co/videos/siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536',
'info_dict': {
'id': '62dc8357162c4b0821fcfb3c',
'display_id': 'siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco',
'display_id': 'siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536',
'title': '¡Siempre Castellanos! Gran atajada del portero \'cardenal\' para evitar la caída de su arco',
'description': 'md5:eb811b2b2882bdc59431732c06b905f2',
'thumbnail': r're:^https?://[^?#]+62dc8357162c4b0821fcfb3c',
'ext': 'mp4',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.winsports.co/videos/observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548',
'info_dict': {
'id': '62dcb875ef12a5526790b552',
'display_id': 'observa-aqui-los-goles-del-empate-entre-tolima-y-nacional',
'display_id': 'observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548',
'title': 'Observa aquí los goles del empate entre Tolima y Nacional',
'description': 'md5:b19402ba6e46558b93fd24b873eea9c9',
'thumbnail': r're:^https?://[^?#]+62dcb875ef12a5526790b552',
'ext': 'mp4',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.winsports.co/videos/equidad-vuelve-defender-su-arco-de-remates-de-junior',
'info_dict': {
'id': '63fa7eca72f1741ad3a4d515',
'display_id': 'equidad-vuelve-defender-su-arco-de-remates-de-junior',
'title': '⚽ Equidad vuelve a defender su arco de remates de Junior',
'description': 'Remate de Sierra',
'thumbnail': r're:^https?://[^?#]+63fa7eca72f1741ad3a4d515',
'ext': 'mp4',
},
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
display_id, video_id = self._match_valid_url(url).group('display_id', 'id')
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
json_ld = self._search_json_ld(webpage, display_id, expected_type='VideoObject', default={})
media_setting_json = self._search_json(
r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'drupal-setting-json', display_id)
mediastream_id = media_setting_json['settings']['mediastream_formatter'][video_id]['mediastream_id']
mediastream_id = traverse_obj(
media_setting_json, ('settings', 'mediastream_formatter', ..., 'mediastream_id', {str_or_none}),
get_all=False) or json_ld.get('url')
if not mediastream_id:
self.raise_no_formats('No MediaStream embed found in webpage')
return self.url_result(
f'https://mdstrm.com/embed/{mediastream_id}', MediaStreamIE, video_id, url_transparent=True,
display_id=display_id, video_title=clean_html(get_element_html_by_class('title-news', webpage)))
urljoin('https://mdstrm.com/embed/', mediastream_id), MediaStreamIE, display_id, url_transparent=True,
display_id=display_id, video_title=strip_or_none(remove_end(json_ld.get('title'), '| Win Sports')))