[ie/radiko] Extract more metadata (#9115)

Authored by: YoshichikaAAA
This commit is contained in:
YoshichikaAAA 2024-02-04 03:44:17 +09:00 committed by GitHub
parent 4253e3b7f4
commit e3ce2b385e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,5 +1,6 @@
import base64 import base64
import random import random
import re
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
@ -11,6 +12,7 @@
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
) )
from ..utils.traversal import traverse_obj
class RadikoBaseIE(InfoExtractor): class RadikoBaseIE(InfoExtractor):
@ -159,6 +161,12 @@ def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token,
return formats return formats
def _extract_performers(self, prog):
performers = traverse_obj(prog, (
'pfm/text()', ..., {lambda x: re.split(r'[//、 ,]', x)}, ..., {str.strip}))
# TODO: change 'artist' fields to 'artists' and return traversal list instead of str
return ', '.join(performers) or None
class RadikoIE(RadikoBaseIE): class RadikoIE(RadikoBaseIE):
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
@ -186,10 +194,12 @@ def _real_extract(self, url):
return { return {
'id': video_id, 'id': video_id,
'title': try_call(lambda: prog.find('title').text), 'title': try_call(lambda: prog.find('title').text),
'artist': self._extract_performers(prog),
'description': clean_html(try_call(lambda: prog.find('info').text)), 'description': clean_html(try_call(lambda: prog.find('info').text)),
'uploader': try_call(lambda: station_program.find('.//name').text), 'uploader': try_call(lambda: station_program.find('.//name').text),
'uploader_id': station, 'uploader_id': station,
'timestamp': vid_int, 'timestamp': vid_int,
'duration': try_call(lambda: unified_timestamp(radio_end, False) - unified_timestamp(radio_begin, False)),
'is_live': True, 'is_live': True,
'formats': self._extract_formats( 'formats': self._extract_formats(
video_id=video_id, station=station, is_onair=False, video_id=video_id, station=station, is_onair=False,
@ -243,6 +253,7 @@ def _real_extract(self, url):
return { return {
'id': station, 'id': station,
'title': title, 'title': title,
'artist': self._extract_performers(prog),
'description': description, 'description': description,
'uploader': station_name, 'uploader': station_name,
'uploader_id': station, 'uploader_id': station,