[sendtonews] improve info extraction

This commit is contained in:
Remita Amine 2017-01-06 11:23:43 +01:00
parent a814da3f62
commit e5dfdc8164

View File

@@ -8,6 +8,9 @@
float_or_none, float_or_none,
parse_iso8601, parse_iso8601,
update_url_query, update_url_query,
int_or_none,
determine_protocol,
unescapeHTML,
) )
@@ -20,18 +23,18 @@ class SendtoNewsIE(JWPlatformBaseIE):
'info_dict': { 'info_dict': {
'id': 'GxfCe0Zo7D-175909-5588' 'id': 'GxfCe0Zo7D-175909-5588'
}, },
'playlist_count': 9, 'playlist_count': 8,
# test the first video only to prevent lengthy tests # test the first video only to prevent lengthy tests
'playlist': [{ 'playlist': [{
'info_dict': { 'info_dict': {
'id': '198180', 'id': '240385',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Recap: CLE 5, LAA 4', 'title': 'Indians introduce Encarnacion',
'description': '8/14/16: Naquin, Almonte lead Indians in 5-4 win', 'description': 'Indians president of baseball operations Chris Antonetti and Edwin Encarnacion discuss the slugger\'s three-year contract with Cleveland',
'duration': 57.343, 'duration': 137.898,
'thumbnail': r're:https?://.*\.jpg$', 'thumbnail': r're:https?://.*\.jpg$',
'upload_date': '20160815', 'upload_date': '20170105',
'timestamp': 1471221961, 'timestamp': 1483649762,
}, },
}], }],
'params': { 'params': {
@@ -64,7 +67,20 @@ def _real_extract(self, url):
for video in playlist_data['playlistData'][0]: for video in playlist_data['playlistData'][0]:
info_dict = self._parse_jwplayer_data( info_dict = self._parse_jwplayer_data(
video['jwconfiguration'], video['jwconfiguration'],
require_title=False, rtmp_params={'no_resume': True}) require_title=False, m3u8_id='hls', rtmp_params={'no_resume': True})
for f in info_dict['formats']:
if f.get('tbr'):
continue
tbr = int_or_none(self._search_regex(
r'/(\d+)k/', f['url'], 'bitrate', default=None))
if not tbr:
continue
f.update({
'format_id': '%s-%d' % (determine_protocol(f), tbr),
'tbr': tbr,
})
self._sort_formats(info_dict['formats'], ('tbr', 'height', 'width', 'format_id'))
thumbnails = [] thumbnails = []
if video.get('thumbnailUrl'): if video.get('thumbnailUrl'):
@@ -78,8 +94,8 @@ def _real_extract(self, url):
'url': video['smThumbnailUrl'], 'url': video['smThumbnailUrl'],
}) })
info_dict.update({ info_dict.update({
'title': video['S_headLine'], 'title': video['S_headLine'].strip(),
'description': video.get('S_fullStory'), 'description': unescapeHTML(video.get('S_fullStory')),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'duration': float_or_none(video.get('SM_length')), 'duration': float_or_none(video.get('SM_length')),
'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '), 'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),