[extractor/generic] Support relative URIs in _parse_xspf

An XSPF <location> element may contain a relative URI, not just an absolute one, so each location is now resolved against the playlist's base URL.
This commit is contained in:
Ricardo Constantino 2018-03-07 21:31:53 +00:00 committed by Sergey M․
parent 178ee88319
commit 96b8b9abae
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
4 changed files with 82 additions and 4 deletions

View File

@ -694,6 +694,48 @@ def test_parse_f4m_formats(self):
self.ie._sort_formats(formats) self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None) expect_value(self, formats, expected_formats, None)
def test_parse_xspf(self):
    # Verifies that _parse_xspf turns every <location> of an XSPF fixture
    # into a URL resolved against the supplied base URL.
    #
    # Each case is (fixture name, base URL, expected entries). The fixture
    # name is also passed to the parser as the playlist id.
    annotation = 'Visit http://bigbrother404.bandcamp.com'
    cases = [
        (
            'foo_xspf',
            'https://example.org/src/',
            [
                {
                    'description': annotation,
                    'duration': 202.416,
                    'formats': [{'url': 'https://example.org/src/cd1/track%201.mp3'}],
                    'id': 'foo_xspf',
                    'title': 'Pandemonium',
                },
                {
                    # This location starts with '../', so it resolves one
                    # level above the base directory.
                    'description': annotation,
                    'duration': 255.857,
                    'formats': [{'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3'}],
                    'id': 'foo_xspf',
                    'title': 'Final Cartridge (Nichico Twelve Remix)',
                },
                {
                    # One relative and one absolute location on one track.
                    'description': annotation,
                    'duration': 287.915,
                    'formats': [
                        {'url': 'https://example.org/src/track3.mp3'},
                        {'url': 'https://example.com/track3.mp3'},
                    ],
                    'id': 'foo_xspf',
                    'title': 'Rebuilding Nightingale',
                },
            ],
        ),
    ]
    for fixture_name, base, wanted in cases:
        fixture_path = './test/testdata/xspf/%s.xspf' % fixture_name
        with io.open(fixture_path, mode='r', encoding='utf-8') as fixture:
            # The XML parser helper is handed bytes, hence the re-encode.
            raw_xml = fixture.read().encode('utf-8')
            playlist = compat_etree_fromstring(raw_xml)
            parsed = self.ie._parse_xspf(playlist, fixture_name, base)
            expect_value(self, parsed, wanted, None)
            # Compare entry by entry as well, pairing by position.
            for index, entry in enumerate(parsed):
                expect_dict(self, entry, wanted[index])
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

34
test/testdata/xspf/foo_xspf.xspf vendored Normal file
View File

@ -0,0 +1,34 @@
<?xml version="1.0" encoding="UTF-8"?>
<playlist version="1" xmlns="http://xspf.org/ns/0/">
<date>2018-03-09T18:01:43Z</date>
<trackList>
<track>
<location>cd1/track%201.mp3</location>
<title>Pandemonium</title>
<creator>Foilverb</creator>
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
<album>Pandemonium EP</album>
<trackNum>1</trackNum>
<duration>202416</duration>
</track>
<track>
<location>../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3</location>
<title>Final Cartridge (Nichico Twelve Remix)</title>
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
<creator>Foilverb</creator>
<album>Pandemonium EP</album>
<trackNum>2</trackNum>
<duration>255857</duration>
</track>
<track>
<location>track3.mp3</location>
<location>https://example.com/track3.mp3</location>
<title>Rebuilding Nightingale</title>
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
<creator>Foilverb</creator>
<album>Pandemonium EP</album>
<trackNum>3</trackNum>
<duration>287915</duration>
</track>
</trackList>
</playlist>

View File

@ -1700,9 +1700,9 @@ def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
'Unable to download xspf manifest', fatal=fatal) 'Unable to download xspf manifest', fatal=fatal)
if xspf is False: if xspf is False:
return [] return []
return self._parse_xspf(xspf, playlist_id) return self._parse_xspf(xspf, playlist_id, base_url(playlist_url))
def _parse_xspf(self, playlist, playlist_id): def _parse_xspf(self, playlist, playlist_id, playlist_base_url=''):
NS_MAP = { NS_MAP = {
'xspf': 'http://xspf.org/ns/0/', 'xspf': 'http://xspf.org/ns/0/',
's1': 'http://static.streamone.nl/player/ns/0', 's1': 'http://static.streamone.nl/player/ns/0',
@ -1720,7 +1720,7 @@ def _parse_xspf(self, playlist, playlist_id):
xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000) xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
formats = [{ formats = [{
'url': location.text, 'url': urljoin(playlist_base_url, location.text),
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),

View File

@ -2232,7 +2232,9 @@ def _real_extract(self, url):
self._sort_formats(smil['formats']) self._sort_formats(smil['formats'])
return smil return smil
elif doc.tag == '{http://xspf.org/ns/0/}playlist': elif doc.tag == '{http://xspf.org/ns/0/}playlist':
return self.playlist_result(self._parse_xspf(doc, video_id), video_id) return self.playlist_result(
self._parse_xspf(doc, video_id, compat_str(full_response.geturl())),
video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'] = self._parse_mpd_formats( info_dict['formats'] = self._parse_mpd_formats(
doc, doc,