1
1
mirror of https://github.com/ytdl-org/youtube-dl synced 2025-01-11 20:56:11 +01:00

[jamendo] Improve

This commit is contained in:
Sergey M․ 2016-10-25 02:46:48 +07:00
parent b17422753f
commit d3b6b3b95b
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
2 changed files with 54 additions and 61 deletions

View File

@@ -408,7 +408,10 @@ from .ivi import (
from .ivideon import IvideonIE from .ivideon import IvideonIE
from .iwara import IwaraIE from .iwara import IwaraIE
from .izlesene import IzleseneIE from .izlesene import IzleseneIE
from .jamendo import JamendoIE, JamendoAlbumIE from .jamendo import (
JamendoIE,
JamendoAlbumIE,
)
from .jeuxvideo import JeuxVideoIE from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE from .jove import JoveIE
from .jwplatform import JWPlatformIE from .jwplatform import JWPlatformIE

View File

@@ -2,17 +2,13 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
from collections import namedtuple
from ..compat import compat_urlparse from ..compat import compat_urlparse
from .common import InfoExtractor from .common import InfoExtractor
FormatData = namedtuple('FormatData', [
'format_id', 'sub_domain', 'ext', 'quality'])
class JamendoIE(InfoExtractor): class JamendoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)' _VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)'
_TEST = { _TEST = {
'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i', 'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
'md5': '6e9e82ed6db98678f171c25a8ed09ffd', 'md5': '6e9e82ed6db98678f171c25a8ed09ffd',
@@ -26,36 +22,31 @@ class JamendoIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
url_data = self._VALID_URL_RE.match(url) mobj = self._VALID_URL_RE.match(url)
track_id = url_data.group('id') track_id = mobj.group('id')
display_id = url_data.group('display_id') display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
title = self._html_search_meta('name', webpage, 'title')
formats = [{
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
% (sub_domain, track_id, format_id),
'format_id': format_id,
'ext': ext,
'quality': quality,
} for quality, (format_id, sub_domain, ext) in enumerate((
('mp31', 'mp3l', 'mp3'),
('mp32', 'mp3d', 'mp3'),
('ogg1', 'ogg', 'ogg'),
('flac', 'flac', 'flac'),
))]
self._sort_formats(formats)
thumbnail = self._html_search_meta( thumbnail = self._html_search_meta(
'image', webpage, 'thumbnail', fatal=False) 'image', webpage, 'thumbnail', fatal=False)
title = self._html_search_meta('name', webpage, 'title')
url_template = 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
format_data = [
FormatData(
format_id='mp31', sub_domain='mp3l', ext='mp3', quality=0),
FormatData(
format_id='mp32', sub_domain='mp3d', ext='mp3', quality=1),
FormatData(
format_id='ogg1', sub_domain='ogg', ext='ogg', quality=2),
FormatData(
format_id='flac', sub_domain='flac', ext='flac', quality=3),
]
formats = [
{
'format_id': fd.format_id,
'url': url_template % (fd.sub_domain, track_id, fd.format_id),
'ext': fd.ext,
'quality': fd.quality
}
for fd in format_data
]
self._check_formats(formats, video_id=display_id)
return { return {
'id': track_id, 'id': track_id,
'display_id': display_id, 'display_id': display_id,
@@ -73,45 +64,44 @@ class JamendoAlbumIE(InfoExtractor):
'id': '121486', 'id': '121486',
'title': 'Duck On Cover' 'title': 'Duck On Cover'
}, },
'playlist_mincount': 2, 'playlist': [{
'playlist': [ 'md5': 'e1a2fcb42bda30dfac990212924149a8',
{ 'info_dict': {
'md5': 'e1a2fcb42bda30dfac990212924149a8', 'id': '1032333',
'info_dict': { 'ext': 'flac',
'id': '1032333', 'title': 'Warmachine'
'ext': 'flac',
'title': 'Warmachine'
}
},
{
'md5': '1f358d7b2f98edfe90fd55dac0799d50',
'info_dict': {
'id': '1032330',
'ext': 'flac',
'title': 'Without Your Ghost'
}
} }
], }, {
'md5': '1f358d7b2f98edfe90fd55dac0799d50',
'info_dict': {
'id': '1032330',
'ext': 'flac',
'title': 'Without Your Ghost'
}
}],
'params': { 'params': {
'playlistend': 2 'playlistend': 2
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
url_data = self._VALID_URL_RE.match(url) mobj = self._VALID_URL_RE.match(url)
album_id = url_data.group('id') album_id = mobj.group('id')
webpage = self._download_webpage(url, url_data.group('display_id'))
webpage = self._download_webpage(url, mobj.group('display_id'))
title = self._html_search_meta('name', webpage, 'title') title = self._html_search_meta('name', webpage, 'title')
track_paths = re.findall(r'<a href="(.+)" class="link-wrap js-trackrow-albumpage-link" itemprop="url">', webpage)
entries = [ entries = [
self.url_result(compat_urlparse.urljoin(url, path), ie=JamendoIE.ie_key()) self.url_result(
for path in track_paths compat_urlparse.urljoin(url, m.group('path')),
ie=JamendoIE.ie_key(),
video_id=self._search_regex(
r'/track/(\d+)', m.group('path'),
'track id', default=None))
for m in re.finditer(
r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
webpage)
] ]
return {
'_type': 'playlist', return self.playlist_result(entries, album_id, title)
'id': album_id,
'title': title,
'entries': entries
}