Merge branch 'master' of github.com:rg3/youtube-dl

Conflicts:
	youtube_dl/extractor/mtv.py
This commit is contained in:
Philipp Hagemeister 2014-01-22 00:21:27 +01:00
commit 6562df768d
4 changed files with 65 additions and 13 deletions

View File

@ -119,7 +119,10 @@
from .mixcloud import MixcloudIE from .mixcloud import MixcloudIE
from .mpora import MporaIE from .mpora import MporaIE
from .mofosex import MofosexIE from .mofosex import MofosexIE
from .mtv import MTVIE from .mtv import (
MTVIE,
MTVIggyIE,
)
from .muzu import MuzuTVIE from .muzu import MuzuTVIE
from .myspace import MySpaceIE from .myspace import MySpaceIE
from .myspass import MySpassIE from .myspass import MySpassIE
@ -171,6 +174,7 @@
from .space import SpaceIE from .space import SpaceIE
from .spankwire import SpankwireIE from .spankwire import SpankwireIE
from .spiegel import SpiegelIE from .spiegel import SpiegelIE
from .spike import SpikeIE
from .stanfordoc import StanfordOpenClassroomIE from .stanfordoc import StanfordOpenClassroomIE
from .statigram import StatigramIE from .statigram import StatigramIE
from .steam import SteamIE from .steam import SteamIE

View File

@ -90,9 +90,12 @@ def _build_brighcove_url(cls, object_str):
object_doc = xml.etree.ElementTree.fromstring(object_str) object_doc = xml.etree.ElementTree.fromstring(object_str)
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars') fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
flashvars = dict( if fv_el is not None:
(k, v[0]) flashvars = dict(
for k, v in compat_parse_qs(fv_el.attrib['value']).items()) (k, v[0])
for k, v in compat_parse_qs(fv_el.attrib['value']).items())
else:
flashvars = {}
def find_param(name): def find_param(name):
if name in flashvars: if name in flashvars:
@ -131,7 +134,7 @@ def _extract_brightcove_url(cls, webpage):
m_brightcove = re.search( m_brightcove = re.search(
r'''(?sx)<object r'''(?sx)<object
(?: (?:
:[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 | [^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
).+?</object>''', ).+?</object>''',
webpage) webpage)
@ -230,6 +233,6 @@ def _extract_video_info(self, video_info):
else: else:
return ad_info return ad_info
if 'url' not in info: if 'url' not in info and not info.get('formats'):
raise ExtractorError('Unable to extract video url for %s' % info['id']) raise ExtractorError('Unable to extract video url for %s' % info['id'])
return info return info

View File

@ -1,7 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -9,6 +8,8 @@
ExtractorError, ExtractorError,
find_xpath_attr, find_xpath_attr,
fix_xml_ampersands, fix_xml_ampersands,
url_basename,
RegexNotFoundError,
) )
@ -38,10 +39,9 @@ def _get_thumbnail_url(self, uri, itemdoc):
else: else:
return thumb_node.attrib['url'] return thumb_node.attrib['url']
def _extract_video_formats(self, metadataXml): def _extract_video_formats(self, mdoc):
if '/error_country_block.swf' in metadataXml: if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None:
raise ExtractorError('This video is not available from your country.', expected=True) raise ExtractorError('This video is not available from your country.', expected=True)
mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
formats = [] formats = []
for rendition in mdoc.findall('.//rendition'): for rendition in mdoc.findall('.//rendition'):
@ -67,8 +67,9 @@ def _get_video_info(self, itemdoc):
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url) mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url)
if 'acceptMethods' not in mediagen_url: if 'acceptMethods' not in mediagen_url:
mediagen_url += '&acceptMethods=fms' mediagen_url += '&acceptMethods=fms'
mediagen_page = self._download_webpage(mediagen_url, video_id,
'Downloading video urls') mediagen_doc = self._download_xml(mediagen_url, video_id,
'Downloading video urls')
description_node = itemdoc.find('description') description_node = itemdoc.find('description')
if description_node is not None: if description_node is not None:
@ -91,7 +92,7 @@ def _get_video_info(self, itemdoc):
return { return {
'title': title, 'title': title,
'formats': self._extract_video_formats(mediagen_page), 'formats': self._extract_video_formats(mediagen_doc),
'id': video_id, 'id': video_id,
'thumbnail': self._get_thumbnail_url(uri, itemdoc), 'thumbnail': self._get_thumbnail_url(uri, itemdoc),
'description': description, 'description': description,
@ -106,6 +107,17 @@ def _get_videos_info(self, uri):
'Downloading info', transform_source=fix_xml_ampersands) 'Downloading info', transform_source=fix_xml_ampersands)
return [self._get_video_info(item) for item in idoc.findall('.//item')] return [self._get_video_info(item) for item in idoc.findall('.//item')]
def _real_extract(self, url):
    """Find the mgid on the page at *url* and return its videos' info.

    The page is downloaded, an mgid is taken either from the og video url
    or from a ``data-mgid`` attribute in the markup, and the result of
    ``self._get_videos_info(mgid)`` is returned.
    """
    # The last path component of the url serves as the id passed to the
    # page download.
    title = url_basename(url)
    webpage = self._download_webpage(url, title)
    try:
        # Only this lookup is expected to raise RegexNotFoundError, so it
        # is the only statement kept inside the try body.
        fb_url = self._og_search_video_url(webpage)
    except RegexNotFoundError:
        # Fallback when the og video url lookup fails: read the mgid
        # straight from the data-mgid attribute.
        mgid = self._search_regex(r'data-mgid="(.*?)"', webpage, 'mgid')
    else:
        # the url is in the format http://media.mtvnservices.com/fb/{mgid}.swf
        mgid = url_basename(fb_url).rpartition('.')[0]
    return self._get_videos_info(mgid)
class MTVIE(MTVServicesInfoExtractor): class MTVIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)^https?:// _VALID_URL = r'''(?x)^https?://
@ -158,3 +170,17 @@ def _real_extract(self, url):
uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri') uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri')
return self._get_videos_info(uri) return self._get_videos_info(uri)
class MTVIggyIE(MTVServicesInfoExtractor):
    """Extractor configuration for video pages under www.mtviggy.com/videos/.

    Only class-level constants are defined here; all behaviour comes from
    ``MTVServicesInfoExtractor``.
    """

    IE_NAME = 'mtviggy.com'

    # Any http(s) page below www.mtviggy.com/videos/ is accepted.
    _VALID_URL = r'https?://www\.mtviggy\.com/videos/.+'

    # NOTE(review): presumably read by the base class when fetching the
    # feed for an mgid -- confirm against MTVServicesInfoExtractor.
    _FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'

    # Sample page with the metadata expected for it.
    _TEST = {
        'url': 'http://www.mtviggy.com/videos/arcade-fire-behind-the-scenes-at-the-biggest-music-experiment-yet/',
        'info_dict': {
            'id': '984696',
            'ext': 'mp4',
            'title': 'Short',
        },
    }

View File

@ -0,0 +1,19 @@
from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
class SpikeIE(MTVServicesInfoExtractor):
    """Extractor configuration for www.spike.com video-clips and episodes pages.

    Only class-level constants are defined here; all behaviour comes from
    ``MTVServicesInfoExtractor``.
    """

    # Accepts http(s) pages under /video-clips/ or /episodes/ on www.spike.com.
    _VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+'

    # NOTE(review): presumably read by the base class when fetching the
    # feed for an mgid -- confirm against MTVServicesInfoExtractor.
    _FEED_URL = 'http://www.spike.com/feeds/mrss/'

    # Sample page with the file checksum and metadata expected for it.
    _TEST = {
        'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
        'md5': '1a9265f32b0c375793d6c4ce45255256',
        'info_dict': {
            'id': 'b9c8221a-4e50-479a-b86d-3333323e38ba',
            'ext': 'mp4',
            'title': 'Can Allen Ride A Hundred Year-Old Motorcycle?',
            'description': 'md5:fbed7e82ed5fad493615b3094a9499cb',
        },
    }