2015-01-04 13:33:26 +01:00
import re
from . common import InfoExtractor
2015-01-05 18:13:19 +01:00
from . . utils import parse_duration
2015-01-04 13:33:26 +01:00
class RadioBremenIE(InfoExtractor):
    """Information extractor for videos in the Radio Bremen Mediathek.

    A video is addressed by the numeric ``id`` query parameter of a
    ``radiobremen.de/mediathek/`` URL.  Title, description and duration are
    scraped from a separate metadata page, while the parameters needed to
    build the download URL are scraped from the player call embedded in the
    Mediathek page itself.
    """

    # ``https?`` accepts both schemes.  The previous ``http?`` made the "p"
    # optional instead, so it matched "htt://" and rejected "https://".
    _VALID_URL = r'https?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P<id>[0-9]+)'
    IE_NAME = 'radiobremen'

    _TEST = {
        'url': 'http://www.radiobremen.de/mediathek/?id=141876',
        'info_dict': {
            'id': '141876',
            'ext': 'mp4',
            'duration': 178,
            'width': 512,
            'title': 'Druck auf Patrick Öztürk',
            'thumbnail': r're:https?://.*\.jpg$',
            'description': 'Gegen den SPD-Bürgerschaftsabgeordneten Patrick Öztürk wird wegen Beihilfe zum gewerbsmäßigen Betrug ermittelt. Am Donnerstagabend sollte er dem Vorstand des SPD-Unterbezirks Bremerhaven dazu Rede und Antwort stehen.',
        },
    }

    def _real_extract(self, url):
        """Extract the info dict for a single Mediathek video.

        Downloads the metadata page (title, description, duration) and the
        Mediathek page (player parameters), then assembles a single MP4
        format entry from the video id, the "secret" and the width found in
        the player call.

        :param url: a URL matching ``_VALID_URL``.
        :returns: dict with ``id``, ``title``, ``description``, ``duration``,
            ``formats`` and ``thumbnail``.
        :raises ExtractorError: if the player call cannot be found in the
            Mediathek page (title extraction failures are raised by the
            inherited regex helper, which is called without ``fatal=False``).
        """
        # Imported here so this block does not depend on a change to the
        # module-level import list.
        from ..utils import ExtractorError

        video_id = self._match_id(url)

        meta_url = 'http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s' % video_id
        meta_doc = self._download_webpage(
            meta_url, video_id, 'Downloading metadata')

        title = self._html_search_regex(
            r'<h1.*>(?P<title>.+)</h1>', meta_doc, 'title')
        # Description and duration are optional: a miss must not abort
        # the extraction, hence fatal=False.
        description = self._html_search_regex(
            r'<p>(?P<description>.*)</p>', meta_doc, 'description', fatal=False)
        duration = parse_duration(self._html_search_regex(
            r'Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>',
            meta_doc, 'duration', fatal=False))

        page_doc = self._download_webpage(
            url, video_id, 'Downloading video information')
        mobj = re.search(
            r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)",
            page_doc)
        if mobj is None:
            # Without the player call there is no way to build the media
            # URL; fail with a clear message instead of an AttributeError
            # on ``None.group``.
            raise ExtractorError('Unable to extract player parameters')

        width = mobj.group('width')
        video_url = (
            "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" %
            (video_id, video_id, mobj.group('secret'), width))

        formats = [{
            'url': video_url,
            'ext': 'mp4',
            'width': int(width),
        }]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
            'thumbnail': mobj.group('thumbnail'),
        }