2015-02-23 16:10:08 +13:00
import re
from . common import InfoExtractor
2015-02-26 01:25:00 +01:00
from . . utils import (
int_or_none ,
parse_duration ,
parse_iso8601 ,
)
2015-02-23 16:10:08 +13:00
class AirMozillaIE(InfoExtractor):
    """Extractor for event pages hosted on air.mozilla.org.

    The event page embeds a vid.ly player; the player's embed script carries
    a JSON configuration that is handed to ``_parse_jwplayer_data``. Extra
    metadata (title, description, timestamp, duration, view count, location,
    categories) is scraped from the event page itself.
    """

    # The slug after the host name is captured as ``id``.
    _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
    _TEST = {
        'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
        'md5': '8d02f53ee39cf006009180e21df1f3ba',
        'info_dict': {
            'id': '6x4q2w',
            'ext': 'mp4',
            'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
            'thumbnail': r're:https?://.*/poster\.jpg',
            'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
            'timestamp': 1422487800,
            'upload_date': '20150128',
            'location': 'SFO Commons',
            'duration': 3780,
            'view_count': int,
            'categories': ['Main', 'Privacy'],
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the Air Mozilla event page at *url*.

        Downloads the event page, resolves the vid.ly video id embedded in
        it, downloads the vid.ly embed script, and parses the JSON argument
        of its ``initCallback(...)`` call. The ``config`` entry of that JSON
        is passed to ``_parse_jwplayer_data``; the resulting dict is then
        updated with metadata scraped from the event page and returned.
        """
        # The URL slug is only a display id; the real id comes from vid.ly.
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        video_id = self._html_search_regex(
            r'//vid\.ly/(.*?)/embed', webpage, 'id')

        embed_script = self._download_webpage(
            'https://vid.ly/{0}/embed'.format(video_id), video_id)
        jwconfig = self._parse_json(self._search_regex(
            r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config']

        info_dict = self._parse_jwplayer_data(jwconfig, video_id)

        # Optional page metadata: looked up with fatal=False, so presumably a
        # missing field yields None rather than aborting the extraction.
        view_count = int_or_none(self._html_search_regex(
            r'Views since archived: ([0-9]+)',
            webpage, 'view count', fatal=False))
        timestamp = parse_iso8601(self._html_search_regex(
            r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
        # Only the "<N> hour(s) <M> minute(s)" form is recognised here.
        duration = parse_duration(self._search_regex(
            r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
            webpage, 'duration', fatal=False))

        info_dict.update({
            'id': video_id,
            'title': self._og_search_title(webpage),
            # NOTE(review): this overrides any 'url' coming from the jwplayer
            # data with the page's og:url value; confirm that is intended.
            'url': self._og_search_url(webpage),
            'display_id': display_id,
            'description': self._og_search_description(webpage),
            'timestamp': timestamp,
            'location': self._html_search_regex(
                r'Location: (.*)', webpage, 'location', default=None),
            'duration': duration,
            'view_count': view_count,
            'categories': re.findall(
                r'<a href=".*?" class="channel">(.*?)</a>', webpage),
        })

        return info_dict