[extractor/common] Add validation for JSON-LD URLs

This commit is contained in:
Sergey M․ 2018-10-29 00:19:08 +07:00
parent 4c237ab787
commit bebef10909
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@@ -69,6 +69,7 @@
update_url_query,
urljoin,
url_basename,
+ url_or_none,
xpath_element,
xpath_text,
xpath_with_ns,
@@ -1213,10 +1214,10 @@ def extract_interaction_statistic(e):
def extract_video_object(e):
assert e['@type'] == 'VideoObject'
info.update({
- 'url': e.get('contentUrl'),
+ 'url': url_or_none(e.get('contentUrl')),
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
- 'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
+ 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')),
'filesize': float_or_none(e.get('contentSize')),