Merge branch 'age_limit'

This commit is contained in:
Philipp Hagemeister 2013-10-06 16:23:18 +02:00
commit 2a69c6b879
7 changed files with 81 additions and 3 deletions

View File

@ -0,0 +1,53 @@
#!/usr/bin/env python
import sys
import unittest
# Allow direct execution
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl import YoutubeDL
from helper import try_rm
def _download_restricted(url, filename, age):
    """Report whether a video's info JSON gets written under an age limit.

    Runs YoutubeDL on ``url`` with the 'age_limit' parameter set to ``age``
    and with 'skip_download' and 'writeinfojson' enabled, then checks
    whether the info JSON file was created.

    url      -- URL (or video id) handed to YoutubeDL.download().
    filename -- expected output name for the '%(id)s.%(ext)s' template;
                '.info.json' is appended to obtain the file that is checked.
    age      -- value passed as the 'age_limit' parameter.

    Returns True iff ``filename + '.info.json'`` exists after the run.
    The file is removed again before returning, even if the download raises.
    """
    params = {
        'age_limit': age,
        'skip_download': True,
        'writeinfojson': True,
        "outtmpl": "%(id)s.%(ext)s",
    }
    ydl = YoutubeDL(params)
    ydl.add_default_info_extractors()
    json_filename = filename + '.info.json'
    # Start from a clean slate so a stale file cannot fake a success.
    try_rm(json_filename)
    try:
        ydl.download([url])
        return os.path.exists(json_filename)
    finally:
        # Clean up on every exit path so a failed run cannot leave a file
        # behind that would influence a later check.
        try_rm(json_filename)
class TestAgeRestriction(unittest.TestCase):
    """Check that the 'age_limit' parameter makes restricted videos get skipped."""

    def _assert_restricted(self, url, filename, age, old_age=None):
        """Assert the video is processed with ``old_age`` but skipped with ``age``."""
        # The permissive run goes first; if it fails, the restrictive run
        # is never started.
        processed_when_allowed = _download_restricted(url, filename, old_age)
        self.assertTrue(processed_when_allowed)
        processed_when_limited = _download_restricted(url, filename, age)
        self.assertFalse(processed_when_limited)

    def test_youtube(self):
        video = '07FYdnEawAQ'
        expected_name = '07FYdnEawAQ.mp4'
        self._assert_restricted(video, expected_name, 10)

    def test_youporn(self):
        video = 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/'
        expected_name = '505835.mp4'
        self._assert_restricted(video, expected_name, 2, old_age=25)

    def test_pornotube(self):
        video = 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing'
        expected_name = '1689755.flv'
        self._assert_restricted(video, expected_name, 13)
# Run the test cases only when this file is executed directly, not on import.
if __name__ == '__main__':
    unittest.main()

View File

@ -84,6 +84,8 @@ class YoutubeDL(object):
cachedir: Location of the cache files in the filesystem. cachedir: Location of the cache files in the filesystem.
None to disable filesystem cache. None to disable filesystem cache.
noplaylist: Download single video instead of a playlist if in doubt. noplaylist: Download single video instead of a playlist if in doubt.
age_limit: An integer representing the user's age in years.
Unsuitable videos for the given age are skipped.
The following parameters are not used by YoutubeDL itself, they are used by The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader: the FileDownloader:
@ -309,6 +311,10 @@ def _match_entry(self, info_dict):
dateRange = self.params.get('daterange', DateRange()) dateRange = self.params.get('daterange', DateRange())
if date not in dateRange: if date not in dateRange:
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
age_limit = self.params.get('age_limit')
if age_limit is not None:
if age_limit < info_dict.get('age_limit', 0):
return u'Skipping "' + title + '" because it is age restricted'
return None return None
def extract_info(self, url, download=True, ie_key=None, extra_info={}): def extract_info(self, url, download=True, ie_key=None, extra_info={}):

View File

@ -188,6 +188,9 @@ def _hide_login_info(opts):
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None) selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None) selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
help='download only videos suitable for the given age',
default=None, type=int)
authentication.add_option('-u', '--username', authentication.add_option('-u', '--username',
@ -631,6 +634,7 @@ def _real_main(argv=None):
'daterange': date, 'daterange': date,
'cachedir': opts.cachedir, 'cachedir': opts.cachedir,
'youtube_print_sig_code': opts.youtube_print_sig_code, 'youtube_print_sig_code': opts.youtube_print_sig_code,
'age_limit': opts.age_limit,
}) })
if opts.verbose: if opts.verbose:

View File

@ -54,6 +54,7 @@ class InfoExtractor(object):
view_count: How many users have watched the video on the platform. view_count: How many users have watched the video on the platform.
urlhandle: [internal] The urlHandle to be used to download the file, urlhandle: [internal] The urlHandle to be used to download the file,
like returned by urllib.request.urlopen like returned by urllib.request.urlopen
age_limit: Age restriction for the video, as an integer (years)
formats: A list of dictionaries for each format available, it must formats: A list of dictionaries for each format available, it must
be ordered from worst to best quality. Potential fields: be ordered from worst to best quality. Potential fields:
* url Mandatory. The URL of the video file * url Mandatory. The URL of the video file
@ -318,6 +319,15 @@ def _og_search_video_url(self, html, name='video url', **kargs):
self._og_regex('video')], self._og_regex('video')],
html, name, **kargs) html, name, **kargs)
def _rta_search(self, html):
    """Return the age limit implied by an RTA label found in ``html``.

    html -- page source to scan.

    Returns 18 when a <meta name="rating"> tag whose content is the RTA
    label is present, and 0 otherwise.
    """
    # See http://www.rtalabel.org/index.php?content=howtofaq#single
    # HTML allows attribute values in either double or single quotes; the
    # backreferences require the closing quote to match the opening one.
    # The pattern is verbose (x), so its layout whitespace is ignored.
    if re.search(r'''(?ix)<meta\s+name=(["'])rating\1\s+
                     content=(["'])RTA-5042-1996-1400-1577-RTA\2''',
                 html):
        return 18
    return 0
class SearchInfoExtractor(InfoExtractor): class SearchInfoExtractor(InfoExtractor):
""" """
Base class for paged search queries extractors. Base class for paged search queries extractors.

View File

@ -38,6 +38,7 @@ def _real_extract(self, url):
VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by' VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False) upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
if upload_date: upload_date = unified_strdate(upload_date) if upload_date: upload_date = unified_strdate(upload_date)
age_limit = self._rta_search(webpage)
info = {'id': video_id, info = {'id': video_id,
'url': video_url, 'url': video_url,
@ -45,6 +46,7 @@ def _real_extract(self, url):
'upload_date': upload_date, 'upload_date': upload_date,
'title': video_title, 'title': video_title,
'ext': 'flv', 'ext': 'flv',
'format': 'flv'} 'format': 'flv',
'age_limit': age_limit}
return [info] return [info]

View File

@ -51,6 +51,7 @@ def _real_extract(self, url):
req = compat_urllib_request.Request(url) req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1') req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id) webpage = self._download_webpage(req, video_id)
age_limit = self._rta_search(webpage)
# Get JSON parameters # Get JSON parameters
json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters') json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
@ -115,7 +116,8 @@ def _real_extract(self, url):
'ext': extension, 'ext': extension,
'format': format, 'format': format,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'description': video_description 'description': video_description,
'age_limit': age_limit,
}) })
if self._downloader.params.get('listformats', None): if self._downloader.params.get('listformats', None):

View File

@ -1495,7 +1495,8 @@ def _real_extract(self, url):
'description': video_description, 'description': video_description,
'player_url': player_url, 'player_url': player_url,
'subtitles': video_subtitles, 'subtitles': video_subtitles,
'duration': video_duration 'duration': video_duration,
'age_limit': 18 if age_gate else 0,
}) })
return results return results