From 663d9442a05a647d8a73b286e1e75151455a1a85 Mon Sep 17 00:00:00 2001 From: pj47x Date: Mon, 2 Sep 2024 20:58:17 +1000 Subject: [PATCH] [ie/manyvids] Fix ManyVids extractor after website update --- yt_dlp/extractor/manyvids.py | 136 ++++++++++++++--------------------- 1 file changed, 52 insertions(+), 84 deletions(-) diff --git a/yt_dlp/extractor/manyvids.py b/yt_dlp/extractor/manyvids.py index 8caa8f87f..661e5e8cc 100644 --- a/yt_dlp/extractor/manyvids.py +++ b/yt_dlp/extractor/manyvids.py @@ -1,21 +1,20 @@ -import re - from .common import InfoExtractor +from .. import traverse_obj from ..utils import ( determine_ext, - extract_attributes, int_or_none, - str_to_int, + parse_count, + parse_duration, + parse_iso8601, url_or_none, - urlencode_postdata, ) class ManyVidsIE(InfoExtractor): - _WORKING = False _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)' _TESTS = [{ - # preview video + # Dead preview video + 'skip': True, 'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/', 'md5': '03f11bb21c52dd12a05be21a5c7dcc97', 'info_dict': { @@ -26,6 +25,24 @@ class ManyVidsIE(InfoExtractor): 'view_count': int, 'like_count': int, }, + }, { + # preview video + 'url': 'https://www.manyvids.com/Video/530341/mv-tips-tricks', + 'md5': '738dc723f7735ee9602f7ea352a6d058', + 'info_dict': { + 'id': '530341', + 'ext': 'mp4', + 'title': 'MV Tips & Tricks (Preview)', + 'description': 'md5:c3bae98c0f9453237c28b0f8795d9f83', + 'thumbnail': 'https://cdn5.manyvids.com/php_uploads/video_images/DestinyDiaz/thumbs/thumb_Hs26ATOO7fcZaI9sx3XT_screenshot_001.jpg', + 'uploader': 'DestinyDiaz', + 'view_count': int, + 'like_count': int, + 'release_timestamp': 1508419904, + 'tags': ['AdultSchool', 'BBW', 'SFW', 'TeacherFetish'], + 'release_date': '20171019', + 'duration': 3167.0, + }, }, { # full video 'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/', @@ -35,79 +52,41 @@ class ManyVidsIE(InfoExtractor): 'ext': 'mp4', 'title': 'MY FACE REVEAL', 
'description': 'md5:ec5901d41808b3746fed90face161612', + 'thumbnail': 'https://ods.manyvids.com/1001061960/3aa5397f2a723ec4597e344df66ab845/screenshots/thumbs/custom_1_180_5be09c1dcce03.jpg', 'uploader': 'Sarah Calanthe', 'view_count': int, 'like_count': int, + 'release_date': '20181110', + 'tags': ['EyeContact', 'Interviews', 'MaskFetish', 'MouthFetish', 'Redhead'], + 'release_timestamp': 1541851200, + 'duration': 224.0, }, }] def _real_extract(self, url): video_id = self._match_id(url) - real_url = f'https://www.manyvids.com/video/{video_id}/gtm.js' - try: - webpage = self._download_webpage(real_url, video_id) - except Exception: - # probably useless fallback - webpage = self._download_webpage(url, video_id) + info = traverse_obj( + self._download_json(f'https://www.manyvids.com/bff/store/video/{video_id}', video_id), + ('data', {dict})) or {} - info = self._search_regex( - r'''(]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''', - webpage, 'meta details', default='') - info = extract_attributes(info) - - player = self._search_regex( - r'''(]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''', - webpage, 'player details', default='') - player = extract_attributes(player) + video_urls = self._download_json(f'https://www.manyvids.com/bff/store/video/{video_id}/private', video_id)[ + 'data'] video_urls_and_ids = ( - (info.get('data-meta-video'), 'video'), - (player.get('data-video-transcoded'), 'transcoded'), - (player.get('data-video-filepath'), 'filepath'), - (self._og_search_video_url(webpage, secure=False, default=None), 'og_video'), + (traverse_obj(video_urls, ('teaser', 'filepath')), 'preview'), + (video_urls.get('transcodedFilepath'), 'transcoded'), + (video_urls.get('filepath'), 'filepath'), ) - def txt_or_none(s, default=None): - return (s.strip() or default) if isinstance(s, str) else default + title = traverse_obj(info, 'title') - uploader = txt_or_none(info.get('data-meta-author')) - - def mung_title(s): - if uploader: - s = 
re.sub(rf'^\s*{re.escape(uploader)}\s+[|-]', '', s) - return txt_or_none(s) - - title = ( - mung_title(info.get('data-meta-title')) - or self._html_search_regex( - (r']+class=["\']item-title[^>]+>([^<]+)', - r']+class=["\']h2 m-0["\'][^>]*>([^<]+)'), - webpage, 'title', default=None) - or self._html_search_meta( - 'twitter:title', webpage, 'title', fatal=True)) - - title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title - - if any(p in webpage for p in ('preview_videos', '_preview.mp4')): + # If the video formats JSON only contains a teaser object, then it is a preview + if video_urls.get('teaser') and not video_urls.get('filepath'): title += ' (Preview)' - - mv_token = self._search_regex( - r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, - 'mv token', default=None, group='value') - - if mv_token: - # Sets some cookies - self._download_webpage( - 'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php', - video_id, note='Setting format cookies', fatal=False, - data=urlencode_postdata({ - 'mvtoken': mv_token, - 'vid': video_id, - }), headers={ - 'Referer': url, - 'X-Requested-With': 'XMLHttpRequest', - }) + video_id += '-preview' + self.report_warning( + f'Only extracting preview. Video may be paid or subscription only. 
{self._login_hint()}') formats = [] for v_url, fmt in video_urls_and_ids: @@ -130,33 +109,22 @@ class ManyVidsIE(InfoExtractor): if f.get('height') is None: f['height'] = int_or_none( self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None)) - if '/preview/' in f['url']: - f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview'))) + if 'preview' in f['format_id']: f['preference'] = -10 if 'transcoded' in f['format_id']: f['preference'] = f.get('preference', -1) - 1 - def get_likes(): - likes = self._search_regex( - rf'''(]*\bdata-id\s*=\s*(['"]){video_id}\2[^>]*>)''', - webpage, 'likes', default='') - likes = extract_attributes(likes) - return int_or_none(likes.get('data-likes')) - - def get_views(): - return str_to_int(self._html_search_regex( - r'''(?s)]*\bclass\s*=["']views-wrapper\b[^>]+>.+?]+>\s*(\d[\d,.]*)\s*''', - webpage, 'view count', default=None)) - return { 'id': video_id, 'title': title, 'formats': formats, - 'description': txt_or_none(info.get('data-meta-description')), - 'uploader': txt_or_none(info.get('data-meta-author')), + 'description': (traverse_obj(info, 'description')), + 'uploader': (traverse_obj(info, ('model', 'displayName'))), 'thumbnail': ( - url_or_none(info.get('data-meta-image')) - or url_or_none(player.get('data-video-screenshot'))), - 'view_count': get_views(), - 'like_count': get_likes(), + url_or_none(traverse_obj(info, 'screenshot')) or url_or_none(traverse_obj(info, 'thumbnail'))), + 'view_count': (parse_count(traverse_obj(info, 'views'))), + 'like_count': (parse_count(traverse_obj(info, 'likes'))), + 'release_timestamp': (parse_iso8601(traverse_obj(info, 'launchDate'))), + 'duration': (parse_duration(traverse_obj(info, 'videoDuration'))), + 'tags': [t.get('label') for t in traverse_obj(info, 'tagList')], }