From fcff2c5cd018c0ad3f0fd9736ce2cc477da80709 Mon Sep 17 00:00:00 2001 From: ringus1 Date: Wed, 10 Jan 2024 16:11:45 +0100 Subject: [PATCH 01/18] Adding potential fix --- yt_dlp/extractor/facebook.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index a07a0d344..87869fcfc 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -395,6 +395,7 @@ def _perform_login(self, username, password): try: login_results = self._download_webpage(request, None, note='Logging in', errnote='unable to fetch login page') + # TODO: Request couldn't be processed is returned always if re.search(r'', login_results) is not None: error = self._html_search_regex( r'(?s)]+class=(["\']).*?login_error_box.*?\1[^>]*>]*>.*?]*>(?P.+?)', @@ -465,6 +466,7 @@ def extract_metadata(webpage): description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text')) uploader_data = ( get_first(media, ('owner', {dict})) + or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name'])) or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name'])) or get_first(post, ('node', 'actors', ..., {dict})) or {}) @@ -555,7 +557,7 @@ def extract_relay_data(_filter): def extract_relay_prefetched_data(_filter): return traverse_obj(extract_relay_data(_filter), ( 'require', (None, (..., ..., ..., '__bbox', 'require')), - lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ..., + lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v if isinstance(key, str)), ..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {} if not video_data: From 4f58aabe6722d19870618f8755087e93e6f8cd48 Mon Sep 17 00:00:00 2001 From: ringus1 Date: Thu, 11 Jan 2024 14:39:52 +0100 Subject: [PATCH 02/18] Add better login handling --- yt_dlp/extractor/facebook.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 87869fcfc..4fa0012a7 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -395,7 +395,9 @@ def _perform_login(self, username, password): try: login_results = self._download_webpage(request, None, note='Logging in', errnote='unable to fetch login page') - # TODO: Request couldn't be processed is returned always + if self._html_search_regex(r'(Your Request Couldn.+? be Processed)', login_results, "request error", default=None) is not None: + raise ExtractorError('Failed to perform login request. Report a bug.') + if re.search(r'', login_results) is not None: error = self._html_search_regex( r'(?s)]+class=(["\']).*?login_error_box.*?\1[^>]*>]*>.*?]*>(?P.+?)', @@ -432,6 +434,20 @@ def _extract_from_url(self, url, video_id): webpage = self._download_webpage( url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id) + if (self.get_param("username") and self.get_param("password")) or self.get_param("cookiefile"): + if 'We\'ve suspended your account' in webpage: + raise ExtractorError('Login account is suspended.', expected=True) + + sjs_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall( + r'data-sjs>({.*?ScheduledServerJS.*?})', webpage)] + userinfo = get_first(sjs_data, ('require', ..., ..., ..., "__bbox", "define", lambda _, v: 'CurrentUserInitialData' in v, lambda _, v: 'ACCOUNT_ID' in v)) + try: + user_id = int(userinfo['ACCOUNT_ID']) + except (TypeError, ValueError): + user_id = 0 + if user_id == 0: + raise ExtractorError('Failed to login with provided data.', expected=True) + def extract_metadata(webpage): post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall( r'data-sjs>({.*?ScheduledServerJS.*?})', webpage)] From 6be151fed7d2fb93fce2c4cc53ad2ff13d92344c Mon Sep 17 00:00:00 2001 From: ringus1 Date: Thu, 11 Jan 2024 16:02:41 +0100 Subject: [PATCH 03/18] Handle removed content --- yt_dlp/extractor/facebook.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 4fa0012a7..d9bbd314c 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -434,12 +434,12 @@ def _extract_from_url(self, url, video_id): webpage = self._download_webpage( url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id) + sjs_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall( + r'data-sjs>({.*?ScheduledServerJS.*?})', webpage)] if (self.get_param("username") and self.get_param("password")) or self.get_param("cookiefile"): if 'We\'ve suspended your account' in webpage: raise ExtractorError('Login account is suspended.', expected=True) - sjs_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall( - r'data-sjs>({.*?ScheduledServerJS.*?})', webpage)] userinfo = get_first(sjs_data, ('require', ..., ..., ..., "__bbox", "define", lambda _, v: 'CurrentUserInitialData' in v, lambda _, v: 'ACCOUNT_ID' in v)) try: user_id = int(userinfo['ACCOUNT_ID']) @@ -448,6 +448,10 @@ def _extract_from_url(self, url, video_id): if user_id == 0: raise ExtractorError('Failed to login with provided data.', expected=True) + if props := get_first(sjs_data, ('require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., 'rootView', 'props'), expected_type=dict, default={}): + if props.get('title') == 'This content isn\'t available at the moment': + raise ExtractorError('Content removed. Facebook said: "%s"' % props.get('body', ''), expected=True) + def extract_metadata(webpage): post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall( r'data-sjs>({.*?ScheduledServerJS.*?})', webpage)] From f1ed988c5f5f2043914b9a269f77310e9f35f1a8 Mon Sep 17 00:00:00 2001 From: ringus1 Date: Thu, 11 Jan 2024 17:01:20 +0100 Subject: [PATCH 04/18] One more exception handling --- yt_dlp/extractor/facebook.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index d9bbd314c..fb04ddad0 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -449,8 +449,11 @@ def _extract_from_url(self, url, video_id): raise ExtractorError('Failed to login with provided data.', expected=True) if props := get_first(sjs_data, ('require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., 'rootView', 'props'), expected_type=dict, default={}): - if props.get('title') == 'This content isn\'t available at the moment': - raise ExtractorError('Content removed. Facebook said: "%s"' % props.get('body', ''), expected=True) + if props.get('title') in ( + 'This content isn\'t available at the moment', + 'This content isn\'t available right now' + ): + raise ExtractorError('Content removed or not accessible. Facebook said: "%s"' % props.get('body', ''), expected=True) def extract_metadata(webpage): post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall( From 6542c5124cee45a1359281093c84f2d09045c717 Mon Sep 17 00:00:00 2001 From: ringus1 Date: Fri, 12 Jan 2024 14:48:46 +0100 Subject: [PATCH 05/18] Formatting --- yt_dlp/extractor/facebook.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index fb04ddad0..62932a7ed 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -396,7 +396,7 @@ def _perform_login(self, username, password): login_results = self._download_webpage(request, None, note='Logging in', errnote='unable to fetch login page') if self._html_search_regex(r'(Your Request Couldn.+? be Processed)', login_results, "request error", default=None) is not None: - raise ExtractorError('Failed to perform login request. Report a bug.') + raise ExtractorError('Failed to perform login request.') if re.search(r'', login_results) is not None: error = self._html_search_regex( @@ -436,11 +436,12 @@ def _extract_from_url(self, url, video_id): sjs_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall( r'data-sjs>({.*?ScheduledServerJS.*?})', webpage)] - if (self.get_param("username") and self.get_param("password")) or self.get_param("cookiefile"): + if (self.get_param('username') and self.get_param('password')) or self.get_param('cookiefile'): if 'We\'ve suspended your account' in webpage: raise ExtractorError('Login account is suspended.', expected=True) - userinfo = get_first(sjs_data, ('require', ..., ..., ..., "__bbox", "define", lambda _, v: 'CurrentUserInitialData' in v, lambda _, v: 'ACCOUNT_ID' in v)) + userinfo = get_first(sjs_data, ('require', ..., ..., ..., '__bbox', 'define', + lambda _, v: 'CurrentUserInitialData' in v, lambda _, v: 'ACCOUNT_ID' in v)) try: user_id = int(userinfo['ACCOUNT_ID']) except (TypeError, ValueError): @@ -448,11 +449,11 @@ def _extract_from_url(self, url, video_id): if user_id == 0: raise ExtractorError('Failed to login with provided data.', expected=True) - if props := get_first(sjs_data, ('require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., 'rootView', 'props'), expected_type=dict, default={}): - if props.get('title') in ( - 'This content isn\'t available at the moment', - 'This content isn\'t available right now' - ): + if props := get_first(sjs_data, ('require', ..., ..., ..., '__bbox', 'require', + ..., ..., ..., 'rootView', 'props'), expected_type=dict, default={}): + + if props.get('title') in ('This content isn\'t available at the moment', + 'This content isn\'t available right now'): raise ExtractorError('Content removed or not accessible. Facebook said: "%s"' % props.get('body', ''), expected=True) def extract_metadata(webpage): @@ -489,7 +490,8 @@ def extract_metadata(webpage): description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text')) uploader_data = ( get_first(media, ('owner', {dict})) - or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name'])) + or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', + lambda k, v: k == 'owner' and v['name'])) or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name'])) or get_first(post, ('node', 'actors', ..., {dict})) or {}) @@ -580,8 +582,8 @@ def extract_relay_data(_filter): def extract_relay_prefetched_data(_filter): return traverse_obj(extract_relay_data(_filter), ( 'require', (None, (..., ..., ..., '__bbox', 'require')), - lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v if isinstance(key, str)), ..., ..., - '__bbox', 'result', 'data', {dict}), get_all=False) or {} + lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v if isinstance(key, str)), + ..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {} if not video_data: server_js_data = self._parse_json(self._search_regex([ From 1bd55358a000a99f6f3b4f80a61ac1da17e1ca6f Mon Sep 17 00:00:00 2001 From: ringus1 Date: Fri, 12 Jan 2024 15:04:27 +0100 Subject: [PATCH 06/18] Flake fix --- yt_dlp/extractor/facebook.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 62932a7ed..e15c8d118 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -440,7 +440,8 @@ def _extract_from_url(self, url, video_id): if 'We\'ve suspended your account' in webpage: raise ExtractorError('Login account is suspended.', expected=True) - userinfo = get_first(sjs_data, ('require', ..., ..., ..., '__bbox', 'define', + userinfo = get_first(sjs_data, ( + 'require', ..., ..., ..., '__bbox', 'define', lambda _, v: 'CurrentUserInitialData' in v, lambda _, v: 'ACCOUNT_ID' in v)) try: user_id = int(userinfo['ACCOUNT_ID']) @@ -449,11 +450,12 @@ def _extract_from_url(self, url, video_id): if user_id == 0: raise ExtractorError('Failed to login with provided data.', expected=True) - if props := get_first(sjs_data, ('require', ..., ..., ..., '__bbox', 'require', - ..., ..., ..., 'rootView', 'props'), expected_type=dict, default={}): + if props := get_first(sjs_data, ( + 'require', ..., ..., ..., '__bbox', 'require', + ..., ..., ..., 'rootView', 'props'), expected_type=dict, default={} + ): - if props.get('title') in ('This content isn\'t available at the moment', - 'This content isn\'t available right now'): + if props.get('title') in ('This content isn\'t available at the moment', 'This content isn\'t available right now'): raise ExtractorError('Content removed or not accessible. Facebook said: "%s"' % props.get('body', ''), expected=True) def extract_metadata(webpage): @@ -490,8 +492,7 @@ def extract_metadata(webpage): description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text')) uploader_data = ( get_first(media, ('owner', {dict})) - or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', - lambda k, v: k == 'owner' and v['name'])) + or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name'])) or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name'])) or get_first(post, ('node', 'actors', ..., {dict})) or {}) From 6a04a4aaeca02b87ba8f9fc0623b441713323449 Mon Sep 17 00:00:00 2001 From: ringus1 Date: Mon, 15 Jan 2024 10:43:10 +0100 Subject: [PATCH 07/18] Detect mobile number checkpoint --- yt_dlp/extractor/facebook.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index e15c8d118..7db0c4db9 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -439,6 +439,8 @@ def _extract_from_url(self, url, video_id): if (self.get_param('username') and self.get_param('password')) or self.get_param('cookiefile'): if 'We\'ve suspended your account' in webpage: raise ExtractorError('Login account is suspended.', expected=True) + if 'send a code to confirm the mobile number you give us' in webpage: + raise ExtractorError('Mobile number checkpoint for logged in user.', expected=True) userinfo = get_first(sjs_data, ( 'require', ..., ..., ..., '__bbox', 'define', From c22e4bd29f774aa351ec82a906779c4bdab43afe Mon Sep 17 00:00:00 2001 From: ringus1 Date: Mon, 15 Jan 2024 13:05:53 +0100 Subject: [PATCH 08/18] Fix double quotes --- yt_dlp/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 7db0c4db9..7fb4acf9c 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -395,7 +395,7 @@ def _perform_login(self, username, password): try: login_results = self._download_webpage(request, None, note='Logging in', errnote='unable to fetch login page') - if self._html_search_regex(r'(Your Request Couldn.+? be Processed)', login_results, "request error", default=None) is not None: + if self._html_search_regex(r'(Your Request Couldn.+? be Processed)', login_results, 'request error', default=None) is not None: raise ExtractorError('Failed to perform login request.') if re.search(r'', login_results) is not None: From 4916a777205911efe3c6267b70cee802318281d8 Mon Sep 17 00:00:00 2001 From: ringus1 Date: Wed, 24 Jan 2024 12:22:09 +0100 Subject: [PATCH 09/18] Add one more case when account is locked --- yt_dlp/extractor/facebook.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 7fb4acf9c..6cc10b402 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -441,6 +441,8 @@ def _extract_from_url(self, url, video_id): raise ExtractorError('Login account is suspended.', expected=True) if 'send a code to confirm the mobile number you give us' in webpage: raise ExtractorError('Mobile number checkpoint for logged in user.', expected=True) + if 'your account has been locked' in webpage: + raise ExtractorError('Account is locked.', expected=True) userinfo = get_first(sjs_data, ( 'require', ..., ..., ..., '__bbox', 'define', From 821ac9c0b886badab353aae3c073102e1b90fdd2 Mon Sep 17 00:00:00 2001 From: ringus1 Date: Wed, 24 Jan 2024 21:52:46 +0100 Subject: [PATCH 10/18] More messages for account suspension --- yt_dlp/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 6cc10b402..f3b1580d3 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -437,7 +437,7 @@ def _extract_from_url(self, url, video_id): sjs_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall( r'data-sjs>({.*?ScheduledServerJS.*?})', webpage)] if (self.get_param('username') and self.get_param('password')) or self.get_param('cookiefile'): - if 'We\'ve suspended your account' in webpage: + if any(content in webpage for content in ['180 days left to appeal', 'suspended your account']): raise ExtractorError('Login account is suspended.', expected=True) if 'send a code to confirm the mobile number you give us' in webpage: raise ExtractorError('Mobile number checkpoint for logged in user.', expected=True) From 18b4296aa361ae1c436b4ed7e0e2a4f539bc1ade Mon Sep 17 00:00:00 2001 From: ringus1 Date: Wed, 31 Jan 2024 16:38:02 +0100 Subject: [PATCH 11/18] MR fixes --- yt_dlp/extractor/facebook.py | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index f3b1580d3..59f5aa513 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -395,7 +395,7 @@ def _perform_login(self, username, password): try: login_results = self._download_webpage(request, None, note='Logging in', errnote='unable to fetch login page') - if self._html_search_regex(r'(Your Request Couldn.+? be Processed)', login_results, 'request error', default=None) is not None: + if self._html_search_regex(r'(Your Request Couldn\'t be Processed)', login_results, 'request error', default=None) is not None: raise ExtractorError('Failed to perform login request.') if re.search(r'', login_results) is not None: @@ -436,7 +436,13 @@ def _extract_from_url(self, url, video_id): sjs_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall( r'data-sjs>({.*?ScheduledServerJS.*?})', webpage)] - if (self.get_param('username') and self.get_param('password')) or self.get_param('cookiefile'): + + cookies = self._get_cookies(url) + if all(k in cookies for k in ["c_user", "xs"]): + if get_first(sjs_data, ( + 'require', ..., ..., ..., '__bbox', 'define', + lambda _, v: 'CurrentUserInitialData' in v, ..., 'ACCOUNT_ID'), default="0") == "0": + raise ExtractorError('Failed to login with provided data.', expected=True) if any(content in webpage for content in ['180 days left to appeal', 'suspended your account']): raise ExtractorError('Login account is suspended.', expected=True) if 'send a code to confirm the mobile number you give us' in webpage: @@ -444,23 +450,11 @@ def _extract_from_url(self, url, video_id): if 'your account has been locked' in webpage: raise ExtractorError('Account is locked.', expected=True) - userinfo = get_first(sjs_data, ( - 'require', ..., ..., ..., '__bbox', 'define', - lambda _, v: 'CurrentUserInitialData' in v, lambda _, v: 'ACCOUNT_ID' in v)) - try: - user_id = int(userinfo['ACCOUNT_ID']) - except (TypeError, ValueError): - user_id = 0 - if user_id == 0: - raise ExtractorError('Failed to login with provided data.', expected=True) - if props := get_first(sjs_data, ( - 'require', ..., ..., ..., '__bbox', 'require', - ..., ..., ..., 'rootView', 'props'), expected_type=dict, default={} - ): - - if props.get('title') in ('This content isn\'t available at the moment', 'This content isn\'t available right now'): - raise ExtractorError('Content removed or not accessible. Facebook said: "%s"' % props.get('body', ''), expected=True) + 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., 'rootView', 'props', + lambda _, v: v['title'].startswith('This content isn\'t available'))): + raise ExtractorError( + f'Content unavailable. Facebook said: {props.get("body") or props["title"]}', expected=True) def extract_metadata(webpage): post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall( @@ -587,7 +581,7 @@ def extract_relay_data(_filter): def extract_relay_prefetched_data(_filter): return traverse_obj(extract_relay_data(_filter), ( 'require', (None, (..., ..., ..., '__bbox', 'require')), - lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v if isinstance(key, str)), + lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v), ..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {} if not video_data: From 8e0138270713b671b061d69adb47548ff1d21764 Mon Sep 17 00:00:00 2001 From: ringus1 Date: Wed, 31 Jan 2024 16:54:26 +0100 Subject: [PATCH 12/18] Fix props getter --- yt_dlp/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 4c1c55f7b..0bd0e9bea 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -501,7 +501,7 @@ def _extract_from_url(self, url, video_id): raise ExtractorError('Account is locked.', expected=True) if props := get_first(sjs_data, ( - 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., 'rootView', 'props', + 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., 'rootView', lambda _, v: v['title'].startswith('This content isn\'t available'))): raise ExtractorError( f'Content unavailable. Facebook said: {props.get("body") or props["title"]}', expected=True) From 8f0d32fa838afd7e8e97735846282114325fb137 Mon Sep 17 00:00:00 2001 From: ringus1 Date: Wed, 31 Jan 2024 17:04:09 +0100 Subject: [PATCH 13/18] Simplify login error check --- yt_dlp/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 0bd0e9bea..f1daa1fd6 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -445,7 +445,7 @@ def _perform_login(self, username, password): try: login_results = self._download_webpage(request, None, note='Logging in', errnote='unable to fetch login page') - if self._html_search_regex(r'(Your Request Couldn\'t be Processed)', login_results, 'request error', default=None) is not None: + if "Your Request Couldn" in login_results: raise ExtractorError('Failed to perform login request.') if re.search(r'', login_results) is not None: From 5143f916f3cbddbaf97a1d8c8591bd86cc65ea5b Mon Sep 17 00:00:00 2001 From: ringus1 Date: Thu, 1 Feb 2024 08:14:23 +0100 Subject: [PATCH 14/18] PR fixes - clear up messages --- yt_dlp/extractor/facebook.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index f1daa1fd6..b6366e78c 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -488,17 +488,17 @@ def _extract_from_url(self, url, video_id): r'data-sjs>({.*?ScheduledServerJS.*?})', webpage)] cookies = self._get_cookies(url) - if all(k in cookies for k in ["c_user", "xs"]): + if cookies.get('c_user') and cookies.get('xs'): # user passed logged-in cookies or attempted to login: if get_first(sjs_data, ( 'require', ..., ..., ..., '__bbox', 'define', - lambda _, v: 'CurrentUserInitialData' in v, ..., 'ACCOUNT_ID'), default="0") == "0": + lambda _, v: 'CurrentUserInitialData' in v, ..., 'ACCOUNT_ID'), default='0') == '0': raise ExtractorError('Failed to login with provided data.', expected=True) if any(content in webpage for content in ['180 days left to appeal', 'suspended your account']): - raise ExtractorError('Login account is suspended.', expected=True) + raise ExtractorError('Your account is suspended', expected=True) if 'send a code to confirm the mobile number you give us' in webpage: - raise ExtractorError('Mobile number checkpoint for logged in user.', expected=True) + raise ExtractorError('Facebook is requiring mobile number confirmation', expected=True) if 'your account has been locked' in webpage: - raise ExtractorError('Account is locked.', expected=True) + raise ExtractorError('Your account has been locked', expected=True) if props := get_first(sjs_data, ( 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., 'rootView', From 0a8b675da904c734c49e7022cc87c389d0ec0b2c Mon Sep 17 00:00:00 2001 From: ringus1 Date: Thu, 1 Feb 2024 09:56:34 +0100 Subject: [PATCH 15/18] Move getting cookies before making first request --- yt_dlp/extractor/facebook.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index b6366e78c..0e39eddcb 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -481,13 +481,14 @@ def _perform_login(self, username, password): return def _extract_from_url(self, url, video_id): + cookies = self._get_cookies(url) # Saving before making first request, as they might get discarded + webpage = self._download_webpage( url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id) sjs_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall( r'data-sjs>({.*?ScheduledServerJS.*?})', webpage)] - cookies = self._get_cookies(url) if cookies.get('c_user') and cookies.get('xs'): # user passed logged-in cookies or attempted to login: if get_first(sjs_data, ( 'require', ..., ..., ..., '__bbox', 'define', From eb0cfc7420c3d8681dd657f185ed5292ad55a8ba Mon Sep 17 00:00:00 2001 From: ringus1 Date: Mon, 5 Feb 2024 11:45:23 +0100 Subject: [PATCH 16/18] Raising error only if video parsing fails --- yt_dlp/extractor/facebook.py | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 0e39eddcb..3bb29d0c8 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -445,10 +445,9 @@ def _perform_login(self, username, password): try: login_results = self._download_webpage(request, None, note='Logging in', errnote='unable to fetch login page') - if "Your Request Couldn" in login_results: - raise ExtractorError('Failed to perform login request.') - - if re.search(r'', login_results) is not None: + if 'Your Request Couldn' in login_results: + self.raise_login_required('Failed to login with credentials', method='cookies') + elif re.search(r'', login_results) is not None: error = self._html_search_regex( r'(?s)]+class=(["\']).*?login_error_box.*?\1[^>]*>]*>.*?]*>(?P.+?)', login_results, 'login error', default=None, group='error') @@ -481,7 +480,10 @@ def _perform_login(self, username, password): return def _extract_from_url(self, url, video_id): - cookies = self._get_cookies(url) # Saving before making first request, as they might get discarded + cookies = self._get_cookies(url) + # user passed logged-in cookies or attempted to login + login_data = cookies.get('c_user') and cookies.get('xs') + logged_in = False webpage = self._download_webpage( url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id) @@ -489,17 +491,17 @@ def _extract_from_url(self, url, video_id): sjs_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall( r'data-sjs>({.*?ScheduledServerJS.*?})', webpage)] - if cookies.get('c_user') and cookies.get('xs'): # user passed logged-in cookies or attempted to login: - if get_first(sjs_data, ( + if login_data: + logged_in = get_first(sjs_data, ( 'require', ..., ..., ..., '__bbox', 'define', - lambda _, v: 'CurrentUserInitialData' in v, ..., 'ACCOUNT_ID'), default='0') == '0': - raise ExtractorError('Failed to login with provided data.', expected=True) - if any(content in webpage for content in ['180 days left to appeal', 'suspended your account']): - raise ExtractorError('Your account is suspended', expected=True) - if 'send a code to confirm the mobile number you give us' in webpage: - raise ExtractorError('Facebook is requiring mobile number confirmation', expected=True) - if 'your account has been locked' in webpage: - raise ExtractorError('Your account has been locked', expected=True) + lambda _, v: 'CurrentUserInitialData' in v, ..., 'ACCOUNT_ID'), default='0') != '0' + if logged_in: + if any(content in webpage for content in ['180 days left to appeal', 'suspended your account']): + raise ExtractorError('Your account is suspended', expected=True) + if 'send a code to confirm the mobile number you give us' in webpage: + raise ExtractorError('Facebook is requiring mobile number confirmation', expected=True) + if 'your account has been locked' in webpage: + raise ExtractorError('Your account has been locked', expected=True) if props := get_first(sjs_data, ( 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., 'rootView', @@ -821,6 +823,10 @@ def parse_attachment(attachment, key='media'): video_data = extract_from_jsmods_instances(tahoe_js_data) if not video_data: + if not login_data: + raise ExtractorError('Cannot parse data. Try logging in.', expected=True) + if not logged_in: + raise ExtractorError('Failed to login with provided data.', expected=True) raise ExtractorError('Cannot parse data') if len(video_data) > 1: From 88ac2d39158069a412dd568f628f478c31684557 Mon Sep 17 00:00:00 2001 From: ringus1 Date: Mon, 5 Feb 2024 11:46:58 +0100 Subject: [PATCH 17/18] Flake fix --- yt_dlp/extractor/facebook.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 3bb29d0c8..a19351786 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -493,8 +493,8 @@ def _extract_from_url(self, url, video_id): if login_data: logged_in = get_first(sjs_data, ( - 'require', ..., ..., ..., '__bbox', 'define', - lambda _, v: 'CurrentUserInitialData' in v, ..., 'ACCOUNT_ID'), default='0') != '0' + 'require', ..., ..., ..., '__bbox', 'define', + lambda _, v: 'CurrentUserInitialData' in v, ..., 'ACCOUNT_ID'), default='0') != '0' if logged_in: if any(content in webpage for content in ['180 days left to appeal', 'suspended your account']): raise ExtractorError('Your account is suspended', expected=True) From 163533ee179e9f83c9896927773db3676ef876b6 Mon Sep 17 00:00:00 2001 From: ringus1 Date: Tue, 13 Feb 2024 12:03:07 +0100 Subject: [PATCH 18/18] PR changes --- yt_dlp/extractor/facebook.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index a19351786..fcb8aa241 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -447,7 +447,7 @@ def _perform_login(self, username, password): note='Logging in', errnote='unable to fetch login page') if 'Your Request Couldn' in login_results: self.raise_login_required('Failed to login with credentials', method='cookies') - elif re.search(r'', login_results) is not None: + elif re.search(r']*name="login"[^<]*', login_results): error = self._html_search_regex( r'(?s)]+class=(["\']).*?login_error_box.*?\1[^>]*>]*>.*?]*>(?P.+?)', login_results, 'login error', default=None, group='error') @@ -495,12 +495,16 @@ def _extract_from_url(self, url, video_id): logged_in = get_first(sjs_data, ( 'require', ..., ..., ..., '__bbox', 'define', lambda _, v: 'CurrentUserInitialData' in v, ..., 'ACCOUNT_ID'), default='0') != '0' - if logged_in: - if any(content in webpage for content in ['180 days left to appeal', 'suspended your account']): + if logged_in and (info := get_first(sjs_data, ('require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., + '__bbox', 'result', 'data', (('ufac_client', 'state', + (('set_contact_point_state_renderer', 'title'), + ('intro_state_renderer', 'header_title'))), + ('epsilon_checkpoint', 'screen', 'title'))))): + if any(content in info for content in ['days left to appeal', 'suspended your account']): raise ExtractorError('Your account is suspended', expected=True) - if 'send a code to confirm the mobile number you give us' in webpage: + if 'Enter mobile number' == info: raise ExtractorError('Facebook is requiring mobile number confirmation', expected=True) - if 'your account has been locked' in webpage: + if 'your account has been locked' in info: raise ExtractorError('Your account has been locked', expected=True) if props := get_first(sjs_data, ( @@ -510,9 +514,7 @@ def _extract_from_url(self, url, video_id): f'Content unavailable. Facebook said: {props.get("body") or props["title"]}', expected=True) def extract_metadata(webpage): - post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall( - r'data-sjs>({.*?ScheduledServerJS.*?})', webpage)] - post = traverse_obj(post_data, ( + post = traverse_obj(sjs_data, ( ..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or [] media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: ( k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)