mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-28 07:36:52 +01:00
[extractor/twitter] Add login support (#7258)
Closes #6951. Authored by: bashonly
This commit is contained in:
parent
44c0d66442
commit
d1795f4a6a
@ -3,7 +3,6 @@
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .periscope import PeriscopeBaseIE, PeriscopeIE
|
from .periscope import PeriscopeBaseIE, PeriscopeIE
|
||||||
from ..compat import functools # isort: split
|
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
@ -30,11 +29,67 @@
|
|||||||
|
|
||||||
|
|
||||||
class TwitterBaseIE(InfoExtractor):
|
class TwitterBaseIE(InfoExtractor):
|
||||||
|
_NETRC_MACHINE = 'twitter'
|
||||||
_API_BASE = 'https://api.twitter.com/1.1/'
|
_API_BASE = 'https://api.twitter.com/1.1/'
|
||||||
_GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
|
_GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
|
||||||
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
|
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
|
||||||
_AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
|
_AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
|
||||||
_guest_token = None
|
_guest_token = None
|
||||||
|
_flow_token = None
|
||||||
|
|
||||||
|
_LOGIN_INIT_DATA = json.dumps({
|
||||||
|
'input_flow_data': {
|
||||||
|
'flow_context': {
|
||||||
|
'debug_overrides': {},
|
||||||
|
'start_location': {
|
||||||
|
'location': 'unknown'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
'subtask_versions': {
|
||||||
|
'action_list': 2,
|
||||||
|
'alert_dialog': 1,
|
||||||
|
'app_download_cta': 1,
|
||||||
|
'check_logged_in_account': 1,
|
||||||
|
'choice_selection': 3,
|
||||||
|
'contacts_live_sync_permission_prompt': 0,
|
||||||
|
'cta': 7,
|
||||||
|
'email_verification': 2,
|
||||||
|
'end_flow': 1,
|
||||||
|
'enter_date': 1,
|
||||||
|
'enter_email': 2,
|
||||||
|
'enter_password': 5,
|
||||||
|
'enter_phone': 2,
|
||||||
|
'enter_recaptcha': 1,
|
||||||
|
'enter_text': 5,
|
||||||
|
'enter_username': 2,
|
||||||
|
'generic_urt': 3,
|
||||||
|
'in_app_notification': 1,
|
||||||
|
'interest_picker': 3,
|
||||||
|
'js_instrumentation': 1,
|
||||||
|
'menu_dialog': 1,
|
||||||
|
'notifications_permission_prompt': 2,
|
||||||
|
'open_account': 2,
|
||||||
|
'open_home_timeline': 1,
|
||||||
|
'open_link': 1,
|
||||||
|
'phone_verification': 4,
|
||||||
|
'privacy_options': 1,
|
||||||
|
'security_key': 3,
|
||||||
|
'select_avatar': 4,
|
||||||
|
'select_banner': 2,
|
||||||
|
'settings_list': 7,
|
||||||
|
'show_code': 1,
|
||||||
|
'sign_up': 2,
|
||||||
|
'sign_up_review': 4,
|
||||||
|
'tweet_selection_urt': 1,
|
||||||
|
'update_users': 1,
|
||||||
|
'upload_media': 1,
|
||||||
|
'user_recommendations_list': 4,
|
||||||
|
'user_recommendations_urt': 1,
|
||||||
|
'wait_spinner': 3,
|
||||||
|
'web_modal': 1
|
||||||
|
}
|
||||||
|
}, separators=(',', ':')).encode()
|
||||||
|
|
||||||
def _extract_variant_formats(self, variant, video_id):
|
def _extract_variant_formats(self, variant, video_id):
|
||||||
variant_url = variant.get('url')
|
variant_url = variant.get('url')
|
||||||
@ -86,18 +141,151 @@ def _search_dimensions_in_video_url(a_format, video_url):
|
|||||||
'height': int(m.group('height')),
|
'height': int(m.group('height')),
|
||||||
})
|
})
|
||||||
|
|
||||||
@functools.cached_property
|
@property
|
||||||
def is_logged_in(self):
|
def is_logged_in(self):
|
||||||
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
|
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
|
||||||
|
|
||||||
def _call_api(self, path, video_id, query={}, graphql=False):
|
def _fetch_guest_token(self, headers, display_id):
|
||||||
cookies = self._get_cookies(self._API_BASE)
|
headers.pop('x-guest-token', None)
|
||||||
|
self._guest_token = traverse_obj(self._download_json(
|
||||||
|
f'{self._API_BASE}guest/activate.json', display_id,
|
||||||
|
'Downloading guest token', data=b'', headers=headers), 'guest_token')
|
||||||
|
if not self._guest_token:
|
||||||
|
raise ExtractorError('Could not retrieve guest token')
|
||||||
|
|
||||||
|
def _set_base_headers(self):
|
||||||
headers = self._AUTH.copy()
|
headers = self._AUTH.copy()
|
||||||
|
csrf_token = try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value)
|
||||||
|
if csrf_token:
|
||||||
|
headers['x-csrf-token'] = csrf_token
|
||||||
|
return headers
|
||||||
|
|
||||||
csrf_cookie = cookies.get('ct0')
|
def _call_login_api(self, note, headers, query={}, data=None):
|
||||||
if csrf_cookie:
|
response = self._download_json(
|
||||||
headers['x-csrf-token'] = csrf_cookie.value
|
f'{self._API_BASE}onboarding/task.json', None, note,
|
||||||
|
headers=headers, query=query, data=data, expected_status=400)
|
||||||
|
error = traverse_obj(response, ('errors', 0, 'message', {str}))
|
||||||
|
if error:
|
||||||
|
raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
|
||||||
|
elif traverse_obj(response, 'status') != 'success':
|
||||||
|
raise ExtractorError('Login was unsuccessful')
|
||||||
|
|
||||||
|
subtask = traverse_obj(
|
||||||
|
response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
|
||||||
|
if not subtask:
|
||||||
|
raise ExtractorError('Twitter API did not return next login subtask')
|
||||||
|
|
||||||
|
self._flow_token = response['flow_token']
|
||||||
|
|
||||||
|
return subtask
|
||||||
|
|
||||||
|
def _perform_login(self, username, password):
|
||||||
|
if self.is_logged_in:
|
||||||
|
return
|
||||||
|
|
||||||
|
self._request_webpage('https://twitter.com/', None, 'Requesting cookies')
|
||||||
|
headers = self._set_base_headers()
|
||||||
|
self._fetch_guest_token(headers, None)
|
||||||
|
headers.update({
|
||||||
|
'content-type': 'application/json',
|
||||||
|
'x-guest-token': self._guest_token,
|
||||||
|
'x-twitter-client-language': 'en',
|
||||||
|
'x-twitter-active-user': 'yes',
|
||||||
|
'Referer': 'https://twitter.com/',
|
||||||
|
'Origin': 'https://twitter.com',
|
||||||
|
})
|
||||||
|
|
||||||
|
def build_login_json(*subtask_inputs):
|
||||||
|
return json.dumps({
|
||||||
|
'flow_token': self._flow_token,
|
||||||
|
'subtask_inputs': subtask_inputs
|
||||||
|
}, separators=(',', ':')).encode()
|
||||||
|
|
||||||
|
def input_dict(subtask_id, text):
|
||||||
|
return {
|
||||||
|
'subtask_id': subtask_id,
|
||||||
|
'enter_text': {
|
||||||
|
'text': text,
|
||||||
|
'link': 'next_link'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
next_subtask = self._call_login_api(
|
||||||
|
'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
|
||||||
|
|
||||||
|
while not self.is_logged_in:
|
||||||
|
if next_subtask == 'LoginJsInstrumentationSubtask':
|
||||||
|
next_subtask = self._call_login_api(
|
||||||
|
'Submitting JS instrumentation response', headers, data=build_login_json({
|
||||||
|
'subtask_id': next_subtask,
|
||||||
|
'js_instrumentation': {
|
||||||
|
'response': '{}',
|
||||||
|
'link': 'next_link'
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
|
||||||
|
elif next_subtask == 'LoginEnterUserIdentifierSSO':
|
||||||
|
next_subtask = self._call_login_api(
|
||||||
|
'Submitting username', headers, data=build_login_json({
|
||||||
|
'subtask_id': next_subtask,
|
||||||
|
'settings_list': {
|
||||||
|
'setting_responses': [{
|
||||||
|
'key': 'user_identifier',
|
||||||
|
'response_data': {
|
||||||
|
'text_data': {
|
||||||
|
'result': username
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}],
|
||||||
|
'link': 'next_link'
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
|
||||||
|
elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
|
||||||
|
next_subtask = self._call_login_api(
|
||||||
|
'Submitting alternate identifier', headers,
|
||||||
|
data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
|
||||||
|
'one of username, phone number or email that was not used as --username'))))
|
||||||
|
|
||||||
|
elif next_subtask == 'LoginEnterPassword':
|
||||||
|
next_subtask = self._call_login_api(
|
||||||
|
'Submitting password', headers, data=build_login_json({
|
||||||
|
'subtask_id': next_subtask,
|
||||||
|
'enter_password': {
|
||||||
|
'password': password,
|
||||||
|
'link': 'next_link'
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
|
||||||
|
elif next_subtask == 'AccountDuplicationCheck':
|
||||||
|
next_subtask = self._call_login_api(
|
||||||
|
'Submitting account duplication check', headers, data=build_login_json({
|
||||||
|
'subtask_id': next_subtask,
|
||||||
|
'check_logged_in_account': {
|
||||||
|
'link': 'AccountDuplicationCheck_false'
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
|
||||||
|
elif next_subtask == 'LoginTwoFactorAuthChallenge':
|
||||||
|
next_subtask = self._call_login_api(
|
||||||
|
'Submitting 2FA token', headers, data=build_login_json(input_dict(
|
||||||
|
next_subtask, self._get_tfa_info('two-factor authentication token'))))
|
||||||
|
|
||||||
|
elif next_subtask == 'LoginAcid':
|
||||||
|
next_subtask = self._call_login_api(
|
||||||
|
'Submitting confirmation code', headers, data=build_login_json(input_dict(
|
||||||
|
next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))
|
||||||
|
|
||||||
|
elif next_subtask == 'LoginSuccessSubtask':
|
||||||
|
raise ExtractorError('Twitter API did not grant auth token cookie')
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')
|
||||||
|
|
||||||
|
self.report_login()
|
||||||
|
|
||||||
|
def _call_api(self, path, video_id, query={}, graphql=False):
|
||||||
|
headers = self._set_base_headers()
|
||||||
if self.is_logged_in:
|
if self.is_logged_in:
|
||||||
headers.update({
|
headers.update({
|
||||||
'x-twitter-auth-type': 'OAuth2Session',
|
'x-twitter-auth-type': 'OAuth2Session',
|
||||||
@ -106,15 +294,10 @@ def _call_api(self, path, video_id, query={}, graphql=False):
|
|||||||
})
|
})
|
||||||
|
|
||||||
for first_attempt in (True, False):
|
for first_attempt in (True, False):
|
||||||
if not self.is_logged_in and not self._guest_token:
|
if not self.is_logged_in:
|
||||||
headers.pop('x-guest-token', None)
|
if not self._guest_token:
|
||||||
self._guest_token = traverse_obj(self._download_json(
|
self._fetch_guest_token(headers, video_id)
|
||||||
f'{self._API_BASE}guest/activate.json', video_id,
|
|
||||||
'Downloading guest token', data=b'', headers=headers), 'guest_token')
|
|
||||||
if self._guest_token:
|
|
||||||
headers['x-guest-token'] = self._guest_token
|
headers['x-guest-token'] = self._guest_token
|
||||||
elif not self.is_logged_in:
|
|
||||||
raise ExtractorError('Could not retrieve guest token')
|
|
||||||
|
|
||||||
allowed_status = {400, 401, 403, 404} if graphql else {403}
|
allowed_status = {400, 401, 403, 404} if graphql else {403}
|
||||||
result = self._download_json(
|
result = self._download_json(
|
||||||
|
Loading…
Reference in New Issue
Block a user