mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-30 16:42:57 +01:00
[extractor/twitter] Fix unauthenticated extraction (#7476)
Closes #7473 Authored by: bashonly
This commit is contained in:
parent
1cffd621cb
commit
49296437a8
@ -1852,9 +1852,6 @@ #### tiktok
|
|||||||
#### rokfinchannel
|
#### rokfinchannel
|
||||||
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
|
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
|
||||||
|
|
||||||
#### twitter
|
|
||||||
* `legacy_api`: Force usage of the legacy Twitter API instead of the GraphQL API for tweet extraction. Has no effect if login cookies are passed
|
|
||||||
|
|
||||||
#### stacommu, wrestleuniverse
|
#### stacommu, wrestleuniverse
|
||||||
* `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage
|
* `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
import urllib.error
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .periscope import PeriscopeBaseIE, PeriscopeIE
|
from .periscope import PeriscopeBaseIE, PeriscopeIE
|
||||||
@ -34,7 +35,6 @@ class TwitterBaseIE(InfoExtractor):
|
|||||||
_GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
|
_GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
|
||||||
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
|
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
|
||||||
_AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
|
_AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
|
||||||
_guest_token = None
|
|
||||||
_flow_token = None
|
_flow_token = None
|
||||||
|
|
||||||
_LOGIN_INIT_DATA = json.dumps({
|
_LOGIN_INIT_DATA = json.dumps({
|
||||||
@ -145,14 +145,6 @@ def _search_dimensions_in_video_url(a_format, video_url):
|
|||||||
def is_logged_in(self):
|
def is_logged_in(self):
|
||||||
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
|
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
|
||||||
|
|
||||||
def _fetch_guest_token(self, headers, display_id):
|
|
||||||
headers.pop('x-guest-token', None)
|
|
||||||
self._guest_token = traverse_obj(self._download_json(
|
|
||||||
f'{self._API_BASE}guest/activate.json', display_id,
|
|
||||||
'Downloading guest token', data=b'', headers=headers), 'guest_token')
|
|
||||||
if not self._guest_token:
|
|
||||||
raise ExtractorError('Could not retrieve guest token')
|
|
||||||
|
|
||||||
def _set_base_headers(self):
|
def _set_base_headers(self):
|
||||||
headers = self._AUTH.copy()
|
headers = self._AUTH.copy()
|
||||||
csrf_token = try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value)
|
csrf_token = try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value)
|
||||||
@ -183,12 +175,15 @@ def _perform_login(self, username, password):
|
|||||||
if self.is_logged_in:
|
if self.is_logged_in:
|
||||||
return
|
return
|
||||||
|
|
||||||
self._request_webpage('https://twitter.com/', None, 'Requesting cookies')
|
webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
|
||||||
headers = self._set_base_headers()
|
headers = self._set_base_headers()
|
||||||
self._fetch_guest_token(headers, None)
|
guest_token = self._search_regex(
|
||||||
|
r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._download_json(
|
||||||
|
f'{self._API_BASE}guest/activate.json', None, 'Downloading guest token',
|
||||||
|
data=b'', headers=headers)['guest_token']
|
||||||
headers.update({
|
headers.update({
|
||||||
'content-type': 'application/json',
|
'content-type': 'application/json',
|
||||||
'x-guest-token': self._guest_token,
|
'x-guest-token': guest_token,
|
||||||
'x-twitter-client-language': 'en',
|
'x-twitter-client-language': 'en',
|
||||||
'x-twitter-active-user': 'yes',
|
'x-twitter-active-user': 'yes',
|
||||||
'Referer': 'https://twitter.com/',
|
'Referer': 'https://twitter.com/',
|
||||||
@ -285,33 +280,20 @@ def input_dict(subtask_id, text):
|
|||||||
self.report_login()
|
self.report_login()
|
||||||
|
|
||||||
def _call_api(self, path, video_id, query={}, graphql=False):
|
def _call_api(self, path, video_id, query={}, graphql=False):
|
||||||
headers = self._set_base_headers()
|
if not self.is_logged_in:
|
||||||
if self.is_logged_in:
|
self.raise_login_required()
|
||||||
headers.update({
|
|
||||||
|
result = self._download_json(
|
||||||
|
(self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path, video_id,
|
||||||
|
f'Downloading {"GraphQL" if graphql else "legacy API"} JSON', headers={
|
||||||
|
**self._set_base_headers(),
|
||||||
'x-twitter-auth-type': 'OAuth2Session',
|
'x-twitter-auth-type': 'OAuth2Session',
|
||||||
'x-twitter-client-language': 'en',
|
'x-twitter-client-language': 'en',
|
||||||
'x-twitter-active-user': 'yes',
|
'x-twitter-active-user': 'yes',
|
||||||
})
|
}, query=query, expected_status={400, 401, 403, 404} if graphql else {403})
|
||||||
|
|
||||||
for first_attempt in (True, False):
|
|
||||||
if not self.is_logged_in:
|
|
||||||
if not self._guest_token:
|
|
||||||
self._fetch_guest_token(headers, video_id)
|
|
||||||
headers['x-guest-token'] = self._guest_token
|
|
||||||
|
|
||||||
allowed_status = {400, 401, 403, 404} if graphql else {403}
|
|
||||||
result = self._download_json(
|
|
||||||
(self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
|
|
||||||
video_id, headers=headers, query=query, expected_status=allowed_status,
|
|
||||||
note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
|
|
||||||
|
|
||||||
if result.get('errors'):
|
if result.get('errors'):
|
||||||
errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
|
errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
|
||||||
if not self.is_logged_in and first_attempt and 'bad guest token' in errors.lower():
|
|
||||||
self.to_screen('Guest token has expired. Refreshing guest token')
|
|
||||||
self._guest_token = None
|
|
||||||
continue
|
|
||||||
|
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)
|
f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)
|
||||||
|
|
||||||
@ -457,6 +439,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
|
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# comment_count, repost_count, view_count are only available with auth (applies to all tests)
|
||||||
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
|
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '643211870443208704',
|
'id': '643211870443208704',
|
||||||
@ -471,10 +454,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'timestamp': 1442188653,
|
'timestamp': 1442188653,
|
||||||
'upload_date': '20150913',
|
'upload_date': '20150913',
|
||||||
'uploader_url': 'https://twitter.com/freethenipple',
|
'uploader_url': 'https://twitter.com/freethenipple',
|
||||||
'comment_count': int,
|
|
||||||
'repost_count': int,
|
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
@ -505,8 +485,6 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'timestamp': 1447395772,
|
'timestamp': 1447395772,
|
||||||
'upload_date': '20151113',
|
'upload_date': '20151113',
|
||||||
'uploader_url': 'https://twitter.com/starwars',
|
'uploader_url': 'https://twitter.com/starwars',
|
||||||
'comment_count': int,
|
|
||||||
'repost_count': int,
|
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'tags': ['TV', 'StarWars', 'TheForceAwakens'],
|
'tags': ['TV', 'StarWars', 'TheForceAwakens'],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
@ -550,10 +528,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'timestamp': 1455777459,
|
'timestamp': 1455777459,
|
||||||
'upload_date': '20160218',
|
'upload_date': '20160218',
|
||||||
'uploader_url': 'https://twitter.com/jaydingeer',
|
'uploader_url': 'https://twitter.com/jaydingeer',
|
||||||
'comment_count': int,
|
|
||||||
'repost_count': int,
|
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': ['Damndaniel'],
|
'tags': ['Damndaniel'],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
@ -591,10 +566,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'upload_date': '20160412',
|
'upload_date': '20160412',
|
||||||
'uploader_url': 'https://twitter.com/CaptainAmerica',
|
'uploader_url': 'https://twitter.com/CaptainAmerica',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'comment_count': int,
|
|
||||||
'repost_count': int,
|
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
@ -641,10 +613,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'timestamp': 1505803395,
|
'timestamp': 1505803395,
|
||||||
'upload_date': '20170919',
|
'upload_date': '20170919',
|
||||||
'uploader_url': 'https://twitter.com/Prefet971',
|
'uploader_url': 'https://twitter.com/Prefet971',
|
||||||
'comment_count': int,
|
|
||||||
'repost_count': int,
|
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': ['Maria'],
|
'tags': ['Maria'],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
@ -667,10 +636,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'timestamp': 1527623489,
|
'timestamp': 1527623489,
|
||||||
'upload_date': '20180529',
|
'upload_date': '20180529',
|
||||||
'uploader_url': 'https://twitter.com/LisPower1',
|
'uploader_url': 'https://twitter.com/LisPower1',
|
||||||
'comment_count': int,
|
|
||||||
'repost_count': int,
|
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
@ -692,10 +658,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'timestamp': 1548184644,
|
'timestamp': 1548184644,
|
||||||
'upload_date': '20190122',
|
'upload_date': '20190122',
|
||||||
'uploader_url': 'https://twitter.com/Twitter',
|
'uploader_url': 'https://twitter.com/Twitter',
|
||||||
'comment_count': int,
|
|
||||||
'repost_count': int,
|
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
@ -713,6 +676,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
'add_ie': ['TwitterBroadcast'],
|
'add_ie': ['TwitterBroadcast'],
|
||||||
|
'skip': 'Requires authentication',
|
||||||
}, {
|
}, {
|
||||||
# unified card
|
# unified card
|
||||||
'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
|
'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
|
||||||
@ -729,8 +693,6 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'timestamp': 1610651040,
|
'timestamp': 1610651040,
|
||||||
'upload_date': '20210114',
|
'upload_date': '20210114',
|
||||||
'uploader_url': 'https://twitter.com/BrooklynNets',
|
'uploader_url': 'https://twitter.com/BrooklynNets',
|
||||||
'comment_count': int,
|
|
||||||
'repost_count': int,
|
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
@ -753,10 +715,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'duration': 30.03,
|
'duration': 30.03,
|
||||||
'timestamp': 1665025050,
|
'timestamp': 1665025050,
|
||||||
'comment_count': int,
|
|
||||||
'repost_count': int,
|
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
@ -765,15 +724,13 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
|
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1577719286659006464',
|
'id': '1577719286659006464',
|
||||||
'title': 'Ultima | #\u0432\u029f\u043c - Test',
|
'title': 'Ultima📛 | #вʟм - Test',
|
||||||
'description': 'Test https://t.co/Y3KEZD7Dad',
|
'description': 'Test https://t.co/Y3KEZD7Dad',
|
||||||
'uploader': 'Ultima | #\u0432\u029f\u043c',
|
'uploader': 'Ultima📛 | #вʟм',
|
||||||
'uploader_id': 'UltimaShadowX',
|
'uploader_id': 'UltimaShadowX',
|
||||||
'uploader_url': 'https://twitter.com/UltimaShadowX',
|
'uploader_url': 'https://twitter.com/UltimaShadowX',
|
||||||
'upload_date': '20221005',
|
'upload_date': '20221005',
|
||||||
'timestamp': 1664992565,
|
'timestamp': 1664992565,
|
||||||
'comment_count': int,
|
|
||||||
'repost_count': int,
|
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
@ -795,10 +752,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'duration': 21.321,
|
'duration': 21.321,
|
||||||
'timestamp': 1664477766,
|
'timestamp': 1664477766,
|
||||||
'upload_date': '20220929',
|
'upload_date': '20220929',
|
||||||
'comment_count': int,
|
|
||||||
'repost_count': int,
|
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': ['HurricaneIan'],
|
'tags': ['HurricaneIan'],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
@ -825,6 +779,20 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
},
|
},
|
||||||
'skip': 'Requires authentication',
|
'skip': 'Requires authentication',
|
||||||
}, {
|
}, {
|
||||||
|
# Single Vimeo video result without auth
|
||||||
|
'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '551578322',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Dusty & The Mayor',
|
||||||
|
'uploader': 'Michael Chau',
|
||||||
|
'uploader_id': 'user29061007',
|
||||||
|
'uploader_url': 'https://vimeo.com/user29061007',
|
||||||
|
'duration': 478,
|
||||||
|
'thumbnail': 'https://i.vimeocdn.com/video/1139658575-0dfdce6e9a2401fe09feb24bf0d14e6f24a53c12f447ff688ace61009ad4c1ba-d_1280',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Playlist result only with auth
|
||||||
'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
|
'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -842,6 +810,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'uploader_url': 'https://twitter.com/Srirachachau',
|
'uploader_url': 'https://twitter.com/Srirachachau',
|
||||||
'timestamp': 1621447860,
|
'timestamp': 1621447860,
|
||||||
},
|
},
|
||||||
|
'skip': 'Requires authentication',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
|
'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
@ -860,6 +829,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'upload_date': '20221007',
|
'upload_date': '20221007',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
|
'skip': 'Requires authentication',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
|
'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
|
||||||
'playlist_count': 2,
|
'playlist_count': 2,
|
||||||
@ -873,8 +843,6 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'upload_date': '20221007',
|
'upload_date': '20221007',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'uploader_url': 'https://twitter.com/primevideouk',
|
'uploader_url': 'https://twitter.com/primevideouk',
|
||||||
'comment_count': int,
|
|
||||||
'repost_count': int,
|
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'tags': ['TheRingsOfPower'],
|
'tags': ['TheRingsOfPower'],
|
||||||
},
|
},
|
||||||
@ -896,6 +864,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
},
|
},
|
||||||
'add_ie': ['TwitterSpaces'],
|
'add_ie': ['TwitterSpaces'],
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
'skip': 'Requires authentication',
|
||||||
}, {
|
}, {
|
||||||
# URL specifies video number but --yes-playlist
|
# URL specifies video number but --yes-playlist
|
||||||
'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
|
'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
|
||||||
@ -905,9 +874,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'title': 'md5:be05989b0722e114103ed3851a0ffae2',
|
'title': 'md5:be05989b0722e114103ed3851a0ffae2',
|
||||||
'timestamp': 1670459604.0,
|
'timestamp': 1670459604.0,
|
||||||
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
|
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
|
||||||
'comment_count': int,
|
|
||||||
'uploader_id': 'CTVJLaidlaw',
|
'uploader_id': 'CTVJLaidlaw',
|
||||||
'repost_count': int,
|
|
||||||
'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
|
'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
|
||||||
'upload_date': '20221208',
|
'upload_date': '20221208',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
@ -926,14 +893,11 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'timestamp': 1670459604.0,
|
'timestamp': 1670459604.0,
|
||||||
'uploader_id': 'CTVJLaidlaw',
|
'uploader_id': 'CTVJLaidlaw',
|
||||||
'uploader': 'Jocelyn Laidlaw',
|
'uploader': 'Jocelyn Laidlaw',
|
||||||
'repost_count': int,
|
|
||||||
'comment_count': int,
|
|
||||||
'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
|
'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
|
||||||
'duration': 102.226,
|
'duration': 102.226,
|
||||||
'uploader_url': 'https://twitter.com/CTVJLaidlaw',
|
'uploader_url': 'https://twitter.com/CTVJLaidlaw',
|
||||||
'display_id': '1600649710662213632',
|
'display_id': '1600649710662213632',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
|
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
|
||||||
'upload_date': '20221208',
|
'upload_date': '20221208',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
@ -959,9 +923,6 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
|
||||||
'comment_count': int,
|
|
||||||
'view_count': int,
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
|
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
|
||||||
@ -974,10 +935,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'like_count': int,
|
'like_count': int,
|
||||||
'uploader_id': 'hlo_again',
|
'uploader_id': 'hlo_again',
|
||||||
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
|
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
|
||||||
'repost_count': int,
|
|
||||||
'duration': 9.531,
|
'duration': 9.531,
|
||||||
'comment_count': int,
|
|
||||||
'view_count': int,
|
|
||||||
'upload_date': '20221203',
|
'upload_date': '20221203',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'timestamp': 1670092210.0,
|
'timestamp': 1670092210.0,
|
||||||
@ -994,14 +952,11 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'uploader_url': 'https://twitter.com/MunTheShinobi',
|
'uploader_url': 'https://twitter.com/MunTheShinobi',
|
||||||
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
|
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
|
||||||
'view_count': int,
|
|
||||||
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'uploader': 'Mün The Shinobi',
|
'uploader': 'Mün The Shinobi',
|
||||||
'repost_count': int,
|
|
||||||
'upload_date': '20221206',
|
'upload_date': '20221206',
|
||||||
'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
|
'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
|
||||||
'comment_count': int,
|
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'uploader_id': 'MunTheShinobi',
|
'uploader_id': 'MunTheShinobi',
|
||||||
@ -1009,14 +964,14 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'timestamp': 1670306984.0,
|
'timestamp': 1670306984.0,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# url to retweet id, legacy API
|
# url to retweet id
|
||||||
'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
|
'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1623274794488659969',
|
'id': '1623274794488659969',
|
||||||
'display_id': '1623739803874349067',
|
'display_id': '1623739803874349067',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
|
'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
|
||||||
'description': 'md5:e873616a4a8fe0f93e71872678a672f3',
|
'description': 'md5:224d62f54b0cdef8e33d4c56c41ac503',
|
||||||
'uploader': 'Johnny Bullets',
|
'uploader': 'Johnny Bullets',
|
||||||
'uploader_id': 'Johnnybull3ts',
|
'uploader_id': 'Johnnybull3ts',
|
||||||
'uploader_url': 'https://twitter.com/Johnnybull3ts',
|
'uploader_url': 'https://twitter.com/Johnnybull3ts',
|
||||||
@ -1027,10 +982,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'upload_date': '20230208',
|
'upload_date': '20230208',
|
||||||
'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
|
'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
|
||||||
'comment_count': int,
|
|
||||||
},
|
},
|
||||||
'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
|
|
||||||
}, {
|
}, {
|
||||||
# onion route
|
# onion route
|
||||||
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
||||||
@ -1081,8 +1033,6 @@ def _graphql_to_legacy(self, data, twid):
|
|||||||
|
|
||||||
if 'tombstone' in result:
|
if 'tombstone' in result:
|
||||||
cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
|
cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
|
||||||
if cause and 'adult content' in cause:
|
|
||||||
self.raise_login_required(cause)
|
|
||||||
raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
|
raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
|
||||||
|
|
||||||
status = result.get('legacy', {})
|
status = result.get('legacy', {})
|
||||||
@ -1138,19 +1088,22 @@ def _build_graphql_query(self, media_id):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
twid, selected_index = self._match_valid_url(url).group('id', 'index')
|
twid, selected_index = self._match_valid_url(url).group('id', 'index')
|
||||||
if self._configuration_arg('legacy_api') and not self.is_logged_in:
|
if not self.is_logged_in:
|
||||||
status = traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
|
try:
|
||||||
'cards_platform': 'Web-12',
|
status = self._download_json(
|
||||||
'include_cards': 1,
|
'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
|
||||||
'include_reply_count': 1,
|
headers={'User-Agent': 'Googlebot'}, query={'id': twid})
|
||||||
'include_user_entities': 0,
|
self.to_screen(f'Some metadata is missing without authentication. {self._login_hint()}')
|
||||||
'tweet_mode': 'extended',
|
except ExtractorError as e:
|
||||||
}), 'retweeted_status', None)
|
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
|
||||||
|
self.raise_login_required('Requested tweet may only be available when logged in')
|
||||||
|
raise
|
||||||
else:
|
else:
|
||||||
result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
|
status = self._graphql_to_legacy(
|
||||||
status = self._graphql_to_legacy(result, twid)
|
self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid), twid)
|
||||||
|
|
||||||
title = description = status['full_text'].replace('\n', ' ')
|
title = description = traverse_obj(
|
||||||
|
status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
|
||||||
# strip 'https -_t.co_BJYgOjSeGA' junk from filenames
|
# strip 'https -_t.co_BJYgOjSeGA' junk from filenames
|
||||||
title = re.sub(r'\s+(https?://[^ ]+)', '', title)
|
title = re.sub(r'\s+(https?://[^ ]+)', '', title)
|
||||||
user = status.get('user') or {}
|
user = status.get('user') or {}
|
||||||
@ -1176,12 +1129,16 @@ def _real_extract(self, url):
|
|||||||
|
|
||||||
def extract_from_video_info(media):
|
def extract_from_video_info(media):
|
||||||
media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
|
media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
|
||||||
|
if not media_id:
|
||||||
|
# workaround for non-authenticated responses
|
||||||
|
media_id = traverse_obj(media, (
|
||||||
|
'video_info', 'variants', ..., 'url',
|
||||||
|
{lambda x: re.search(r'_video/(\d+)/', x)[1]}), get_all=False)
|
||||||
self.write_debug(f'Extracting from video info: {media_id}')
|
self.write_debug(f'Extracting from video info: {media_id}')
|
||||||
video_info = media.get('video_info') or {}
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for variant in video_info.get('variants', []):
|
for variant in traverse_obj(media, ('video_info', 'variants', ...)):
|
||||||
fmts, subs = self._extract_variant_formats(variant, twid)
|
fmts, subs = self._extract_variant_formats(variant, twid)
|
||||||
subtitles = self._merge_subtitles(subtitles, subs)
|
subtitles = self._merge_subtitles(subtitles, subs)
|
||||||
formats.extend(fmts)
|
formats.extend(fmts)
|
||||||
@ -1201,12 +1158,12 @@ def add_thumbnail(name, size):
|
|||||||
add_thumbnail('orig', media.get('original_info') or {})
|
add_thumbnail('orig', media.get('original_info') or {})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': media_id,
|
'id': media_id or twid,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
|
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
|
||||||
'duration': float_or_none(video_info.get('duration_millis'), 1000),
|
'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
|
||||||
# The codec of http formats are unknown
|
# The codec of http formats are unknown
|
||||||
'_format_sort_fields': ('res', 'br', 'size', 'proto'),
|
'_format_sort_fields': ('res', 'br', 'size', 'proto'),
|
||||||
}
|
}
|
||||||
@ -1286,12 +1243,15 @@ def get_binding_value(k):
|
|||||||
}
|
}
|
||||||
|
|
||||||
videos = traverse_obj(status, (
|
videos = traverse_obj(status, (
|
||||||
(None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))
|
('mediaDetails', ((None, 'quoted_status'), 'extended_entities', 'media')),
|
||||||
|
lambda _, m: m['type'] != 'photo', {dict}))
|
||||||
|
|
||||||
if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
|
if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
|
||||||
selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
|
selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
|
||||||
else:
|
else:
|
||||||
desired_obj = traverse_obj(status, ('extended_entities', 'media', int(selected_index) - 1, {dict}))
|
desired_obj = traverse_obj(status, (
|
||||||
|
('mediaDetails', ((None, 'quoted_status'), 'extended_entities', 'media')),
|
||||||
|
int(selected_index) - 1, {dict}), get_all=False)
|
||||||
if not desired_obj:
|
if not desired_obj:
|
||||||
raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
|
raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
|
||||||
elif desired_obj.get('type') != 'video':
|
elif desired_obj.get('type') != 'video':
|
||||||
|
Loading…
Reference in New Issue
Block a user