[extractor/youtube] Extract heatmap data (#7100)

Closes #3888
Authored by: tntmod54321
This commit is contained in:
Audrey 2023-05-26 08:24:39 -04:00 committed by GitHub
parent 4ad58667c1
commit 5caf30dbc3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 19 additions and 0 deletions

View File

@ -350,6 +350,10 @@ class InfoExtractor:
* "start_time" - The start time of the chapter in seconds * "start_time" - The start time of the chapter in seconds
* "end_time" - The end time of the chapter in seconds * "end_time" - The end time of the chapter in seconds
* "title" (optional, string) * "title" (optional, string)
heatmap: A list of dictionaries, with the following entries:
* "start_time" - The start time of the data point in seconds
* "end_time" - The end time of the data point in seconds
* "value" - The normalized value of the data point (float between 0 and 1)
playable_in_embed: Whether this video is allowed to play in embedded playable_in_embed: Whether this video is allowed to play in embedded
players on other sites. Can be True (=always allowed), players on other sites. Can be True (=always allowed),
False (=never allowed), None (=unknown), or a string False (=never allowed), None (=unknown), or a string

View File

@ -1273,6 +1273,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Philipp Hagemeister', 'uploader': 'Philipp Hagemeister',
'uploader_url': 'https://www.youtube.com/@PhilippHagemeister', 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
'uploader_id': '@PhilippHagemeister', 'uploader_id': '@PhilippHagemeister',
'heatmap': 'count:100',
} }
}, },
{ {
@ -1426,6 +1427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'FlyingKitty', 'uploader': 'FlyingKitty',
'uploader_url': 'https://www.youtube.com/@FlyingKitty900', 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
'uploader_id': '@FlyingKitty900', 'uploader_id': '@FlyingKitty900',
'comment_count': int,
}, },
}, },
{ {
@ -3244,6 +3246,17 @@ def _extract_chapters_from_engagement_panel(self, data, duration):
chapter_time, chapter_title, duration) chapter_time, chapter_title, duration)
for contents in content_list)), []) for contents in content_list)), [])
def _extract_heatmap_from_player_overlay(self, data):
    """Build the `heatmap` info field from the player overlay in *data*.

    Collects every `heatMarkers` list found under the player bar's
    `markersMap` and converts each `heatMarkerRenderer` entry into a dict
    with "start_time", "end_time" and "value" keys.

    Returns the first converted list that is non-empty, or None when no
    marker list yields any entries.
    """
    heat_markers_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 'decoratedPlayerBarRenderer', 'playerBar',
        'multiMarkersPlayerBarRenderer', 'markersMap', ..., 'value', 'heatmap', 'heatmapRenderer', 'heatMarkers', {list})

    # Template applied to each marker: the renderer's "...Millis" fields are
    # brought down by a factor of 1000; end_time is start + duration.
    marker_fields = {
        'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, scale=1000)}),
        'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000},
        'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}),
    }

    for heat_markers in traverse_obj(data, heat_markers_path):
        heatmap = traverse_obj(heat_markers, (..., 'heatMarkerRenderer', marker_fields))
        # Skip marker lists that produced nothing usable
        if heatmap:
            return heatmap
    return None
def _extract_comment(self, comment_renderer, parent=None): def _extract_comment(self, comment_renderer, parent=None):
comment_id = comment_renderer.get('commentId') comment_id = comment_renderer.get('commentId')
if not comment_id: if not comment_id:
@ -4313,6 +4326,8 @@ def process_language(container, base_url, lang_code, sub_name, query):
or self._extract_chapters_from_description(video_description, duration) or self._extract_chapters_from_description(video_description, duration)
or None) or None)
info['heatmap'] = self._extract_heatmap_from_player_overlay(initial_data)
contents = traverse_obj( contents = traverse_obj(
initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'), initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
expected_type=list, default=[]) expected_type=list, default=[])