From 867c66ff97b0639485a2b6ebc28f2e0df0bf8187 Mon Sep 17 00:00:00 2001 From: Matthew Date: Fri, 7 Oct 2022 20:00:40 +1300 Subject: [PATCH] [extractor/youtube] Extract concurrent view count for livestreams (#5152) Adds new field `concurrent_view_count` Closes https://github.com/yt-dlp/yt-dlp/issues/4843 Authored by: coletdjnz --- README.md | 1 + yt_dlp/extractor/common.py | 1 + yt_dlp/extractor/youtube.py | 27 +++++++++++++++++++-------- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index e0a1ea059..9b59e096a 100644 --- a/README.md +++ b/README.md @@ -1226,6 +1226,7 @@ # OUTPUT TEMPLATE - `duration` (numeric): Length of the video in seconds - `duration_string` (string): Length of the video (HH:mm:ss) - `view_count` (numeric): How many users have watched the video on the platform + - `concurrent_view_count` (numeric): How many users are currently watching the video on the platform. - `like_count` (numeric): Number of positive ratings of the video - `dislike_count` (numeric): Number of negative ratings of the video - `repost_count` (numeric): Number of reposts of the video diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 944b196a1..31a45b37a 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -284,6 +284,7 @@ class InfoExtractor: captions instead of normal subtitles duration: Length of the video in seconds, as an integer or float. view_count: How many users have watched the video on the platform. + concurrent_view_count: How many users are currently watching the video on the platform. like_count: Number of positive ratings of the video dislike_count: Number of negative ratings of the video repost_count: Number of reposts of the video diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 4456110f6..6f153bb3c 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -912,8 +912,7 @@ def _extract_video(self, renderer): traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str), video_id, default=None, group='duration')) - view_count = self._get_count(renderer, 'viewCountText') - + view_count = self._get_count(renderer, 'viewCountText', 'shortViewCountText') uploader = self._get_text(renderer, 'ownerText', 'shortBylineText') channel_id = traverse_obj( renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), @@ -932,6 +931,12 @@ def _extract_video(self, renderer): if overlay_style == 'SHORTS' or '/shorts/' in navigation_url: url = f'https://www.youtube.com/shorts/{video_id}' + live_status = ( + 'is_upcoming' if scheduled_timestamp is not None + else 'was_live' if 'streamed' in time_text.lower() + else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW) + else None) + return { '_type': 'url', 'ie_key': YoutubeIE.ie_key(), @@ -940,17 +945,12 @@ def _extract_video(self, renderer): 'title': title, 'description': description, 'duration': duration, - 'view_count': view_count, 'uploader': uploader, 'channel_id': channel_id, 'thumbnails': thumbnails, 'upload_date': (strftime_or_none(self._parse_time_text(time_text), '%Y%m%d') if self._configuration_arg('approximate_date', ie_key='youtubetab') else None), - 'live_status': ('is_upcoming' if scheduled_timestamp is not None - else 'was_live' if 'streamed' in time_text.lower() - else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW) - else None), 'release_timestamp': scheduled_timestamp, 'availability': 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC) @@ -958,7 +958,8 @@ def _extract_video(self, renderer): is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None, needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None, needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None, - is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None) + is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None), + 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count': view_count, } @@ -2328,6 +2329,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'view_count': int, 'playable_in_embed': True, 'description': 'md5:2ef1d002cad520f65825346e2084e49d', + 'concurrent_view_count': int, }, 'params': {'skip_download': True} }, { @@ -4115,6 +4117,15 @@ def process_language(container, base_url, lang_code, sub_name, query): 'like_count': str_to_int(like_count), 'dislike_count': str_to_int(dislike_count), }) + vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer')) + if vcr: + vc = self._get_count(vcr, 'viewCount') + # Upcoming premieres with waiting count are treated as live here + if vcr.get('isLive'): + info['concurrent_view_count'] = vc + elif info.get('view_count') is None: + info['view_count'] = vc + vsir = get_first(contents, 'videoSecondaryInfoRenderer') if vsir: vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))