[extractor/uktvplay] Fix _VALID_URL

Closes #5472
[extractor/odnoklassniki] Support boosty.to embeds (#5105)
2024-06-01 19:08:14 +02:00 · 2022-11-07 21:47:31 +05:30 · 2022-11-07 21:32:42 +05:30 · 2022-11-07 21:29:53 +05:30 · 2022-11-07 20:54:30 +05:30
5 changed files with 134 additions and 30 deletions
--- a/yt_dlp/compat/shutil.py
+++ b/yt_dlp/compat/shutil.py
@@ -0,0 +1,30 @@
+# flake8: noqa: F405
+from shutil import *  # noqa: F403
+
+from .compat_utils import passthrough_module
+
+passthrough_module(__name__, 'shutil')
+del passthrough_module
+
+
+import sys
+
+if sys.platform.startswith('freebsd'):
+    import errno
+    import os
+    import shutil
+
+    # Workaround for PermissionError when using restricted ACL mode on FreeBSD
+    def copy2(src, dst, *args, **kwargs):
+        if os.path.isdir(dst):
+            dst = os.path.join(dst, os.path.basename(src))
+        shutil.copyfile(src, dst, *args, **kwargs)
+        try:
+            shutil.copystat(src, dst, *args, **kwargs)
+        except PermissionError as e:
+            if e.errno != getattr(errno, 'EPERM', None):
+                raise
+        return dst
+
+    def move(*args, copy_function=copy2, **kwargs):
+        return shutil.move(*args, copy_function=copy_function, **kwargs)
--- a/yt_dlp/extractor/mxplayer.py
+++ b/yt_dlp/extractor/mxplayer.py
@@ -4,6 +4,7 @@
    int_or_none,
    traverse_obj,
    try_get,
+    urljoin,
 )


@@ -147,6 +148,17 @@ class MxplayerIE(InfoExtractor):
            'format': 'bv',
            'skip_download': True,
        },
+    }, {
+        'url': 'https://www.mxplayer.in/movie/watch-deewane-huye-paagal-movie-online-4f9175c40a11c3994182a65afdd37ec6?watch=true',
+        'info_dict': {
+            'id': '4f9175c40a11c3994182a65afdd37ec6',
+            'display_id': 'watch-deewane-huye-paagal-movie-online',
+            'title': 'Deewane Huye Paagal',
+            'duration': 9037,
+            'ext': 'mp4',
+            'description': 'md5:d17bd5c651016c4ed2e6f8a4ace15534',
+        },
+        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
@@ -157,21 +169,24 @@ def _real_extract(self, url):
        data_json = self._download_json(
            f'https://api.mxplay.com/v1/web/detail/video?type={video_type}&id={video_id}', display_id)

-        streams = traverse_obj(data_json, ('stream', {'m3u8': ('hls', 'high'), 'mpd': ('dash', 'high')}))
-        formats, dash_subs = self._extract_mpd_formats_and_subtitles(
-            f'https://llvod.mxplay.com/{streams["mpd"]}', display_id, fatal=False)
-        hls_frmts, hls_subs = self._extract_m3u8_formats_and_subtitles(
-            f'https://llvod.mxplay.com/{streams["m3u8"]}', display_id, fatal=False)
-
-        formats.extend(hls_frmts)
-        self._sort_formats(formats)
+        formats, subtitles = [], {}
+        m3u8_url = urljoin('https://llvod.mxplay.com/', traverse_obj(
+            data_json, ('stream', (('thirdParty', 'hlsUrl'), ('hls', 'high'))), get_all=False))
+        if m3u8_url:
+            formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, display_id, 'mp4', fatal=False)
+        mpd_url = urljoin('https://llvod.mxplay.com/', traverse_obj(
+            data_json, ('stream', (('thirdParty', 'dashUrl'), ('dash', 'high'))), get_all=False))
+        if mpd_url:
+            fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, display_id, fatal=False)
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)

        season = traverse_obj(data_json, ('container', 'title'))
        return {
            'id': video_id,
            'title': data_json.get('title'),
            'formats': formats,
-            'subtitles': self._merge_subtitles(dash_subs, hls_subs),
+            'subtitles': subtitles,
            'display_id': display_id,
            'duration': data_json.get('duration'),
            'series': traverse_obj(data_json, ('container', 'container', 'title')),
--- a/yt_dlp/extractor/odnoklassniki.py
+++ b/yt_dlp/extractor/odnoklassniki.py
@@ -8,10 +8,12 @@
 from ..utils import (
    ExtractorError,
    float_or_none,
-    unified_strdate,
    int_or_none,
    qualities,
+    smuggle_url,
    unescapeHTML,
+    unified_strdate,
+    unsmuggle_url,
    urlencode_postdata,
 )

@@ -22,7 +24,7 @@ class OdnoklassnikiIE(InfoExtractor):
                    (?:(?:www|m|mobile)\.)?
                    (?:odnoklassniki|ok)\.ru/
                    (?:
-                        video(?:embed)?/|
+                        video(?P<embed>embed)?/|
                        web-api/video/moviePlayer/|
                        live/|
                        dk\?.*?st\.mvId=
@@ -38,7 +40,7 @@ class OdnoklassnikiIE(InfoExtractor):
            'ext': 'mp4',
            'timestamp': 1545580896,
            'view_count': int,
-            'thumbnail': 'https://coub-anubis-a.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
+            'thumbnail': 'https://coub-attachments.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
            'title': 'Народная забава',
            'uploader': 'Nevata',
            'upload_date': '20181223',
@@ -65,11 +67,12 @@ class OdnoklassnikiIE(InfoExtractor):
    }, {
        # metadata in JSON
        'url': 'http://ok.ru/video/20079905452',
-        'md5': '0b62089b479e06681abaaca9d204f152',
+        'md5': '5d2b64756e2af296e3b383a0bc02a6aa',
        'info_dict': {
            'id': '20079905452',
            'ext': 'mp4',
            'title': 'Культура меняет нас (прекрасный ролик!))',
+            'thumbnail': str,
            'duration': 100,
            'upload_date': '20141207',
            'uploader_id': '330537914540',
@@ -80,11 +83,12 @@ class OdnoklassnikiIE(InfoExtractor):
    }, {
        # metadataUrl
        'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
-        'md5': '6ff470ea2dd51d5d18c295a355b0b6bc',
+        'md5': 'f8c951122516af72e6e6ffdd3c41103b',
        'info_dict': {
            'id': '63567059965189-0',
            'ext': 'mp4',
            'title': 'Девушка без комплексов ...',
+            'thumbnail': str,
            'duration': 191,
            'upload_date': '20150518',
            'uploader_id': '534380003155',
@@ -95,18 +99,32 @@ class OdnoklassnikiIE(InfoExtractor):
        },
    }, {
        # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
-        'url': 'http://ok.ru/video/64211978996595-1',
-        'md5': '2f206894ffb5dbfcce2c5a14b909eea5',
+        'url': 'https://ok.ru/video/3952212382174',
+        'md5': '91749d0bd20763a28d083fa335bbd37a',
        'info_dict': {
-            'id': 'V_VztHT5BzY',
+            'id': '5axVgHHDBvU',
            'ext': 'mp4',
-            'title': 'Космическая среда от 26 августа 2015',
-            'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
-            'duration': 440,
-            'upload_date': '20150826',
-            'uploader_id': 'tvroscosmos',
-            'uploader': 'Телестудия Роскосмоса',
+            'title': 'Youtube-dl 101: What is it and HOW to use it! Full Download Walkthrough and Guide',
+            'description': 'md5:b57209eeb9d5c2f20c984dfb58862097',
+            'uploader': 'Lod Mer',
+            'uploader_id': '575186401502',
+            'duration': 1529,
            'age_limit': 0,
+            'upload_date': '20210405',
+            'comment_count': int,
+            'live_status': 'not_live',
+            'view_count': int,
+            'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
+            'uploader_url': 'http://www.youtube.com/user/MrKewlkid94',
+            'channel_follower_count': int,
+            'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
+            'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
+            'like_count': int,
+            'availability': 'public',
+            'channel_url': 'https://www.youtube.com/channel/UCVGtvURtEURYHtJFUegdSug',
+            'categories': ['Education'],
+            'playable_in_embed': True,
+            'channel': 'BornToReact',
        },
    }, {
        # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
@@ -126,10 +144,12 @@ class OdnoklassnikiIE(InfoExtractor):
        },
        'skip': 'Video has not been found',
    }, {
+        # TODO: HTTP Error 400: Bad Request, it only works if there's no cookies when downloading
        'note': 'Only available in mobile webpage',
        'url': 'https://m.ok.ru/video/2361249957145',
        'info_dict': {
            'id': '2361249957145',
+            'ext': 'mp4',
            'title': 'Быковское крещение',
            'duration': 3038.181,
        },
@@ -158,8 +178,37 @@ class OdnoklassnikiIE(InfoExtractor):
        # Paid video
        'url': 'https://ok.ru/video/954886983203',
        'only_matching': True,
+    }, {
+        'url': 'https://ok.ru/videoembed/2932705602075',
+        'info_dict': {
+            'id': '2932705602075',
+            'ext': 'mp4',
+            'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8',
+            'title': 'Boosty для тебя!',
+            'uploader_id': '597811038747',
+            'like_count': 0,
+            'duration': 35,
+        },
    }]

+    _WEBPAGE_TESTS = [{
+        'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167',
+        'info_dict': {
+            'id': '3950343629563',
+            'ext': 'mp4',
+            'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8',
+            'title': 'Заяц Бусти.mp4',
+            'uploader_id': '571368965883',
+            'like_count': 0,
+            'duration': 10444,
+        },
+    }]
+
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        for x in super()._extract_embed_urls(url, webpage):
+            yield smuggle_url(x, {'referrer': url})
+
    def _real_extract(self, url):
        try:
            return self._extract_desktop(url)
@@ -174,16 +223,23 @@ def _extract_desktop(self, url):
        start_time = int_or_none(compat_parse_qs(
            compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])

-        video_id = self._match_id(url)
+        url, smuggled = unsmuggle_url(url, {})
+        video_id, is_embed = self._match_valid_url(url).group('id', 'embed')
+        mode = 'videoembed' if is_embed else 'video'

        webpage = self._download_webpage(
-            'http://ok.ru/video/%s' % video_id, video_id,
-            note='Downloading desktop webpage')
+            f'https://ok.ru/{mode}/{video_id}', video_id,
+            note='Downloading desktop webpage',
+            headers={'Referer': smuggled['referrer']} if smuggled.get('referrer') else {})

        error = self._search_regex(
            r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
            webpage, 'error', default=None)
-        if error:
+        # Direct link from boosty
+        if (error == 'The author of this video has not been found or is blocked'
+                and not smuggled.get('referrer') and mode == 'videoembed'):
+            return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'}))
+        elif error:
            raise ExtractorError(error, expected=True)

        player = self._parse_json(
@@ -270,7 +326,7 @@ def _extract_desktop(self, url):
        if provider == 'LIVE_TV_APP':
            info['title'] = title

-        quality = qualities(('4', '0', '1', '2', '3', '5'))
+        quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))

        formats = [{
            'url': f['url'],
--- a/yt_dlp/extractor/uktvplay.py
+++ b/yt_dlp/extractor/uktvplay.py
@@ -2,7 +2,7 @@


 class UKTVPlayIE(InfoExtractor):
-    _VALID_URL = r'https?://uktvplay\.(?:uktv\.)?co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)'
+    _VALID_URL = r'https?://uktvplay\.(?:uktv\.)?co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
        'info_dict': {
@@ -22,6 +22,9 @@ class UKTVPlayIE(InfoExtractor):
    }, {
        'url': 'https://uktvplay.uktv.co.uk/shows/africa/watch-online/5983349675001',
        'only_matching': True,
+    }, {
+        'url': 'https://uktvplay.co.uk/shows/hornby-a-model-world/series-1/episode-1/6276739790001?autoplaying=true',
+        'only_matching': True,
    }]
    # BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/1242911124001/OrCyvJ2gyL_default/index.html?videoId=%s'
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s'
--- a/yt_dlp/postprocessor/movefilesafterdownload.py
+++ b/yt_dlp/postprocessor/movefilesafterdownload.py
@@ -1,7 +1,7 @@
 import os
-import shutil

 from .common import PostProcessor
+from ..compat import shutil
 from ..utils import (
    PostProcessingError,
    decodeFilename,
Author	SHA1	Message	Date
pukkandan	581e86b512	[extractor/uktvplay] Fix `_VALID_URL` Closes #5472	2022-11-07 21:47:31 +05:30
megapro17	8196182a12	[extractor/odnoklassniki] Support boosty.to embeds (#5105) Closes #4212 Authored by: megapro17, Lesmiscore, pukkandan	2022-11-07 21:32:42 +05:30
m4tu4g	9b383177c9	[extractor/mxplayer] Improve extractor (#5303) Closes #5276 Authored by: m4tu4g	2022-11-07 21:29:53 +05:30
ClosedPort22	fbb0ee7747	[compat] Fix `shutil.move` in restricted ACL mode on BSD (#5309) Authored by: ClosedPort22, pukkandan	2022-11-07 20:54:30 +05:30