From 9bb2c7673e45aee95023d980ff307d7a90ac58c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Oct 2017 03:38:34 +0700 Subject: [PATCH] [redditr] Fix extraction for URLs with query (closes #14495) --- youtube_dl/extractor/reddit.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/reddit.py b/youtube_dl/extractor/reddit.py index 4d44b9d74..f36bc648c 100644 --- a/youtube_dl/extractor/reddit.py +++ b/youtube_dl/extractor/reddit.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -45,7 +47,7 @@ def _real_extract(self, url): class RedditRIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/]+)' + _VALID_URL = r'(?P<url>https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))' _TESTS = [{ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', 'info_dict': { @@ -83,10 +85,13 @@ class RedditRIE(InfoExtractor): }] def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + url, video_id = mobj.group('url', 'id') + video_id = self._match_id(url) data = self._download_json( - url + '.json', video_id)[0]['data']['children'][0]['data'] + url + '/.json', video_id)[0]['data']['children'][0]['data'] video_url = data['url']