# yt-dlp/yt_dlp/extractor/doodstream.py
# 74 lines, 2.7 KiB, Python
# Revision timestamp: 2020-08-16 15:40:59 +02:00

import string
import random
import time
from .common import InfoExtractor
class DoodStreamIE(InfoExtractor):
    """Extractor for DoodStream video pages on dood.to and dood.watch.

    Both ``/e/<id>`` and ``/d/<id>`` URL forms are matched; extraction itself
    always goes through the ``https://dood.to/e/<id>`` page.
    """

    _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch)/[ed]/(?P<id>[a-z0-9]+)'
    _TESTS = [
        {
            'url': 'http://dood.to/e/5s1wmbdacezb',
            'md5': '4568b83b31e13242b3f1ff96c55f0595',
            'info_dict': {
                'id': '5s1wmbdacezb',
                'ext': 'mp4',
                'title': 'Kat Wonders - Monthly May 2020',
                'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
                'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
            },
        },
        {
            'url': 'http://dood.watch/d/5s1wmbdacezb',
            'md5': '4568b83b31e13242b3f1ff96c55f0595',
            'info_dict': {
                'id': '5s1wmbdacezb',
                'ext': 'mp4',
                'title': 'Kat Wonders - Monthly May 2020',
                'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
                'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
            },
        },
        {
            'url': 'https://dood.to/d/jzrxn12t2s7n',
            'md5': '3207e199426eca7c2aa23c2872e6728a',
            'info_dict': {
                'id': 'jzrxn12t2s7n',
                'ext': 'mp4',
                'title': 'Stacy Cruz Cute ALLWAYSWELL',
                'description': 'Stacy Cruz Cute ALLWAYSWELL | DoodStream.com',
                'thumbnail': 'https://img.doodcdn.com/snaps/8edqd5nppkac3x8u.jpg',
            },
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for the DoodStream video addressed by *url*.

        The embed page is downloaded and scraped for metadata, a ``token``
        and a ``/pass_md5...`` path. The body returned by the ``pass_md5``
        request becomes the leading part of the media URL, followed by ten
        random alphanumeric characters and a ``token``/``expiry`` query.
        """
        video_id = self._match_id(url)
        # Whatever form matched, always work from the dood.to embed page.
        embed_url = f'https://dood.to/e/{video_id}'
        page = self._download_webpage(embed_url, video_id)

        # Optional metadata: each lookup yields None when the tag is absent.
        title = self._html_search_meta(['og:title', 'twitter:title'], page, default=None)
        thumbnail = self._html_search_meta(['og:image', 'twitter:image'], page, default=None)

        token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', page, 'token')

        description = self._html_search_meta(
            ['og:description', 'description', 'twitter:description'], page, default=None)

        # The same headers are sent with the pass_md5 request and handed to
        # the downloader through 'http_headers'.
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0',
            'referer': embed_url,
        }

        pass_md5_path = self._html_search_regex(r'(/pass_md5.*?)\'', page, 'pass_md5')

        # Media URL = <pass_md5 response body> + 10 random chars + query string.
        url_prefix = self._download_webpage(
            f'https://dood.to{pass_md5_path}', video_id, headers=request_headers)
        alphabet = string.ascii_letters + string.digits
        random_suffix = ''.join(random.choice(alphabet) for _ in range(10))
        # 'expiry' is the current Unix time in milliseconds.
        query = f'?token={token}&expiry={int(time.time() * 1000)}'
        media_url = ''.join((url_prefix, random_suffix, query))

        return {
            'id': video_id,
            'title': title,
            'url': media_url,
            'http_headers': request_headers,
            'ext': 'mp4',
            'description': description,
            'thumbnail': thumbnail,
        }