1
1
mirror of https://github.com/ytdl-org/youtube-dl synced 2024-11-30 10:22:55 +01:00

[shahid] add support for show pages (closes #7401)

This commit is contained in:
Remita Amine 2017-12-19 02:00:13 +01:00
parent 3961c6cb9d
commit 78466fcab5
4 changed files with 219 additions and 131 deletions

View File

@ -0,0 +1,78 @@
# coding: utf-8
from __future__ import unicode_literals
import datetime
import hashlib
import hmac
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode
class AWSIE(InfoExtractor):
    """Base extractor for HTTP APIs that expect AWS Signature Version 4 requests.

    The signing code reads ``self._AWS_PROXY_HOST`` (host name used both in
    the ``Host`` header and in the request URL) and ``self._AWS_API_KEY``
    (sent as ``X-Api-Key``). Neither attribute is defined on this class, so
    subclasses have to provide them.
    """
    _AWS_ALGORITHM = 'AWS4-HMAC-SHA256'
    _AWS_REGION = 'us-east-1'

    def _aws_execute_api(self, aws_dict, video_id, query=None):
        """Send a SigV4-signed GET request and return the downloaded JSON.

        aws_dict -- dict with the keys 'uri' (request path), 'access_key'
                    and 'secret_key'; an optional 'session_token' is sent
                    as X-Amz-Security-Token when present and non-empty.
        video_id -- passed through to self._download_json.
        query    -- optional dict of query parameters; it is URL-encoded
                    and becomes part of both the signature and the URL.

        Returns whatever self._download_json returns for the signed URL.
        """
        query = query or {}
        amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
        # The credential scope only uses the YYYYMMDD part of the timestamp.
        date = amz_date[:8]
        headers = {
            'Accept': 'application/json',
            'Host': self._AWS_PROXY_HOST,
            'X-Amz-Date': amz_date,
        }
        session_token = aws_dict.get('session_token')
        if session_token:
            headers['X-Amz-Security-Token'] = session_token
        headers['X-Api-Key'] = self._AWS_API_KEY

        def aws_hash(s):
            return hashlib.sha256(s.encode('utf-8')).hexdigest()

        # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
        # NOTE(review): SigV4 also expects query parameters sorted by name;
        # the urlencode() output is used as is - verify for multi-key queries.
        canonical_querystring = compat_urllib_parse_urlencode(query)
        # Canonical headers and the signed header list must both be sorted by
        # lowercase header name. Plain dict iteration order is not guaranteed
        # on every supported interpreter, so sort explicitly and derive both
        # strings from the same sorted list.
        sorted_headers = sorted(
            (header_name.lower(), header_value)
            for header_name, header_value in headers.items())
        canonical_headers = ''.join(
            '%s:%s\n' % header for header in sorted_headers)
        signed_headers = ';'.join(
            [header_name for header_name, _ in sorted_headers])
        canonical_request = '\n'.join([
            'GET',
            aws_dict['uri'],
            canonical_querystring,
            canonical_headers,
            signed_headers,
            aws_hash('')  # hash of the (empty) request payload
        ])

        # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
        credential_scope_list = [date, self._AWS_REGION, 'execute-api', 'aws4_request']
        credential_scope = '/'.join(credential_scope_list)
        string_to_sign = '\n'.join([
            self._AWS_ALGORITHM, amz_date, credential_scope,
            aws_hash(canonical_request)])

        # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
        def aws_hmac(key, msg):
            return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)

        def aws_hmac_digest(key, msg):
            return aws_hmac(key, msg).digest()

        def aws_hmac_hexdigest(key, msg):
            return aws_hmac(key, msg).hexdigest()

        # Derive the signing key by chaining one HMAC per credential scope
        # component, seeded with 'AWS4' + secret key.
        k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
        for value in credential_scope_list:
            k_signing = aws_hmac_digest(k_signing, value)

        signature = aws_hmac_hexdigest(k_signing, string_to_sign)

        # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
        headers['Authorization'] = ', '.join([
            '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
            'SignedHeaders=%s' % signed_headers,
            'Signature=%s' % signature,
        ])

        return self._download_json(
            'https://%s%s%s' % (
                self._AWS_PROXY_HOST, aws_dict['uri'],
                '?' + canonical_querystring if canonical_querystring else ''),
            video_id, headers=headers)

View File

@ -927,7 +927,10 @@ from .sendtonews import SendtoNewsIE
from .servingsys import ServingSysIE
from .servus import ServusIE
from .sexu import SexuIE
from .shahid import ShahidIE
from .shahid import (
ShahidIE,
ShahidShowIE,
)
from .shared import (
SharedIE,
VivoIE,

View File

@ -1,13 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
import datetime
import json
import hashlib
import hmac
import re
from .common import InfoExtractor
from .aws import AWSIE
from .anvato import AnvatoIE
from ..utils import (
smuggle_url,
@ -16,7 +14,7 @@ from ..utils import (
)
class ScrippsNetworksWatchIE(InfoExtractor):
class ScrippsNetworksWatchIE(AWSIE):
IE_NAME = 'scrippsnetworks:watch'
_VALID_URL = r'''(?x)
https?://
@ -64,44 +62,27 @@ class ScrippsNetworksWatchIE(InfoExtractor):
'travelchannel': 'trav',
'geniuskitchen': 'genius',
}
_SNI_HOST = 'web.api.video.snidigital.com'
_AWS_REGION = 'us-east-1'
_AWS_IDENTITY_ID_JSON = json.dumps({
'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % _AWS_REGION
})
_AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
_AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
_AWS_SERVICE = 'execute-api'
_AWS_REQUEST = 'aws4_request'
_AWS_SIGNED_HEADERS = ';'.join([
'host', 'x-amz-date', 'x-amz-security-token', 'x-api-key'])
_AWS_CANONICAL_REQUEST_TEMPLATE = '''GET
%(uri)s
_AWS_PROXY_HOST = 'web.api.video.snidigital.com'
host:%(host)s
x-amz-date:%(date)s
x-amz-security-token:%(token)s
x-api-key:%(key)s
%(signed_headers)s
%(payload_hash)s'''
_AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
site_id, video_id = mobj.group('site', 'id')
def aws_hash(s):
return hashlib.sha256(s.encode('utf-8')).hexdigest()
aws_identity_id_json = json.dumps({
'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION
}).encode('utf-8')
token = self._download_json(
'https://cognito-identity.us-east-1.amazonaws.com/', video_id,
data=self._AWS_IDENTITY_ID_JSON.encode('utf-8'),
'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id,
data=aws_identity_id_json,
headers={
'Accept': '*/*',
'Content-Type': 'application/x-amz-json-1.1',
'Referer': url,
'X-Amz-Content-Sha256': aws_hash(self._AWS_IDENTITY_ID_JSON),
'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(),
'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
'X-Amz-User-Agent': self._AWS_USER_AGENT,
})['Token']
@ -124,64 +105,12 @@ x-api-key:%(key)s
sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
fatal=True)
access_key_id = get('AccessKeyId')
secret_access_key = get('SecretAccessKey')
session_token = get('SessionToken')
# Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
uri = '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id)
datetime_now = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
date = datetime_now[:8]
canonical_string = self._AWS_CANONICAL_REQUEST_TEMPLATE % {
'uri': uri,
'host': self._SNI_HOST,
'date': datetime_now,
'token': session_token,
'key': self._AWS_API_KEY,
'signed_headers': self._AWS_SIGNED_HEADERS,
'payload_hash': aws_hash(''),
}
# Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
credential_string = '/'.join([date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST])
string_to_sign = '\n'.join([
'AWS4-HMAC-SHA256', datetime_now, credential_string,
aws_hash(canonical_string)])
# Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
def aws_hmac(key, msg):
return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
def aws_hmac_digest(key, msg):
return aws_hmac(key, msg).digest()
def aws_hmac_hexdigest(key, msg):
return aws_hmac(key, msg).hexdigest()
k_secret = 'AWS4' + secret_access_key
k_date = aws_hmac_digest(k_secret.encode('utf-8'), date)
k_region = aws_hmac_digest(k_date, self._AWS_REGION)
k_service = aws_hmac_digest(k_region, self._AWS_SERVICE)
k_signing = aws_hmac_digest(k_service, self._AWS_REQUEST)
signature = aws_hmac_hexdigest(k_signing, string_to_sign)
auth_header = ', '.join([
'AWS4-HMAC-SHA256 Credential=%s' % '/'.join(
[access_key_id, date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]),
'SignedHeaders=%s' % self._AWS_SIGNED_HEADERS,
'Signature=%s' % signature,
])
mcp_id = self._download_json(
'https://%s%s' % (self._SNI_HOST, uri), video_id, headers={
'Accept': '*/*',
'Referer': url,
'Authorization': auth_header,
'X-Amz-Date': datetime_now,
'X-Amz-Security-Token': session_token,
'X-Api-Key': self._AWS_API_KEY,
})['results'][0]['mcpId']
mcp_id = self._aws_execute_api({
'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id),
'access_key': get('AccessKeyId'),
'secret_key': get('SecretAccessKey'),
'session_token': get('SessionToken'),
}, video_id)['results'][0]['mcpId']
return self.url_result(
smuggle_url(

View File

@ -1,22 +1,53 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import json
import math
import re
from .common import InfoExtractor
from .aws import AWSIE
from ..compat import compat_HTTPError
from ..utils import (
clean_html,
ExtractorError,
InAdvancePagedList,
int_or_none,
parse_iso8601,
str_or_none,
urlencode_postdata,
clean_html,
)
class ShahidIE(InfoExtractor):
class ShahidBaseIE(AWSIE):
    """Common helpers for the Shahid extractors: signed API calls and
    conversion of API fault payloads into user-facing errors."""
    _AWS_PROXY_HOST = 'api2.shahid.net'
    _AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh'

    def _handle_error(self, e):
        """Raise an expected ExtractorError built from the fault messages
        found in the body of e.cause; return normally when there are none."""
        body = e.cause.read().decode('utf-8')
        fail_data = self._parse_json(body, None, fatal=False)
        if not fail_data:
            return
        messages = []
        for fault in fail_data.get('faults', []):
            # Only faults carrying a non-empty userMessage are reported.
            if fault.get('userMessage'):
                messages.append(clean_html(fault['userMessage']))
        faults_message = ', '.join(messages)
        if faults_message:
            raise ExtractorError(faults_message, expected=True)

    def _call_api(self, path, video_id, request=None):
        """Call '/proxy/v2/' + path through the signed AWS helper.

        When `request` is truthy it is JSON-encoded and sent as the
        'request' query parameter. HTTP errors are first offered to
        _handle_error; if that does not raise, the original error is
        re-raised.
        """
        query = {'request': json.dumps(request)} if request else {}
        api_request = {
            'uri': '/proxy/v2/' + path,
            'access_key': 'AKIAI6X4TYCIXM2B7MUQ',
            'secret_key': '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',
        }
        try:
            return self._aws_execute_api(api_request, video_id, query)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError):
                self._handle_error(e)
            raise
class ShahidIE(ShahidBaseIE):
_NETRC_MACHINE = 'shahid'
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
_TESTS = [{
@ -41,34 +72,25 @@ class ShahidIE(InfoExtractor):
'only_matching': True
}]
def _api2_request(self, *args, **kwargs):
try:
return self._download_json(*args, **kwargs)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
fail_data = self._parse_json(
e.cause.read().decode('utf-8'), None, fatal=False)
if fail_data:
faults = fail_data.get('faults', [])
faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')])
if faults_message:
raise ExtractorError(faults_message, expected=True)
raise
def _real_initialize(self):
email, password = self._get_login_info()
if email is None:
return
user_data = self._api2_request(
'https://shahid.mbc.net/wd/service/users/login',
None, 'Logging in', data=json.dumps({
'email': email,
'password': password,
'basic': 'false',
}).encode('utf-8'), headers={
'Content-Type': 'application/json; charset=UTF-8',
})['user']
try:
user_data = self._download_json(
'https://shahid.mbc.net/wd/service/users/login',
None, 'Logging in', data=json.dumps({
'email': email,
'password': password,
'basic': 'false',
}).encode('utf-8'), headers={
'Content-Type': 'application/json; charset=UTF-8',
})['user']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
self._handle_error(e)
raise
self._download_webpage(
'https://shahid.mbc.net/populateContext',
@ -81,25 +103,13 @@ class ShahidIE(InfoExtractor):
'sessionId': user_data['sessionId'],
}))
def _get_api_data(self, response):
data = response.get('data', {})
error = data.get('error')
if error:
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())),
expected=True)
return data
def _real_extract(self, url):
page_type, video_id = re.match(self._VALID_URL, url).groups()
if page_type == 'clip':
page_type = 'episode'
playout = self._api2_request(
'https://api2.shahid.net/proxy/v2/playout/url/' + video_id,
video_id, 'Downloading player JSON')['playout']
playout = self._call_api(
'playout/url/' + video_id, video_id)['playout']
if playout.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
@ -107,13 +117,27 @@ class ShahidIE(InfoExtractor):
formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')
self._sort_formats(formats)
video = self._get_api_data(self._download_json(
# video = self._call_api(
# 'product/id', video_id, {
# 'id': video_id,
# 'productType': 'ASSET',
# 'productSubType': page_type.upper()
# })['productModel']
response = self._download_json(
'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id),
video_id, 'Downloading video JSON', query={
'apiKey': 'sh@hid0nlin3',
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
}))[page_type]
})
data = response.get('data', {})
error = data.get('error')
if error:
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())),
expected=True)
video = data[page_type]
title = video['title']
categories = [
category['name']
@ -135,3 +159,57 @@ class ShahidIE(InfoExtractor):
'episode_id': video_id,
'formats': formats,
}
class ShahidShowIE(ShahidBaseIE):
    """Playlist extractor for Shahid show and series pages.

    The show's playlist is fetched page by page and every product that has a
    URL is handed over to the 'Shahid' extractor.
    """
    _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187',
        'info_dict': {
            'id': '79187',
            'title': 'رامز قرش البحر',
            'description': 'md5:c88fa7e0f02b0abd39d417aee0d046ff',
        },
        'playlist_mincount': 32,
    }, {
        'url': 'https://shahid.mbc.net/ar/series/How-to-live-Longer-(The-Big-Think)/series-291861',
        'only_matching': True
    }]
    # Number of playlist entries requested per API page.
    _PAGE_SIZE = 30

    def _real_extract(self, url):
        """Return a playlist result holding all entries of the show at `url`."""
        show_id = self._match_id(url)

        product = self._call_api(
            'playableAsset', show_id, {'showId': show_id})['productModel']
        playlist = product['playlist']
        playlist_id = playlist['id']
        show = product.get('show', {})

        def page_func(page_num):
            # page_num is forwarded unchanged as the API's pageNumber.
            page = self._call_api(
                'product/playlist', show_id, {
                    'playListId': playlist_id,
                    'pageNumber': page_num,
                    # Keep the requested size in sync with the page size
                    # the paged list is built with below.
                    'pageSize': self._PAGE_SIZE,
                    'sorts': [{
                        'order': 'DESC',
                        'type': 'SORTDATE'
                    }],
                })
            for item in page.get('productList', {}).get('products', []):
                # A missing or null productUrl must be skipped, not crash.
                product_url = (item.get('productUrl') or {}).get('url')
                if not product_url:
                    continue
                yield self.url_result(
                    product_url, 'Shahid',
                    str_or_none(item.get('id')),
                    item.get('title'))

        # Integer ceiling division: gives an int page count and does not
        # lose the last partial page where `/` is floor division (Python 2
        # without `from __future__ import division`).
        page_count = (playlist['count'] + self._PAGE_SIZE - 1) // self._PAGE_SIZE
        entries = InAdvancePagedList(page_func, page_count, self._PAGE_SIZE)

        return self.playlist_result(
            entries, show_id, show.get('title'), show.get('description'))