From 6f8c2635a573c84ef66c02f73b4aeff1cc36ae4e Mon Sep 17 00:00:00 2001 From: fonkap Date: Sat, 11 Feb 2023 03:54:45 +0100 Subject: [PATCH] [StreamsbIE] Add extractor for streamsb.com (viewsb.com) (#31517) * Add extractor for streamsb.com (viewsb.com) * make data url using app.js version --------- Co-authored-by: dirkf --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/streamsb.py | 61 ++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 youtube_dl/extractor/streamsb.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d8428f46f..3a87f9e33 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1206,6 +1206,7 @@ from .storyfire import ( from .streamable import StreamableIE from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE +from .streamsb import StreamsbIE from .streetvoice import StreetVoiceIE from .stretchinternet import StretchInternetIE from .stv import STVPlayerIE diff --git a/youtube_dl/extractor/streamsb.py b/youtube_dl/extractor/streamsb.py new file mode 100644 index 000000000..bffcb3de1 --- /dev/null +++ b/youtube_dl/extractor/streamsb.py @@ -0,0 +1,61 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import binascii +import random +import re +import string + +from .common import InfoExtractor +from ..utils import urljoin, url_basename + + +def to_ascii_hex(str1): + return binascii.hexlify(str1.encode('utf-8')).decode('ascii') + + +def generate_random_string(length): + return ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(length)) + + +class StreamsbIE(InfoExtractor): + _DOMAINS = ('viewsb.com', ) + _VALID_URL = r'https://(?P%s)/(?P.+)' % '|'.join(_DOMAINS) + _TEST = { + 'url': 'https://viewsb.com/dxfvlu4qanjx', + 'md5': '488d111a63415369bf90ea83adc8a325', + 'info_dict': { + 'id': 'dxfvlu4qanjx', + 'ext': 'mp4', + 'title': 'Sintel' + } + } + + def _real_extract(self, url): + domain, video_id = re.match(self._VALID_URL, url).group('domain', 'id') + webpage = self._download_webpage(url, video_id) + + iframe_rel_url = self._search_regex(r'''(?i)]+\bsrc\s*=\s*('|")(?P/.*\.html)\1''', webpage, 'iframe', group='path') + iframe_url = urljoin('https://' + domain, iframe_rel_url) + + iframe_data = self._download_webpage(iframe_url, video_id) + app_version = self._search_regex(r''']+\bsrc\s*=\s*["|'].*/app\.min\.(\d+)\.js''', iframe_data, 'app version', fatal=False) or '50' + + video_code = url_basename(iframe_url).rsplit('.')[0] + + length = 12 + req = '||'.join((generate_random_string(length), video_code, generate_random_string(length), 'streamsb')) + ereq = 'https://{0}/sources{1}/{2}'.format(domain, app_version, to_ascii_hex(req)) + + video_data = self._download_webpage(ereq, video_id, headers={ + 'Referer': iframe_url, + 'watchsb': 'sbstream', + }) + player_data = self._parse_json(video_data, video_id) + title = player_data['stream_data']['title'] + formats = self._extract_m3u8_formats(player_data['stream_data']['file'], video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + return { + 'id': video_id, + 'formats': formats, + 'title': title, + }