From 90d3ce2fadafaf3b7a19d1d6a4cee9f2cd197bb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ram=C3=B3n=20Sola?= Date: Thu, 26 Nov 2020 16:26:21 +0100 Subject: [PATCH 01/12] [atresplayer] fix authentication --- youtube_dl/extractor/atresplayer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index c2cec9845..026541ed3 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -9,6 +9,7 @@ from ..utils import ( ExtractorError, int_or_none, urlencode_postdata, + urljoin, ) @@ -58,7 +59,7 @@ class AtresPlayerIE(InfoExtractor): return self._request_webpage( - self._API_BASE + 'login', None, 'Downloading login page') + urljoin(self._API_BASE, 'login'), None, 'Downloading login page') try: target_url = self._download_json( @@ -72,6 +73,7 @@ class AtresPlayerIE(InfoExtractor): except ExtractorError as e: self._handle_error(e, 400) + target_url = urljoin('https://account.atresmedia.com', target_url) self._request_webpage(target_url, None, 'Following Target URL') def _real_extract(self, url): From bea7f991b4e5214b5fef1c2f6b2a624557533041 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ram=C3=B3n=20Sola?= Date: Thu, 26 Nov 2020 17:27:50 +0100 Subject: [PATCH 02/12] [atresplayer] fix extraction --- youtube_dl/extractor/atresplayer.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 026541ed3..82a9ad083 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -79,9 +79,19 @@ class AtresPlayerIE(InfoExtractor): def _real_extract(self, url): display_id, video_id = re.match(self._VALID_URL, url).groups() + page = self._download_webpage(url, video_id, 'Downloading video page') + preloaded_state_regex = r'window\.__PRELOADED_STATE__\s*=\s*(\{(.*?)\});' + preloaded_state_text = 
self._html_search_regex(preloaded_state_regex, page, 'preloaded state') + preloaded_state = self._parse_json(preloaded_state_text, video_id) + link_info = next(iter(preloaded_state['links'].values())) + try: - episode = self._download_json( - self._API_BASE + 'client/v1/player/episode/' + video_id, video_id) + metadata = self._download_json(link_info['href'], video_id) + except ExtractorError as e: + self._handle_error(e, 403) + + try: + episode = self._download_json(metadata['urlVideo'], video_id) except ExtractorError as e: self._handle_error(e, 403) From 4110422166a68000909b7243fb6ebdb50270a26a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ram=C3=B3n=20Sola?= Date: Thu, 26 Nov 2020 18:48:27 +0100 Subject: [PATCH 03/12] [atresplayer] preliminary support for subtitles --- youtube_dl/extractor/atresplayer.py | 35 +++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 82a9ad083..45419d205 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -6,10 +6,13 @@ import re from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( + base_url, ExtractorError, int_or_none, urlencode_postdata, urljoin, + xpath_text, + xpath_with_ns, ) @@ -76,6 +79,27 @@ class AtresPlayerIE(InfoExtractor): target_url = urljoin('https://account.atresmedia.com', target_url) self._request_webpage(target_url, None, 'Following Target URL') + def _get_mpd_subtitles(self, mpd_xml, mpd_url): + subs = {} + + def _add_ns(name): + return xpath_with_ns(name, { + 'mpd': 'urn:mpeg:dash:schema:mpd:2011' + }) + + text_nodes = mpd_xml.findall( + _add_ns('./mpd:Period/mpd:AdaptationSet[@contentType="text"]')) + for node in text_nodes: + lang = node.attrib['lang'] + url = xpath_text( + node, _add_ns('./mpd:Representation[@mimeType="text/vtt"]/mpd:BaseURL')) + if url: + subs.update({lang: [{ + 'ext': 'vtt', + 'url': 
urljoin(mpd_url, url), + }]}) + return subs + def _real_extract(self, url): display_id, video_id = re.match(self._VALID_URL, url).groups() @@ -98,6 +122,7 @@ class AtresPlayerIE(InfoExtractor): title = episode['titulo'] formats = [] + subtitles = {} for source in episode.get('sources', []): src = source.get('src') if not src: @@ -108,8 +133,13 @@ class AtresPlayerIE(InfoExtractor): src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) elif src_type == 'application/dash+xml': - formats.extend(self._extract_mpd_formats( - src, video_id, mpd_id='dash', fatal=False)) + mpd_doc, mpd_handle = self._download_xml_handle( + src, video_id, note='Downloading MPD manifest', fatal=False) + if mpd_doc is not None: + mpd_base_url = base_url(mpd_handle.geturl()) + subtitles.update(self._get_mpd_subtitles(mpd_doc, mpd_base_url)) + formats.extend(self._parse_mpd_formats( + mpd_doc, mpd_id='dash', mpd_base_url=mpd_base_url, mpd_url=src)) self._sort_formats(formats) heartbeat = episode.get('heartbeat') or {} @@ -127,4 +157,5 @@ class AtresPlayerIE(InfoExtractor): 'channel': get_meta('channel'), 'season': get_meta('season'), 'episode_number': int_or_none(get_meta('episodeNumber')), + 'subtitles': subtitles, } From 4a47be85cd0a0d0034e70e86e5a203e91f3ab15e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ram=C3=B3n=20Sola?= Date: Tue, 8 Dec 2020 23:01:44 +0100 Subject: [PATCH 04/12] [atresplayer] fix regex to match more valid URLs --- youtube_dl/extractor/atresplayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 45419d205..79a59b13d 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -17,7 +17,7 @@ from ..utils import ( class AtresPlayerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})' + _VALID_URL =
r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+(?:/[^/]+)?/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})' _NETRC_MACHINE = 'atresplayer' _TESTS = [ { From 265b7ec6e9f62c7c73cdf7fbdc9cc90895b642e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ram=C3=B3n=20Sola?= Date: Sat, 26 Dec 2020 18:46:45 +0100 Subject: [PATCH 05/12] [atresplayer] fix subtitles URL lookup for some videos --- youtube_dl/extractor/atresplayer.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 79a59b13d..1c6024fd7 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -11,6 +11,7 @@ from ..utils import ( int_or_none, urlencode_postdata, urljoin, + xpath_element, xpath_text, xpath_with_ns, ) @@ -87,16 +88,19 @@ class AtresPlayerIE(InfoExtractor): 'mpd': 'urn:mpeg:dash:schema:mpd:2011' }) + def _is_mime_type(node, mime_type): + return node.attrib.get('mimeType') == mime_type + text_nodes = mpd_xml.findall( - _add_ns('./mpd:Period/mpd:AdaptationSet[@contentType="text"]')) - for node in text_nodes: - lang = node.attrib['lang'] - url = xpath_text( - node, _add_ns('./mpd:Representation[@mimeType="text/vtt"]/mpd:BaseURL')) - if url: + _add_ns('mpd:Period/mpd:AdaptationSet[@contentType="text"]')) + for adaptation_set in text_nodes: + lang = adaptation_set.attrib['lang'] + representation = xpath_element(adaptation_set, _add_ns('mpd:Representation')) + subs_url = xpath_text(representation, _add_ns('mpd:BaseURL')) + if subs_url and (_is_mime_type(adaptation_set, 'text/vtt') or _is_mime_type(representation, 'text/vtt')): subs.update({lang: [{ 'ext': 'vtt', - 'url': urljoin(mpd_url, url), + 'url': urljoin(mpd_url, subs_url), }]}) return subs From e0e9bb2aa1fa7bc37d873e865579ecd44310b563 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ram=C3=B3n=20Sola?= Date: Fri, 15 Jan 2021 16:23:14 +0100 Subject: [PATCH 06/12] [atresplayer] fix MPD download failure: don't unpack too early ---
youtube_dl/extractor/atresplayer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 1c6024fd7..f317559d2 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -137,9 +137,10 @@ class AtresPlayerIE(InfoExtractor): src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) elif src_type == 'application/dash+xml': - mpd_doc, mpd_handle = self._download_xml_handle( + mpd = self._download_xml_handle( src, video_id, note='Downloading MPD manifest', fatal=False) - if mpd_doc is not None: + if mpd: + mpd_doc, mpd_handle = mpd mpd_base_url = base_url(mpd_handle.geturl()) subtitles.update(self._get_mpd_subtitles(mpd_doc, mpd_base_url)) formats.extend(self._parse_mpd_formats( From 587ce9ac92a3e020e3196db42a96351a948561da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ram=C3=B3n=20Sola?= Date: Fri, 21 May 2021 01:24:06 +0200 Subject: [PATCH 07/12] [atresplayer] make login page download errors non-fatal --- youtube_dl/extractor/atresplayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index f317559d2..3d35adb50 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -63,7 +63,7 @@ class AtresPlayerIE(InfoExtractor): return self._request_webpage( - urljoin(self._API_BASE, 'login'), None, 'Downloading login page') + urljoin(self._API_BASE, 'login'), None, 'Downloading login page', fatal=False) try: target_url = self._download_json( From a4a8c926ab887ccfba40e904591ccc2aa004ceaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ram=C3=B3n=20Sola?= Date: Mon, 24 May 2021 05:05:13 +0200 Subject: [PATCH 08/12] [atresplayer] login no longer worked, fixed --- youtube_dl/extractor/atresplayer.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py 
b/youtube_dl/extractor/atresplayer.py index 3d35adb50..b67f08ab9 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -62,24 +62,21 @@ class AtresPlayerIE(InfoExtractor): if username is None: return - self._request_webpage( - urljoin(self._API_BASE, 'login'), None, 'Downloading login page', fatal=False) - try: - target_url = self._download_json( - 'https://account.atresmedia.com/api/login', None, + self._download_json( + urljoin(self._API_BASE, 'login'), None, 'Logging in', headers={ 'Content-Type': 'application/x-www-form-urlencoded' }, data=urlencode_postdata({ 'username': username, 'password': password, - }))['targetUrl'] + 'type': 'credentials', + })) except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + raise ExtractorError('Authentication failure', expected=True) self._handle_error(e, 400) - target_url = urljoin('https://account.atresmedia.com', target_url) - self._request_webpage(target_url, None, 'Following Target URL') - def _get_mpd_subtitles(self, mpd_xml, mpd_url): subs = {} From 1a24c71595d7266ec91a599b2470c5d7d3c2fa8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ram=C3=B3n=20Sola?= Date: Thu, 29 Jul 2021 13:27:45 +0200 Subject: [PATCH 09/12] [atresplayer] changed auth endpoint, _API_BASE no longer needed --- youtube_dl/extractor/atresplayer.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index b67f08ab9..28013eb13 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -44,7 +44,6 @@ class AtresPlayerIE(InfoExtractor): 'only_matching': True, }, ] - _API_BASE = 'https://api.atresplayer.com/' def _real_initialize(self): self._login() @@ -63,14 +62,13 @@ class AtresPlayerIE(InfoExtractor): return try: - self._download_json( - urljoin(self._API_BASE, 'login'), None, + self._download_webpage( + 
'https://account.atresplayer.com/auth/v1/login', None, 'Logging in', headers={ 'Content-Type': 'application/x-www-form-urlencoded' }, data=urlencode_postdata({ 'username': username, 'password': password, - 'type': 'credentials', })) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: From 00f4ab1a8d75e0650412b4e3060d52f4059e408c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ram=C3=B3n=20Sola?= Date: Thu, 29 Jul 2021 14:27:47 +0200 Subject: [PATCH 10/12] [atresplayer] wrong credentials cause HTTP status 400 instead --- youtube_dl/extractor/atresplayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 28013eb13..178514767 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -71,7 +71,7 @@ class AtresPlayerIE(InfoExtractor): 'password': password, })) except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: raise ExtractorError('Authentication failure', expected=True) self._handle_error(e, 400) From 25aa60a913bdc2904574bc54f84dabb783e82a6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ram=C3=B3n=20Sola?= Date: Mon, 4 Oct 2021 13:16:00 +0200 Subject: [PATCH 11/12] [atresplayer] Handle invalid_request (auth fail) in handle_error --- youtube_dl/extractor/atresplayer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 178514767..a9ac4a669 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -53,6 +53,8 @@ class AtresPlayerIE(InfoExtractor): error = self._parse_json(e.cause.read(), None) if error.get('error') == 'required_registered': self.raise_login_required() + if error.get('error') == 'invalid_request': + raise ExtractorError('Authentication failed', 
expected=True) raise ExtractorError(error['error_description'], expected=True) raise @@ -71,8 +73,6 @@ class AtresPlayerIE(InfoExtractor): 'password': password, })) except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: - raise ExtractorError('Authentication failure', expected=True) self._handle_error(e, 400) def _get_mpd_subtitles(self, mpd_xml, mpd_url): From 7e51ac7712d122beff12d2ef8483b1951f325747 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ram=C3=B3n=20Sola?= Date: Mon, 4 Oct 2021 13:17:11 +0200 Subject: [PATCH 12/12] [atresplayer] call _merge_subtitles instead of update --- youtube_dl/extractor/atresplayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index a9ac4a669..75370c5ba 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -137,7 +137,7 @@ class AtresPlayerIE(InfoExtractor): if mpd: mpd_doc, mpd_handle = mpd mpd_base_url = base_url(mpd_handle.geturl()) - subtitles.update(self._get_mpd_subtitles(mpd_doc, mpd_base_url)) + subtitles = self._merge_subtitles(subtitles, self._get_mpd_subtitles(mpd_doc, mpd_base_url)) formats.extend(self._parse_mpd_formats( mpd_doc, mpd_id='dash', mpd_base_url=mpd_base_url, mpd_url=src)) self._sort_formats(formats)