From e8de54bce50f6f77a4d7e8e80675f7003d5bf630 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 13 Mar 2023 19:45:54 +0000 Subject: [PATCH] [core] Handle `/../` sequences in HTTP URLs * use Python's RFC implementation for embedded sequences * hack: strip unbalanced leading `../` from path, like eg Firefox See https://github.com/yt-dlp/yt-dlp/issues/3355 --- youtube_dl/YoutubeDL.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 8e8546596..bcf781744 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -39,6 +39,7 @@ from .compat import ( compat_str, compat_tokenize_tokenize, compat_urllib_error, + compat_urllib_parse, compat_urllib_request, compat_urllib_request_DataHandler, ) @@ -60,6 +61,7 @@ from .utils import ( format_bytes, formatSeconds, GeoRestrictedError, + HEADRequest, int_or_none, ISO3166Utils, locked_file, @@ -74,6 +76,7 @@ from .utils import ( preferredencoding, prepend_extension, process_communicate_or_kill, + PUTRequest, register_socks_protocols, render_table, replace_extension, @@ -2297,6 +2300,27 @@ class YoutubeDL(object): """ Start an HTTP download """ if isinstance(req, compat_basestring): req = sanitized_Request(req) + # an embedded /../ sequence is not automatically handled by urllib2 + # see https://github.com/yt-dlp/yt-dlp/issues/3355 + url = req.get_full_url() + parts = url.partition('/../') + if parts[1]: + url = compat_urllib_parse.urljoin(parts[0] + parts[1][:1], parts[1][1:] + parts[2]) + if url: + # worse, URL path may have initial /../ against RFCs: work-around + # by stripping such prefixes, like eg Firefox + parts = compat_urllib_parse.urlsplit(url) + path = parts.path + while path.startswith('/../'): + path = path[3:] + url = parts._replace(path=path).geturl() + # get a new Request with the munged URL + if url != req.get_full_url(): + req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get( + req.get_method(), compat_urllib_request.Request) + req = req_type( + url, data=req.data, headers=dict(req.header_items()), + origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) return self._opener.open(req, timeout=self._socket_timeout) def print_debug_header(self):