From 31dbd054c801ec14c1ea29a2167b70c980f1d782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 3 Sep 2019 01:24:20 +0700 Subject: [PATCH] [platzi] Improve client data extraction (closes #22290) --- youtube_dl/extractor/platzi.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/platzi.py b/youtube_dl/extractor/platzi.py index cd6b966c5..602207beb 100644 --- a/youtube_dl/extractor/platzi.py +++ b/youtube_dl/extractor/platzi.py @@ -107,7 +107,11 @@ class PlatziIE(PlatziBaseIE): data = self._parse_json( self._search_regex( - r'client_data\s*=\s*({.+?})\s*;', webpage, 'client data'), + # client_data may contain "};" so that we have to try more + # strict regex first + (r'client_data\s*=\s*({.+?})\s*;\s*\n', + r'client_data\s*=\s*({.+?})\s*;'), + webpage, 'client data'), lecture_id) material = data['initialState']['material']