From a693386df1957ba03cbf5156a65dd18b2c37ac42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 21 Apr 2018 23:22:10 +0700 Subject: [PATCH] [rentv] Improve extraction (closes #15227) --- youtube_dl/extractor/rentv.py | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/rentv.py b/youtube_dl/extractor/rentv.py index df528b09e..8bcf87126 100644 --- a/youtube_dl/extractor/rentv.py +++ b/youtube_dl/extractor/rentv.py @@ -3,6 +3,10 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_str +from ..utils import ( + determine_ext, + int_or_none, +) class RENTVIE(InfoExtractor): @@ -13,7 +17,9 @@ class RENTVIE(InfoExtractor): 'info_dict': { 'id': '118577', 'ext': 'mp4', - 'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"' + 'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"', + 'timestamp': 1472230800, + 'upload_date': '20160826', } }, { 'url': 'http://ren.tv/player/118577', @@ -27,18 +33,31 @@ class RENTVIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id) config = self._parse_json(self._search_regex( - r'config\s*=\s*({.+});', webpage, 'config'), video_id) + r'config\s*=\s*({.+})\s*;', webpage, 'config'), video_id) + title = config['title'] formats = [] - for video in config.get('src', ''): - formats.append({ - 'url': video.get('src', '') - }) + for video in config['src']: + src = video.get('src') + if not src or not isinstance(src, compat_str): + continue + ext = determine_ext(src) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': src, + }) self._sort_formats(formats) return { 'id': video_id, + 'title': title, + 'description': config.get('description'), + 'thumbnail': config.get('image'), + 'duration': int_or_none(config.get('duration')), + 'timestamp': int_or_none(config.get('date')), 'formats': formats, - 'title': config.get('title', ''), - 'thumbnail': config.get('image', '') }