From 1b3feca0a700c4d4d7c4d8b01fc5f033cf48f421 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Sat, 8 Apr 2017 14:11:03 +0700
Subject: [PATCH] [raiplay] Extract subtitles

---
 youtube_dl/extractor/rai.py | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py
index 077546a73..81eb9db85 100644
--- a/youtube_dl/extractor/rai.py
+++ b/youtube_dl/extractor/rai.py
@@ -97,6 +97,25 @@ class RaiBaseIE(InfoExtractor):
             'formats': formats,
         }.items() if v is not None)
 
+    @staticmethod
+    def _extract_subtitles(url, subtitle_url):
+        subtitles = {}
+        if subtitle_url and isinstance(subtitle_url, compat_str):
+            subtitle_url = urljoin(url, subtitle_url)
+            STL_EXT = '.stl'
+            SRT_EXT = '.srt'
+            subtitles['it'] = [{
+                'ext': 'stl',
+                'url': subtitle_url,
+            }]
+            if subtitle_url.endswith(STL_EXT):
+                srt_url = subtitle_url[:-len(STL_EXT)] + SRT_EXT
+                subtitles['it'].append({
+                    'ext': 'srt',
+                    'url': srt_url,
+                })
+        return subtitles
+
 
 class RaiPlayIE(RaiBaseIE):
     _VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE
@@ -168,6 +187,8 @@ class RaiPlayIE(RaiBaseIE):
         timestamp = unified_timestamp(try_get(
             media, lambda x: x['availabilities'][0]['start'], compat_str))
 
+        subtitles = self._extract_subtitles(url, video.get('subtitles'))
+
         info = {
             'id': video_id,
             'title': title,
@@ -183,6 +204,7 @@ class RaiPlayIE(RaiBaseIE):
             'season_number': int_or_none(try_get(
                 media, lambda x: x['isPartOf']['numeroStagioni'])),
             'season': media.get('stagione') or None,
+            'subtitles': subtitles,
         }
 
         info.update(relinker_info)
@@ -307,17 +329,7 @@ class RaiIE(RaiBaseIE):
                     'url': compat_urlparse.urljoin(url, thumbnail_url),
                 })
 
-        subtitles = {}
-        captions = media.get('subtitlesUrl')
-        if captions:
-            STL_EXT = '.stl'
-            SRT_EXT = '.srt'
-            if captions.endswith(STL_EXT):
-                captions = captions[:-len(STL_EXT)] + SRT_EXT
-            subtitles['it'] = [{
-                'ext': 'srt',
-                'url': captions,
-            }]
+        subtitles = self._extract_subtitles(url, media.get('subtitlesUrl'))
 
         info = {
             'id': content_id,