From 1980ff4550a3f040fbc1e054d6b91013e9d8cb96 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 26 May 2021 11:04:39 +0100 Subject: [PATCH 01/17] [vimeo] fix vimeo pro embed extraction(closes #29126) --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 102687b82..0b386f450 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -647,7 +647,7 @@ class VimeoIE(VimeoBaseInfoExtractor): expected=True) raise - if '://player.vimeo.com/video/' in url: + if '//player.vimeo.com/video/' in url: config = self._parse_json(self._search_regex( r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id) if config.get('view') == 4: From 24297a42efc52862cb9510d32b28efd7faf49af6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 May 2021 00:36:26 +0700 Subject: [PATCH 02/17] [youtube] Fix get_video_info request (closes #29086, closes #29165) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0c52e5a8b..bf858c39d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1499,6 +1499,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'unable to download video info webpage', query={ 'video_id': video_id, 'eurl': 'https://youtube.googleapis.com/v/' + video_id, + 'html5': 1, }, fatal=False)), lambda x: x['player_response'][0], compat_str) or '{}', video_id) From e13a01061d149f4fac7db1a50124c4745a11c16e Mon Sep 17 00:00:00 2001 From: phlip Date: Fri, 28 May 2021 11:01:59 +1000 Subject: [PATCH 03/17] [twitch:clips] Add access token query to download URLs (closes #29136) --- youtube_dl/extractor/twitch.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index a7867f4d3..7f9738d43 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -49,6 +49,7 @@ class TwitchBaseIE(InfoExtractor): 'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84', 'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e', 'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01', + 'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11', 'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c', 'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687', } @@ -924,6 +925,17 @@ class TwitchClipsIE(TwitchBaseIE): raise ExtractorError( 'This clip is no longer available', expected=True) + access_token = self._download_gql( + video_id, [{ + 'operationName': 'VideoAccessToken_Clip', + 'variables': { + 'slug': video_id, + }, + }], + 'Downloading access token GraphQL') + access_token = try_get( + access_token, lambda x: x[0]['data']['clip']['playbackAccessToken']) + formats = [] for option in clip.get('videoQualities', []): if not isinstance(option, dict): @@ -931,6 +943,14 @@ class TwitchClipsIE(TwitchBaseIE): source = url_or_none(option.get('sourceURL')) if not source: continue + if access_token: + source = "%s%s%s" % ( + source, + "&" if "?" in source else "?", + compat_urllib_parse_urlencode({ + "sig": access_token.get('signature'), + "token": access_token.get('value'), + })) formats.append({ 'url': source, 'format_id': option.get('quality'), From f3cd1d9cec91943a459a0662cbcffe3b2e1f6675 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 May 2021 01:46:49 +0700 Subject: [PATCH 04/17] [twitch:clips] Improve extraction (closes #29149) --- youtube_dl/extractor/twitch.py | 48 ++++++++++++++++------------------ 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 7f9738d43..a378bd6dc 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -894,7 +894,25 @@ class TwitchClipsIE(TwitchBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - clip = self._download_base_gql( + clip = self._download_gql( + video_id, [{ + 'operationName': 'VideoAccessToken_Clip', + 'variables': { + 'slug': video_id, + }, + }], + 'Downloading clip access token GraphQL')[0]['data']['clip'] + + if not clip: + raise ExtractorError( + 'This clip is no longer available', expected=True) + + access_query = { + 'sig': clip['playbackAccessToken']['signature'], + 'token': clip['playbackAccessToken']['value'], + } + + data = self._download_base_gql( video_id, { 'query': '''{ clip(slug: "%s") { @@ -919,22 +937,10 @@ class TwitchClipsIE(TwitchBaseIE): } viewCount } -}''' % video_id}, 'Downloading clip GraphQL')['data']['clip'] +}''' % video_id}, 'Downloading clip GraphQL', fatal=False) - if not clip: - raise ExtractorError( - 'This clip is no longer available', expected=True) - - access_token = self._download_gql( - video_id, [{ - 'operationName': 'VideoAccessToken_Clip', - 'variables': { - 'slug': video_id, - }, - }], - 'Downloading access token GraphQL') - access_token = try_get( - access_token, lambda x: x[0]['data']['clip']['playbackAccessToken']) + if data: + clip = try_get(data, lambda x: x['data']['clip'], dict) or clip formats = [] for option in clip.get('videoQualities', []): @@ -943,16 +949,8 @@ class TwitchClipsIE(TwitchBaseIE): source = url_or_none(option.get('sourceURL')) if not source: continue - if access_token: - source = "%s%s%s" % ( - source, - "&" if "?" in source else "?", - compat_urllib_parse_urlencode({ - "sig": access_token.get('signature'), - "token": access_token.get('value'), - })) formats.append({ - 'url': source, + 'url': update_url_query(source, access_query), 'format_id': option.get('quality'), 'height': int_or_none(option.get('quality')), 'fps': int_or_none(option.get('frameRate')), From 6511b8e8d7db78d4ba3706df5122a74e1c9b9b57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 May 2021 03:05:22 +0700 Subject: [PATCH 05/17] [ted] Prefer own formats over external sources (closes #29142) --- youtube_dl/extractor/ted.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 63e2455b2..f09f1a3f9 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -123,6 +123,10 @@ class TEDIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + # with own formats and private Youtube external + 'url': 'https://www.ted.com/talks/spencer_wells_a_family_tree_for_humanity', + 'only_matching': True, }] _NATIVE_FORMATS = { @@ -210,16 +214,6 @@ class TEDIE(InfoExtractor): player_talk = talk_info['player_talks'][0] - external = player_talk.get('external') - if isinstance(external, dict): - service = external.get('service') - if isinstance(service, compat_str): - ext_url = None - if service.lower() == 'youtube': - ext_url = external.get('code') - - return self.url_result(ext_url or external['uri']) - resources_ = player_talk.get('resources') or talk_info.get('resources') http_url = None @@ -294,6 +288,16 @@ class TEDIE(InfoExtractor): 'vcodec': 'none', }) + if not formats: + external = player_talk.get('external') + if isinstance(external, dict): + service = external.get('service') + if isinstance(service, compat_str): + ext_url = None + if service.lower() == 'youtube': + ext_url = external.get('code') + return self.url_result(ext_url or external['uri']) + self._sort_formats(formats) video_id = compat_str(talk_info['id']) From 2ee6c7f11074917c08253af4c47f9258aa1e0dad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 May 2021 03:43:59 +0700 Subject: [PATCH 06/17] [ustream] Detect https embeds (closes #29133) --- youtube_dl/extractor/ustream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 9e860aeb7..1e29cbe22 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -75,7 +75,7 @@ class UstreamIE(InfoExtractor): @staticmethod def _extract_url(webpage): mobj = re.search( - r']+?src=(["\'])(?Phttp://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage) + r']+?src=(["\'])(?Phttps?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage) if mobj is not None: return mobj.group('url') From d495292852b6c2f1bd58bc2141ff2b0265c952cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 May 2021 06:14:59 +0700 Subject: [PATCH 07/17] [ard] Relax _VALID_URL and fix video ids (closes #22724, closes #29091) --- youtube_dl/extractor/ard.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index d57c5ba0f..d45a9fe52 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -249,14 +249,14 @@ class ARDMediathekIE(ARDMediathekBaseIE): class ARDIE(InfoExtractor): - _VALID_URL = r'(?Phttps?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P[^/?#]+)-(?:video-?)?(?P[0-9]+))\.html' + _VALID_URL = r'(?Phttps?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P[^/?#&]+))\.html' _TESTS = [{ # available till 7.01.2022 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html', 'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1', 'info_dict': { - 'display_id': 'maischberger-die-woche', - 'id': '100', + 'id': 'maischberger-die-woche-video100', + 'display_id': 'maischberger-die-woche-video100', 'ext': 'mp4', 'duration': 3687.0, 'title': 'maischberger. die woche vom 7. Januar 2021', @@ -264,16 +264,25 @@ class ARDIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', }, }, { - 'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html', + 'url': 'https://www.daserste.de/information/politik-weltgeschehen/morgenmagazin/videosextern/dominik-kahun-aus-der-nhl-direkt-zur-weltmeisterschaft-100.html', + 'only_matching': True, + }, { + 'url': 'https://www.daserste.de/information/nachrichten-wetter/tagesthemen/videosextern/tagesthemen-17736.html', 'only_matching': True, }, { 'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html', 'only_matching': True, + }, { + 'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/Drehpause-100.html', + 'only_matching': True, + }, { + 'url': 'https://www.daserste.de/unterhaltung/film/filmmittwoch-im-ersten/videos/making-ofwendezeit-video-100.html', + 'only_matching': True, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') + display_id = mobj.group('id') player_url = mobj.group('mainurl') + '~playerXml.xml' doc = self._download_xml(player_url, display_id) @@ -324,7 +333,7 @@ class ARDIE(InfoExtractor): self._sort_formats(formats) return { - 'id': mobj.group('id'), + 'id': xpath_text(video_node, './videoId', default=display_id), 'formats': formats, 'display_id': display_id, 'title': video_node.find('./title').text, From 82f3993ba3f4d435d3bc9e37426ab225f5549510 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 4 Jun 2021 17:51:44 +0100 Subject: [PATCH 08/17] [formula1] fix extraction(closes #29206) --- youtube_dl/extractor/formula1.py | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/formula1.py b/youtube_dl/extractor/formula1.py index fecfc28ae..67662e6de 100644 --- a/youtube_dl/extractor/formula1.py +++ b/youtube_dl/extractor/formula1.py @@ -5,29 +5,23 @@ from .common import InfoExtractor class Formula1IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?formula1\.com/(?:content/fom-website/)?en/video/\d{4}/\d{1,2}/(?P.+?)\.html' - _TESTS = [{ - 'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html', - 'md5': '8c79e54be72078b26b89e0e111c0502b', + _VALID_URL = r'https?://(?:www\.)?formula1\.com/en/latest/video\.[^.]+\.(?P\d+)\.html' + _TEST = { + 'url': 'https://www.formula1.com/en/latest/video.race-highlights-spain-2016.6060988138001.html', + 'md5': 'be7d3a8c2f804eb2ab2aa5d941c359f8', 'info_dict': { - 'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV', + 'id': '6060988138001', 'ext': 'mp4', 'title': 'Race highlights - Spain 2016', + 'timestamp': 1463332814, + 'upload_date': '20160515', + 'uploader_id': '6057949432001', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'add_ie': ['Ooyala'], - }, { - 'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html', - 'only_matching': True, - }] + 'add_ie': ['BrightcoveNew'], + } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/6057949432001/S1WMrhjlh_default/index.html?videoId=%s' def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - ooyala_embed_code = self._search_regex( - r'data-videoid="([^"]+)"', webpage, 'ooyala embed code') + bc_id = self._match_id(url) return self.url_result( - 'ooyala:%s' % ooyala_embed_code, 'Ooyala', ooyala_embed_code) + self.BRIGHTCOVE_URL_TEMPLATE % bc_id, 'BrightcoveNew', bc_id) From 943070af4a9e13ef2b81c5e484d9c799f1845aab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 5 Jun 2021 23:42:25 +0700 Subject: [PATCH 09/17] [orf:tvthek] Fix thumbnails extraction (closes #29217) --- youtube_dl/extractor/orf.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 3fadbcbea..ed8a9a841 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -140,6 +140,25 @@ class ORFTVthekIE(InfoExtractor): }) upload_date = unified_strdate(sd.get('created_date')) + + thumbnails = [] + preview = sd.get('preview_image_url') + if preview: + thumbnails.append({ + 'id': 'preview', + 'url': preview, + 'preference': 0, + }) + image = sd.get('image_full_url') + if not image and len(data_jsb) == 1: + image = self._og_search_thumbnail(webpage) + if image: + thumbnails.append({ + 'id': 'full', + 'url': image, + 'preference': 1, + }) + entries.append({ '_type': 'video', 'id': video_id, @@ -149,7 +168,7 @@ class ORFTVthekIE(InfoExtractor): 'description': sd.get('description'), 'duration': int_or_none(sd.get('duration_in_seconds')), 'upload_date': upload_date, - 'thumbnail': sd.get('image_full_url'), + 'thumbnails': thumbnails, }) return { From fdf91c52a8b58b3b7c12a393629fc962d6ab7618 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Jun 2021 00:11:09 +0700 Subject: [PATCH 10/17] [youporn] Fix formats and view count extraction (closes #29216) --- youtube_dl/extractor/youporn.py | 111 ++++++++++++-------------------- 1 file changed, 42 insertions(+), 69 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 33114363d..7084d3d12 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -4,13 +4,12 @@ import re from .common import InfoExtractor from ..utils import ( + extract_attributes, int_or_none, str_to_int, - unescapeHTML, unified_strdate, url_or_none, ) -from ..aes import aes_decrypt_text class YouPornIE(InfoExtractor): @@ -34,6 +33,7 @@ class YouPornIE(InfoExtractor): 'tags': list, 'age_limit': 18, }, + 'skip': 'This video has been disabled', }, { # Unknown uploader 'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4', @@ -78,6 +78,40 @@ class YouPornIE(InfoExtractor): video_id = mobj.group('id') display_id = mobj.group('display_id') or video_id + definitions = self._download_json( + 'https://www.youporn.com/api/video/media_definitions/%s/' % video_id, + display_id) + + formats = [] + for definition in definitions: + if not isinstance(definition, dict): + continue + video_url = url_or_none(definition.get('videoUrl')) + if not video_url: + continue + f = { + 'url': video_url, + 'filesize': int_or_none(definition.get('videoSize')), + } + height = int_or_none(definition.get('quality')) + # Video URL's path looks like this: + # /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 + # /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 + # /videos/201703/11/109285532/1080P_4000K_109285532.mp4 + # We will benefit from it by extracting some metadata + mobj = re.search(r'(?P\d{3,4})[pP]_(?P\d+)[kK]_\d+', video_url) + if mobj: + if not height: + height = int(mobj.group('height')) + bitrate = int(mobj.group('bitrate')) + f.update({ + 'format_id': '%dp-%dk' % (height, bitrate), + 'tbr': bitrate, + }) + f['height'] = height + formats.append(f) + self._sort_formats(formats) + webpage = self._download_webpage( 'http://www.youporn.com/watch/%s' % video_id, display_id, headers={'Cookie': 'age_verified=1'}) @@ -88,65 +122,6 @@ class YouPornIE(InfoExtractor): webpage, default=None) or self._html_search_meta( 'title', webpage, fatal=True) - links = [] - - # Main source - definitions = self._parse_json( - self._search_regex( - r'mediaDefinition\s*[=:]\s*(\[.+?\])\s*[;,]', webpage, - 'media definitions', default='[]'), - video_id, fatal=False) - if definitions: - for definition in definitions: - if not isinstance(definition, dict): - continue - video_url = url_or_none(definition.get('videoUrl')) - if video_url: - links.append(video_url) - - # Fallback #1, this also contains extra low quality 180p format - for _, link in re.findall(r']+href=(["\'])(http(?:(?!\1).)+\.mp4(?:(?!\1).)*)\1[^>]+title=["\']Download [Vv]ideo', webpage): - links.append(link) - - # Fallback #2 (unavailable as at 22.06.2017) - sources = self._search_regex( - r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None) - if sources: - for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources): - links.append(link) - - # Fallback #3 (unavailable as at 22.06.2017) - for _, link in re.findall( - r'(?:videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage): - links.append(link) - - # Fallback #4, encrypted links (unavailable as at 22.06.2017) - for _, encrypted_link in re.findall( - r'encryptedQuality\d{3,4}URL\s*=\s*(["\'])([\da-zA-Z+/=]+)\1', webpage): - links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8')) - - formats = [] - for video_url in set(unescapeHTML(link) for link in links): - f = { - 'url': video_url, - } - # Video URL's path looks like this: - # /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 - # /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 - # /videos/201703/11/109285532/1080P_4000K_109285532.mp4 - # We will benefit from it by extracting some metadata - mobj = re.search(r'(?P\d{3,4})[pP]_(?P\d+)[kK]_\d+', video_url) - if mobj: - height = int(mobj.group('height')) - bitrate = int(mobj.group('bitrate')) - f.update({ - 'format_id': '%dp-%dk' % (height, bitrate), - 'height': height, - 'tbr': bitrate, - }) - formats.append(f) - self._sort_formats(formats) - description = self._html_search_regex( r'(?s)]+\bid=["\']description["\'][^>]*>(.+?)', webpage, 'description', @@ -169,13 +144,12 @@ class YouPornIE(InfoExtractor): age_limit = self._rta_search(webpage) - average_rating = int_or_none(self._search_regex( - r']+class=["\']videoRatingPercentage["\'][^>]*>(\d+)%', - webpage, 'average rating', fatal=False)) - - view_count = str_to_int(self._search_regex( - r'(?s)]+class=(["\']).*?\bvideoInfoViews\b.*?\1[^>]*>.*?(?P[\d,.]+)<', - webpage, 'view count', fatal=False, group='count')) + view_count = None + views = self._search_regex( + r'(]+\bclass=["\']js_videoInfoViews["\']>)', webpage, + 'views', default=None) + if views: + view_count = str_to_int(extract_attributes(views).get('data-value')) comment_count = str_to_int(self._search_regex( r'>All [Cc]omments? \(([\d,.]+)\)', webpage, 'comment count', default=None)) @@ -201,7 +175,6 @@ class YouPornIE(InfoExtractor): 'duration': duration, 'uploader': uploader, 'upload_date': upload_date, - 'average_rating': average_rating, 'view_count': view_count, 'comment_count': comment_count, 'categories': categories, From bb7ac1ed669d67d79fa1a3b9e5c70271892ecbcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Jun 2021 01:16:43 +0700 Subject: [PATCH 11/17] [facebook] Improve login required detection --- youtube_dl/extractor/facebook.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index cb34c59f5..04650af39 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -521,7 +521,10 @@ class FacebookIE(InfoExtractor): raise ExtractorError( 'The video is not available, Facebook said: "%s"' % m_msg.group(1), expected=True) - elif '>You must log in to continue' in webpage: + elif any(p in webpage for p in ( + '>You must log in to continue', + 'id="login_form"', + 'id="loginbutton"')): self.raise_login_required() if not video_data and '/watchparty/' in url: From 5f85eb820cb7eb89dcb567f9cbfefb5d9038b9c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Jun 2021 01:32:15 +0700 Subject: [PATCH 12/17] [ChangeLog] Actualize [ci skip] --- ChangeLog | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5ea1d3150..06efe32ab 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +version + +Extractors +* [facebook] Improve login required detection +* [youporn] Fix formats and view count extraction (#29216) +* [orf:tvthek] Fix thumbnails extraction (#29217) +* [formula1] Fix extraction (#29206) +* [ard] Relax URL regular expression and fix video ids (#22724, #29091) ++ [ustream] Detect https embeds (#29133) +* [ted] Prefer own formats over external sources (#29142) +* [twitch:clips] Improve extraction (#29149) ++ [twitch:clips] Add access token query to download URLs (#29136) +* [youtube] Fix get_video_info request (#29086, #29165) +* [vimeo] Fix vimeo pro embed extraction (#29126) +* [redbulltv] Fix embed data extraction (#28770) +* [shahid] Relax URL regular expression (#28772, #28930) + + version 2021.05.16 Core From b224cf39d53bd16bcfda2ac493712c3ff449ecb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Jun 2021 01:38:22 +0700 Subject: [PATCH 13/17] release 2021.06.06 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index d67bb482c..4eb505231 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2021.05.16** +- [ ] I've verified that I'm running youtube-dl version **2021.06.06** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2021.05.16 + [debug] youtube-dl version 2021.06.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index efe9fef8c..9fed0b489 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2021.05.16** +- [ ] I've verified that I'm running youtube-dl version **2021.06.06** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index e213fc1a9..573e8ded0 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2021.05.16** +- [ ] I've verified that I'm running youtube-dl version **2021.06.06** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 1645087ad..c0031bf7a 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2021.05.16** +- [ ] I've verified that I'm running youtube-dl version **2021.06.06** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2021.05.16 + [debug] youtube-dl version 2021.06.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index e6e569af6..1138ab2ca 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2021.05.16** +- [ ] I've verified that I'm running youtube-dl version **2021.06.06** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 06efe32ab..680fffdf8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2021.06.06 Extractors * [facebook] Improve login required detection diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b16a84100..461dd87ca 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2021.05.16' +__version__ = '2021.06.06' From c2350cac243ba1ec1586fe85b0d62d1b700047a2 Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sun, 6 Jun 2021 05:32:27 +0700 Subject: [PATCH 14/17] [README.md] Update MSVC 2010 redist URL (closes #29222) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 059141611..2841ed68f 100644 --- a/README.md +++ b/README.md @@ -893,7 +893,7 @@ Since June 2012 ([#342](https://github.com/ytdl-org/youtube-dl/issues/342)) yout ### The exe throws an error due to missing `MSVCR100.dll` -To run the exe you need to install first the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555). +To run the exe you need to install first the [Microsoft Visual C++ 2010 Service Pack 1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe). ### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files? From d156bc8d59dd469bf70b822926504f213ce237de Mon Sep 17 00:00:00 2001 From: kikuyan Date: Thu, 17 Jun 2021 06:02:06 +0900 Subject: [PATCH 15/17] [orf:tvthek] Add support for MPD formats (closes #28672) (#29236) --- youtube_dl/extractor/orf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index ed8a9a841..8d537d7ae 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -98,6 +98,9 @@ class ORFTVthekIE(InfoExtractor): elif ext == 'f4m': formats.extend(self._extract_f4m_formats( src, video_id, f4m_id=format_id, fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + src, video_id, mpd_id=format_id, fatal=False)) else: formats.append({ 'format_id': format_id, From 8fe5d54eb721f1bbb8c8a0d18810a42d1257e406 Mon Sep 17 00:00:00 2001 From: kikuyan Date: Thu, 17 Jun 2021 06:12:13 +0900 Subject: [PATCH 16/17] [appleconnect] Fix extraction (#29208) --- youtube_dl/extractor/appleconnect.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/appleconnect.py b/youtube_dl/extractor/appleconnect.py index a84b8b1eb..494f8330c 100644 --- a/youtube_dl/extractor/appleconnect.py +++ b/youtube_dl/extractor/appleconnect.py @@ -9,10 +9,10 @@ from ..utils import ( class AppleConnectIE(InfoExtractor): - _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P[\w-]+)' - _TEST = { + _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P[\w-]+)' + _TESTS = [{ 'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3', - 'md5': 'e7c38568a01ea45402570e6029206723', + 'md5': 'c1d41f72c8bcaf222e089434619316e4', 'info_dict': { 'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3', 'ext': 'm4v', @@ -22,7 +22,10 @@ class AppleConnectIE(InfoExtractor): 'upload_date': '20150710', 'timestamp': 1436545535, }, - } + }, { + 'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -36,7 +39,7 @@ class AppleConnectIE(InfoExtractor): video_data = self._parse_json(video_json, video_id) timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp')) - like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count')) + like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None)) return { 'id': video_id, From a7f61feab2dbfc50a7ebe8b0ea390bd0e5edf77a Mon Sep 17 00:00:00 2001 From: kikuyan Date: Thu, 17 Jun 2021 12:34:33 +0900 Subject: [PATCH 17/17] [egghead] Add support for app.egghead.io (closes #28404) (#29303) Co-authored-by: Sergey M. --- youtube_dl/extractor/egghead.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py index aff9b88c0..9bbd703e0 100644 --- a/youtube_dl/extractor/egghead.py +++ b/youtube_dl/extractor/egghead.py @@ -22,16 +22,19 @@ class EggheadBaseIE(InfoExtractor): class EggheadCourseIE(EggheadBaseIE): IE_DESC = 'egghead.io course' IE_NAME = 'egghead:course' - _VALID_URL = r'https://egghead\.io/courses/(?P[^/?#&]+)' - _TEST = { + _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P[^/?#&]+)' + _TESTS = [{ 'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript', 'playlist_count': 29, 'info_dict': { - 'id': '72', + 'id': '432655', 'title': 'Professor Frisby Introduces Composable Functional JavaScript', 'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$', }, - } + }, { + 'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript', + 'only_matching': True, + }] def _real_extract(self, url): playlist_id = self._match_id(url) @@ -65,7 +68,7 @@ class EggheadCourseIE(EggheadBaseIE): class EggheadLessonIE(EggheadBaseIE): IE_DESC = 'egghead.io lesson' IE_NAME = 'egghead:lesson' - _VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P[^/?#&]+)' + _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box', 'info_dict': { @@ -88,6 +91,9 @@ class EggheadLessonIE(EggheadBaseIE): }, { 'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application', 'only_matching': True, + }, { + 'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box', + 'only_matching': True, }] def _real_extract(self, url):