[bbc] Add support for vxp-playlist-data embeds (Closes #6453)

This commit is contained in:
Sergey M․ 2015-08-04 20:44:22 +06:00
parent d96d604e53
commit a346b1ff57

View File

@ -526,6 +526,18 @@ class BBCIE(BBCCoUkIE):
'params': {
'skip_download': True,
}
}, {
# single video from video playlist embedded with vxp-playlist-data JSON
'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
'info_dict': {
'id': 'p02w6qjc',
'ext': 'mp4',
'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
'duration': 56,
},
'params': {
'skip_download': True,
}
}, {
# single video story with digitalData
'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
@ -695,14 +707,37 @@ class BBCIE(BBCCoUkIE):
if not medias:
# Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
- media_asset_page = self._parse_json(
- self._search_regex(
- r'mediaAssetPage\.init\(\s*({.+?}), "/', webpage, 'media asset'),
- playlist_id)
+ media_asset = self._search_regex(
+ r'mediaAssetPage\.init\(\s*({.+?}), "/',
+ webpage, 'media asset', default=None)
+ if media_asset:
+ media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False)
medias = []
for video in media_asset_page.get('videos', {}).values():
medias.extend(video.values())
if not medias:
# Multiple video playlist with single `now playing` entry (e.g.
# http://www.bbc.com/news/video_and_audio/must_see/33767813)
vxp_playlist = self._parse_json(
self._search_regex(
r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
webpage, 'playlist data'),
playlist_id)
playlist_medias = []
for item in vxp_playlist:
media = item.get('media')
if not media:
continue
playlist_medias.append(media)
# Download a single video if a media item's asset id matches the video id from the URL
if item.get('advert', {}).get('assetId') == playlist_id:
medias = [media]
break
# Fallback to the whole playlist
if not medias:
medias = playlist_medias
entries = []
for num, media_meta in enumerate(medias, start=1):
formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)