[vevo] Some improvements (fixes #1580)

Extract the info from http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc={id}
Some videos don't have an smil manifest, extract the video urls directly from the json and use the last version of the video.
Extract all the available formats and set the 'formats' field of the result
This commit is contained in:
Jaime Marquínez Ferrándiz 2013-10-08 21:23:55 +02:00
parent 2ae3edb1cf
commit 88bd97e34c

View File

@ -1,11 +1,15 @@
import re import re
import json import json
import xml.etree.ElementTree
import datetime
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
ExtractorError, ExtractorError,
) )
class VevoIE(InfoExtractor): class VevoIE(InfoExtractor):
""" """
Accepts urls from vevo.com or in the format 'vevo:{id}' Accepts urls from vevo.com or in the format 'vevo:{id}'
@ -15,11 +19,11 @@ class VevoIE(InfoExtractor):
_TEST = { _TEST = {
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
u'file': u'GB1101300280.mp4', u'file': u'GB1101300280.mp4',
u'md5': u'06bea460acb744eab74a9d7dcb4bfd61',
u'info_dict': { u'info_dict': {
u"upload_date": u"20130624", u"upload_date": u"20130624",
u"uploader": u"Hurts", u"uploader": u"Hurts",
u"title": u"Somebody to Die For" u"title": u"Somebody to Die For",
u'duration': 230,
} }
} }
@ -27,27 +31,47 @@ class VevoIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
json_url = 'http://www.vevo.com/data/video/%s' % video_id json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
base_url = 'http://smil.lvl3.vevo.com'
videos_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (base_url, video_id, video_id.lower())
info_json = self._download_webpage(json_url, video_id, u'Downloading json info') info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
links_webpage = self._download_webpage(videos_url, video_id, u'Downloading videos urls')
self.report_extraction(video_id) self.report_extraction(video_id)
video_info = json.loads(info_json) video_info = json.loads(info_json)['video']
m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):/?(?P<url>.*?)"', links_webpage)) last_version = {'version': -1}
if m_urls is None or len(m_urls) == 0: for version in video_info['videoVersions']:
raise ExtractorError(u'Unable to extract video url') # These are the HTTP downloads, other types are for different manifests
# They are sorted from worst to best quality if version['sourceType'] == 2:
m_url = m_urls[-1] if version['version'] > last_version['version']:
video_url = base_url + '/' + m_url.group('url') last_version = version
ext = m_url.group('ext') if last_version['version'] == -1:
raise ExtractorError(u'Unable to extract last version of the video')
return {'url': video_url, renditions = xml.etree.ElementTree.fromstring(last_version['data'])
'ext': ext, formats = []
'id': video_id, # Already sorted from worst to best quality
'title': video_info['title'], for rend in renditions.findall('rendition'):
'thumbnail': video_info['img'], attr = rend.attrib
'upload_date': video_info['launchDate'].replace('/',''), f_url = attr['url']
'uploader': video_info['Artists'][0]['title'], formats.append({
} 'url': f_url,
'ext': determine_ext(f_url),
'height': int(attr['frameheight']),
'width': int(attr['frameWidth']),
})
date_epoch = int(self._search_regex(
r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))/1000
upload_date = datetime.datetime.fromtimestamp(date_epoch)
info = {
'id': video_id,
'title': video_info['title'],
'formats': formats,
'thumbnail': video_info['imageUrl'],
'upload_date': upload_date.strftime('%Y%m%d'),
'uploader': video_info['mainArtists'][0]['artistName'],
'duration': video_info['duration'],
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info