Merge branch 'master' into subtitles_rework

This commit is contained in:
Ismael Mejia 2013-08-22 23:29:36 +02:00
commit 18b4e04f1c
44 changed files with 1395 additions and 465 deletions

1
.gitignore vendored
View File

@ -9,7 +9,6 @@ build/
dist/
MANIFEST
README.txt
README.md
youtube-dl.1
youtube-dl.bash-completion
youtube-dl

View File

@ -11,30 +11,42 @@ tests = [
# 90
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
"mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
# 89
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
"/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
# 88
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
"J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
# 87
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
"!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
# 86 - vfl_ymO4Z 2013/06/27
"uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
# 86
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
# 85 - vflSAFCP9 2013/07/19
"yuioplkjhgfdsazecvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"),
# 85
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
"ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
# 84
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
# 83 - vflcaqGO8 2013/07/11
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"),
# 83
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
"urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"),
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
# 82
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
"Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
# 81
# 81 - vflLC8JvQ 2013/07/25
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
"urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."),
"C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
# 79 - vflLC8JvQ 2013/07/25 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/",
"Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
]
tests_age_gate = [
# 86 - vflqinMWD
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
]
def find_matching(wrong, right):
@ -87,6 +99,8 @@ def genall(tests):
def main():
print(genall(tests))
print(u' Age gate:')
print(genall(tests_age_gate))
if __name__ == '__main__':
main()

View File

@ -50,6 +50,7 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc')
def test_no_duplicates(self):
ies = gen_extractors()

38
test/test_playlists.py Normal file
View File

@ -0,0 +1,38 @@
#!/usr/bin/env python
import sys
import unittest
import json
# Allow direct execution
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
from youtube_dl.utils import *
from helper import FakeYDL
class TestPlaylists(unittest.TestCase):
def assertIsPlaylist(self, info):
"""Make sure the info has '_type' set to 'playlist'"""
self.assertEqual(info['_type'], 'playlist')
def test_dailymotion_playlist(self):
dl = FakeYDL()
ie = DailymotionPlaylistIE(dl)
result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'SPORT')
self.assertTrue(len(result['entries']) > 20)
def test_vimeo_channel(self):
dl = FakeYDL()
ie = VimeoChannelIE(dl)
result = ie.extract('http://vimeo.com/channels/tributes')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Vimeo Tributes')
self.assertTrue(len(result['entries']) > 24)
if __name__ == '__main__':
unittest.main()

View File

@ -1,67 +0,0 @@
#!/usr/bin/env python
import unittest
import sys
# Allow direct execution
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor.youtube import YoutubeIE
from helper import FakeYDL
sig = YoutubeIE(FakeYDL())._decrypt_signature
class TestYoutubeSig(unittest.TestCase):
def test_92(self):
wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8"
right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7"
self.assertEqual(sig(wrong), right)
def test_90(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`"
right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"
self.assertEqual(sig(wrong), right)
def test_88(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<"
right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"
self.assertEqual(sig(wrong), right)
def test_87(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"
right = "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
self.assertEqual(sig(wrong), right)
def test_86(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"
right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"
self.assertEqual(sig(wrong), right)
def test_85(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"
self.assertEqual(sig(wrong), right)
def test_84(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"
right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"
self.assertEqual(sig(wrong), right)
def test_83(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"
right = "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"
self.assertEqual(sig(wrong), right)
def test_82(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<"
right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"
self.assertEqual(sig(wrong), right)
def test_81(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>."
right = "urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."
self.assertEqual(sig(wrong), right)
if __name__ == '__main__':
unittest.main()

View File

@ -79,9 +79,13 @@ class FileDownloader(object):
rate = float(current) / dif
eta = int((float(total) - float(current)) / rate)
(eta_mins, eta_secs) = divmod(eta, 60)
if eta_mins > 99:
return '--:--'
return '%02d:%02d' % (eta_mins, eta_secs)
(eta_hours, eta_mins) = divmod(eta_mins, 60)
if eta_hours > 99:
return '--:--:--'
if eta_hours == 0:
return '%02d:%02d' % (eta_mins, eta_secs)
else:
return '%02d:%02d:%02d' % (eta_hours, eta_mins, eta_secs)
@staticmethod
def calc_speed(start, now, bytes):
@ -329,6 +333,35 @@ class FileDownloader(object):
self.report_error(u'mplayer exited with code %d' % retval)
return False
def _download_m3u8_with_ffmpeg(self, filename, url):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename]
# Check for ffmpeg first
try:
subprocess.call(['ffmpeg', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
except (OSError, IOError):
self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0] )
return False
retval = subprocess.call(args)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
})
return True
else:
self.to_stderr(u"\n")
self.report_error(u'ffmpeg exited with code %d' % retval)
return False
def _do_download(self, filename, info_dict):
url = info_dict['url']
@ -354,6 +387,10 @@ class FileDownloader(object):
if url.startswith('mms') or url.startswith('rtsp'):
return self._download_with_mplayer(filename, url)
# m3u8 manifest are downloaded with ffmpeg
if determine_ext(url) == u'm3u8':
return self._download_m3u8_with_ffmpeg(filename, url)
tmpfilename = self.temp_name(filename)
stream = None

View File

@ -100,7 +100,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
self._nopostoverwrites = nopostoverwrites
def get_audio_codec(self, path):
if not self._exes['ffprobe'] and not self._exes['avprobe']: return None
if not self._exes['ffprobe'] and not self._exes['avprobe']:
raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
try:
cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
@ -208,7 +209,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
try:
os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
except:
self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
self._downloader.report_warning(u'Cannot update utime of audio file')
information['filepath'] = new_path
return self._nopostoverwrites,information

View File

@ -261,7 +261,7 @@ class YoutubeDL(object):
self.report_error(u'Erroneous output template')
return None
except ValueError as err:
self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
return None
def _match_entry(self, info_dict):
@ -535,7 +535,7 @@ class YoutubeDL(object):
try:
success = self.fd._do_download(filename, info_dict)
except (OSError, IOError) as err:
raise UnavailableVideoError()
raise UnavailableVideoError(err)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_error(u'unable to download video data: %s' % str(err))
return
@ -582,7 +582,7 @@ class YoutubeDL(object):
# No clear decision yet, let IE decide
keep_video = keep_video_wish
except PostProcessingError as e:
self.to_stderr(u'ERROR: ' + e.msg)
self.report_error(e.msg)
if keep_video is False and not self.params.get('keepvideo', False):
try:
self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)

View File

@ -27,6 +27,7 @@ __authors__ = (
'Johny Mo Swag',
'Axel Noack',
'Albert Kim',
'Pierre Rudloff',
)
__license__ = 'Public Domain'
@ -343,7 +344,7 @@ def parseOpts(overrideArguments=None):
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
systemConf = _readOptions('/etc/youtube-dl.conf')
userConf = _readOptions(userConfFile)
commandLineConf = sys.argv[1:]
commandLineConf = sys.argv[1:]
argv = systemConf + userConf + commandLineConf
opts, args = parser.parse_args(argv)
if opts.verbose:
@ -377,7 +378,7 @@ def _real_main(argv=None):
# Set user agent
if opts.user_agent is not None:
std_headers['User-Agent'] = opts.user_agent
# Set referer
if opts.referer is not None:
std_headers['Referer'] = opts.referer
@ -398,6 +399,8 @@ def _real_main(argv=None):
batchurls = batchfd.readlines()
batchurls = [x.strip() for x in batchurls]
batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
if opts.verbose:
sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
except IOError:
sys.exit(u'ERROR: batch file could not be read')
all_urls = batchurls + args

View File

@ -12,7 +12,7 @@ from .comedycentral import ComedyCentralIE
from .condenast import CondeNastIE
from .criterion import CriterionIE
from .cspan import CSpanIE
from .dailymotion import DailymotionIE
from .dailymotion import DailymotionIE, DailymotionPlaylistIE
from .depositfiles import DepositFilesIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
@ -36,23 +36,31 @@ from .ign import IGNIE, OneUPIE
from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE
from .jeuxvideo import JeuxVideoIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE
from .kankan import KankanIE
from .keek import KeekIE
from .liveleak import LiveLeakIE
from .livestream import LivestreamIE
from .metacafe import MetacafeIE
from .mixcloud import MixcloudIE
from .mtv import MTVIE
from .muzu import MuzuTVIE
from .myspass import MySpassIE
from .myvideo import MyVideoIE
from .nba import NBAIE
from .ooyala import OoyalaIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .pornotube import PornotubeIE
from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE
from .ringtv import RingTVIE
from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE
from .sina import SinaIE
from .slashdot import SlashdotIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE
from .spiegel import SpiegelIE
from .stanfordoc import StanfordOpenClassroomIE
@ -67,10 +75,12 @@ from .tudou import TudouIE
from .tumblr import TumblrIE
from .tutv import TutvIE
from .ustream import UstreamIE
from .unistra import UnistraIE
from .vbox7 import Vbox7IE
from .veoh import VeohIE
from .vevo import VevoIE
from .vimeo import VimeoIE
from .videofyme import VideofyMeIE
from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE
from .c56 import C56IE
from .wat import WatIE
@ -92,6 +102,9 @@ from .youtube import (
YoutubeChannelIE,
YoutubeShowIE,
YoutubeSubscriptionsIE,
YoutubeRecommendedIE,
YoutubeWatchLaterIE,
YoutubeFavouritesIE,
)
from .zdf import ZDFIE

View File

@ -17,13 +17,14 @@ class ArteTvIE(InfoExtractor):
"""
_EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
_LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
_LIVE_URL = r'index-[0-9]+\.html$'
IE_NAME = u'arte.tv'
@classmethod
def suitable(cls, url):
return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL))
return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL))
# TODO implement Live Stream
# from ..utils import compat_urllib_parse
@ -68,6 +69,12 @@ class ArteTvIE(InfoExtractor):
lang = mobj.group('lang')
return self._extract_video(url, id, lang)
mobj = re.match(self._LIVEWEB_URL, url)
if mobj is not None:
name = mobj.group('name')
lang = mobj.group('lang')
return self._extract_liveweb(url, name, lang)
if re.search(self._LIVE_URL, video_id) is not None:
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
# self.extractLiveStream(url)
@ -85,7 +92,7 @@ class ArteTvIE(InfoExtractor):
info_dict = {'id': player_info['VID'],
'title': player_info['VTI'],
'description': player_info['VDE'],
'description': player_info.get('VDE'),
'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
'thumbnail': player_info['programImage'],
'ext': 'flv',
@ -98,12 +105,14 @@ class ArteTvIE(InfoExtractor):
l = 'F'
elif lang == 'de':
l = 'A'
regexes = [r'VO?%s' % l, r'V%s-ST.' % l]
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
return any(re.match(r, f['versionCode']) for r in regexes)
# Some formats may not be in the same language as the url
formats = filter(_match_lang, formats)
# We order the formats by quality
formats = sorted(formats, key=lambda f: int(f['height']))
# Prefer videos without subtitles in the same language
formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None)
# Pick the best quality
format_info = formats[-1]
if format_info['mediaType'] == u'rtmp':
@ -144,3 +153,22 @@ class ArteTvIE(InfoExtractor):
'url': video_url,
'ext': 'flv',
}
def _extract_liveweb(self, url, name, lang):
"""Extract form http://liveweb.arte.tv/"""
webpage = self._download_webpage(url, name)
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
video_id, u'Downloading information')
config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
event_doc = config_doc.find('event')
url_node = event_doc.find('video').find('urlHd')
if url_node is None:
url_node = video_doc.find('urlSd')
return {'id': video_id,
'title': event_doc.find('name%s' % lang.capitalize()).text,
'url': url_node.text.replace('MP4', 'mp4'),
'ext': 'flv',
'thumbnail': self._og_search_thumbnail(webpage),
}

View File

@ -1,6 +1,8 @@
import re
import json
from .common import InfoExtractor
from ..utils import determine_ext
class BreakIE(InfoExtractor):
@ -17,17 +19,20 @@ class BreakIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1).split("-")[-1]
webpage = self._download_webpage(url, video_id)
video_url = re.search(r"videoPath: '(.+?)',",webpage).group(1)
key = re.search(r"icon: '(.+?)',",webpage).group(1)
final_url = str(video_url)+"?"+str(key)
thumbnail_url = re.search(r"thumbnailURL: '(.+?)'",webpage).group(1)
title = re.search(r"sVidTitle: '(.+)',",webpage).group(1)
ext = video_url.split('.')[-1]
embed_url = 'http://www.break.com/embed/%s' % video_id
webpage = self._download_webpage(embed_url, video_id)
info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
u'info json', flags=re.DOTALL)
info = json.loads(info_json)
video_url = info['videoUri']
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
if m_youtube is not None:
return self.url_result(m_youtube.group(1), 'Youtube')
final_url = video_url + '?' + info['AuthToken']
return [{
'id': video_id,
'url': final_url,
'ext': ext,
'title': title,
'thumbnail': thumbnail_url,
'ext': determine_ext(final_url),
'title': info['contentName'],
'thumbnail': info['thumbUri'],
}]

View File

@ -1,26 +1,36 @@
import re
import socket
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_http_client,
compat_str,
compat_urllib_error,
compat_urllib_parse_urlparse,
compat_urllib_request,
determine_ext,
ExtractorError,
)
class CollegeHumorIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
def report_manifest(self, video_id):
"""Report information extraction."""
self.to_screen(u'%s: Downloading XML manifest' % video_id)
_TESTS = [{
u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
u'file': u'6902724.mp4',
u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
u'info_dict': {
u'title': u'Comic-Con Cosplay Catastrophe',
u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.',
},
},
{
u'url': u'http://www.collegehumor.com/video/3505939/font-conference',
u'file': u'3505939.mp4',
u'md5': u'c51ca16b82bb456a4397987791a835f5',
u'info_dict': {
u'title': u'Font Conference',
u'description': u'This video wasn\'t long enough, so we made it double-spaced.',
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@ -36,39 +46,42 @@ class CollegeHumorIE(InfoExtractor):
self.report_extraction(video_id)
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
try:
metaXml = compat_urllib_request.urlopen(xmlUrl).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
metaXml = self._download_webpage(xmlUrl, video_id,
u'Downloading info XML',
u'Unable to download video info XML')
mdoc = xml.etree.ElementTree.fromstring(metaXml)
try:
videoNode = mdoc.findall('./video')[0]
youtubeIdNode = videoNode.find('./youtubeID')
if youtubeIdNode is not None:
return self.url_result(youtubeIdNode.text, 'Youtube')
info['description'] = videoNode.findall('./description')[0].text
info['title'] = videoNode.findall('./caption')[0].text
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
manifest_url = videoNode.findall('./file')[0].text
next_url = videoNode.findall('./file')[0].text
except IndexError:
raise ExtractorError(u'Invalid metadata XML file')
manifest_url += '?hdcore=2.10.3'
self.report_manifest(video_id)
try:
manifestXml = compat_urllib_request.urlopen(manifest_url).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
if next_url.endswith(u'manifest.f4m'):
manifest_url = next_url + '?hdcore=2.10.3'
manifestXml = self._download_webpage(manifest_url, video_id,
u'Downloading XML manifest',
u'Unable to download video info XML')
adoc = xml.etree.ElementTree.fromstring(manifestXml)
try:
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
node_id = media_node.attrib['url']
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
except IndexError as err:
raise ExtractorError(u'Invalid manifest file')
adoc = xml.etree.ElementTree.fromstring(manifestXml)
try:
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
node_id = media_node.attrib['url']
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
except IndexError as err:
raise ExtractorError(u'Invalid manifest file')
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
info['ext'] = 'mp4'
else:
# Old-style direct links
info['url'] = next_url
info['ext'] = determine_ext(info['url'])
url_pr = compat_urllib_parse_urlparse(manifest_url)
url = url_pr.scheme + '://' + url_pr.netloc + '/z' + video_id[:-2] + '/' + node_id + 'Seg1-Frag1'
info['url'] = url
info['ext'] = 'f4f'
return [info]
return info

View File

@ -24,7 +24,9 @@ class ComedyCentralIE(InfoExtractor):
(full-episodes/(?P<episode>.*)|
(?P<clip>
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
(?P<interview>
extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
$"""
_TEST = {
u'url': u'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
@ -87,6 +89,9 @@ class ComedyCentralIE(InfoExtractor):
else:
epTitle = mobj.group('cntitle')
dlNewest = False
elif mobj.group('interview'):
epTitle = mobj.group('interview_title')
dlNewest = False
else:
dlNewest = not mobj.group('episode')
if dlNewest:

View File

@ -78,7 +78,13 @@ class InfoExtractor(object):
@classmethod
def suitable(cls, url):
"""Receives a URL and returns True if suitable for this IE."""
return re.match(cls._VALID_URL, url) is not None
# This does not use has/getattr intentionally - we want to know whether
# we have cached the regexp for *this* class, whereas getattr would also
# match the superclass
if '_VALID_URL_RE' not in cls.__dict__:
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
return cls._VALID_URL_RE.match(url) is not None
@classmethod
def working(cls):

View File

@ -82,8 +82,8 @@ class DailymotionIE(DailyMotionSubtitlesIE):
# TODO: support choosing qualities
for key in ['stream_h264_hd1080_url', 'stream_h264_hd_url',
'stream_h264_hq_url', 'stream_h264_url',
for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
'stream_h264_hq_url','stream_h264_url',
'stream_h264_ld_url']:
if info.get(key): # key in info and info[key]:
max_quality = key
@ -116,3 +116,31 @@ class DailymotionIE(DailyMotionSubtitlesIE):
'subtitles': video_subtitles,
'thumbnail': info['thumbnail_url']
}]
class DailymotionPlaylistIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
video_ids = []
for pagenum in itertools.count(1):
webpage = self._download_webpage('https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum),
playlist_id, u'Downloading page %s' % pagenum)
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
entries = [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
for video_id in video_ids]
return {'_type': 'playlist',
'id': playlist_id,
'title': get_element_by_id(u'playlist_name', webpage),
'entries': entries,
}

View File

@ -8,17 +8,30 @@ class ExfmIE(InfoExtractor):
IE_NAME = u'exfm'
IE_DESC = u'ex.fm'
_VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
_SOUNDCLOUD_URL_ = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
_TEST = {
u'url': u'http://ex.fm/song/1bgtzg',
u'file': u'1bgtzg.mp3',
u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
u'info_dict': {
u"title": u"We Can't Stop",
u"uploader": u"Miley Cyrus",
u'thumbnail': u'http://i1.sndcdn.com/artworks-000049666230-w9i7ef-t500x500.jpg?9d68d37'
}
}
_SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
_TESTS = [
{
u'url': u'http://ex.fm/song/1bgtzg',
u'file': u'95223130.mp3',
u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
u'info_dict': {
u"title": u"We Can't Stop - Miley Cyrus",
u"uploader": u"Miley Cyrus",
u'upload_date': u'20130603',
u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC',
},
u'note': u'Soundcloud song',
},
{
u'url': u'http://ex.fm/song/wddt8',
u'file': u'wddt8.mp3',
u'md5': u'966bd70741ac5b8570d8e45bfaed3643',
u'info_dict': {
u'title': u'Safe and Sound',
u'uploader': u'Capital Cities',
},
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@ -26,11 +39,10 @@ class ExfmIE(InfoExtractor):
info_url = "http://ex.fm/api/v3/song/%s" %(song_id)
webpage = self._download_webpage(info_url, song_id)
info = json.loads(webpage)
song_url = re.match(self._SOUNDCLOUD_URL_,info['song']['url'])
if song_url is not None:
song_url = song_url.group() + "?client_id=b45b1aa10f1ac2941910a7f0d10f8e28"
else:
song_url = info['song']['url']
song_url = info['song']['url']
if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
self.to_screen('Soundcloud song detected')
return self.url_result(song_url.replace('/stream',''), 'Soundcloud')
return [{
'id': song_id,
'url': song_url,

View File

@ -21,17 +21,14 @@ class FunnyOrDieIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
video_url = self._search_regex(r'type: "video/mp4", src: "(.*?)"',
webpage, u'video URL', flags=re.DOTALL)
title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
info = {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': title,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
}
return [info]

View File

@ -107,8 +107,13 @@ class GenericIE(InfoExtractor):
return new_url
def _real_extract(self, url):
new_url = self._test_redirect(url)
if new_url: return [self.url_result(new_url)]
try:
new_url = self._test_redirect(url)
if new_url:
return [self.url_result(new_url)]
except compat_urllib_error.HTTPError:
# This may be a stupid server that doesn't like HEAD, our UA, or so
pass
video_id = url.split('/')[-1]
try:
@ -144,6 +149,9 @@ class GenericIE(InfoExtractor):
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
if m_video_type is not None:
mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
if mobj is None:
# HTML5 video
mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)

View File

@ -5,7 +5,7 @@ from .common import InfoExtractor
class InaIE(InfoExtractor):
"""Information Extractor for Ina.fr"""
_VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
_VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*'
_TEST = {
u'url': u'www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
u'file': u'I12055569.mp4',

View File

@ -0,0 +1,47 @@
# coding: utf-8
import json
import re
import xml.etree.ElementTree
from .common import InfoExtractor
class JeuxVideoIE(InfoExtractor):
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
_TEST = {
u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
u'file': u'5182.mp4',
u'md5': u'e0fdb0cd3ce98713ef9c1e1e025779d0',
u'info_dict': {
u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = re.match(self._VALID_URL, url).group(1)
webpage = self._download_webpage(url, title)
m_download = re.search(r'<param name="flashvars" value="config=(.*?)" />', webpage)
xml_link = m_download.group(1)
id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1)
xml_config = self._download_webpage(xml_link, title,
'Downloading XML config')
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
info = re.search(r'<format\.json>(.*?)</format\.json>',
xml_config, re.MULTILINE|re.DOTALL).group(1)
info = json.loads(info)['versions'][0]
video_url = 'http://video720.jeuxvideo.com/' + info['file']
return {'id': id,
'title' : config.find('titre_video').text,
'ext' : 'mp4',
'url' : video_url,
'description': self._og_search_description(webpage),
'thumbnail': config.find('image').text,
}

View File

@ -0,0 +1,37 @@
import re
from .common import InfoExtractor
from ..utils import determine_ext
class KankanIE(InfoExtractor):
_VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
_TEST = {
u'url': u'http://yinyue.kankan.com/vod/48/48863.shtml',
u'file': u'48863.flv',
u'md5': u'29aca1e47ae68fc28804aca89f29507e',
u'info_dict': {
u'title': u'Ready To Go',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', webpage, u'video title')
gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', webpage, u'gcid')
video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
video_id, u'Downloading video url info')
ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
video_url = 'http://%s%s' % (ip, path)
return {'id': video_id,
'title': title,
'url': video_url,
'ext': determine_ext(video_url),
}

View File

@ -4,10 +4,10 @@ from .common import InfoExtractor
class KeekIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
_VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
IE_NAME = u'keek'
_TEST = {
u'url': u'http://www.keek.com/ytdl/keeks/NODfbab',
u'url': u'https://www.keek.com/ytdl/keeks/NODfbab',
u'file': u'NODfbab.mp4',
u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83',
u'info_dict': {

View File

@ -0,0 +1,64 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
determine_ext,
)
class MuzuTVIE(InfoExtractor):
_VALID_URL = r'https?://www.muzu.tv/(.+?)/(.+?)/(?P<id>\d+)'
IE_NAME = u'muzu.tv'
_TEST = {
u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
u'file': u'1981454.mp4',
u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000',
u'info_dict': {
u'title': u'Cat Walk (Original Mix)',
u'description': u'md5:90e868994de201b2570e4e5854e19420',
u'uploader': u'MarcAshken featuring SOS',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info_data = compat_urllib_parse.urlencode({'format': 'json',
'url': url,
})
video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data,
video_id, u'Downloading video info')
info = json.loads(video_info_page)
player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
video_id, u'Downloading player info')
video_info = json.loads(player_info_page)['videos'][0]
for quality in ['1080' , '720', '480', '360']:
if video_info.get('v%s' % quality):
break
data = compat_urllib_parse.urlencode({'ai': video_id,
# Even if each time you watch a video the hash changes,
# it seems to work for different videos, and it will work
# even if you use any non empty string as a hash
'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
'device': 'web',
'qv': quality,
})
video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data,
video_id, u'Downloading video url')
video_url_info = json.loads(video_url_page)
video_url = video_url_info['url']
return {'id': video_id,
'title': info['title'],
'url': video_url,
'ext': determine_ext(video_url),
'thumbnail': info['thumbnail_url'],
'description': info['description'],
'uploader': info['author_name'],
}

View File

@ -2,11 +2,13 @@ import binascii
import base64
import hashlib
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_ord,
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
)
@ -16,7 +18,7 @@ from ..utils import (
class MyVideoIE(InfoExtractor):
"""Information Extractor for myvideo.de."""
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
IE_NAME = u'myvideo'
_TEST = {
u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
@ -85,6 +87,20 @@ class MyVideoIE(InfoExtractor):
'ext': video_ext,
}]
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
if mobj is not None:
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
response = self._download_webpage(request, video_id,
u'Downloading video info')
info = json.loads(base64.b64decode(response).decode('utf-8'))
return {'id': video_id,
'title': info['title'],
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
'play_path': info['filename'],
'ext': 'flv',
'thumbnail': info['thumbnail'][0]['url'],
}
# try encxml
mobj = re.search('var flashvars={(.+?)}', webpage)
if mobj is None:

View File

@ -0,0 +1,52 @@
import re
import json
from .common import InfoExtractor
from ..utils import unescapeHTML
class OoyalaIE(InfoExtractor):
_VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'
_TEST = {
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
u'info_dict': {
u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
},
}
def _extract_result(self, info, more_info):
return {'id': info['embedCode'],
'ext': 'mp4',
'title': unescapeHTML(info['title']),
'url': info['url'],
'description': unescapeHTML(more_info['description']),
'thumbnail': more_info['promo'],
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
embedCode = mobj.group('id')
player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode
player = self._download_webpage(player_url, embedCode)
mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
player, u'mobile player url')
mobile_player = self._download_webpage(mobile_url, embedCode)
videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"')
videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')
videos_info = json.loads(videos_info)
videos_more_info =json.loads(videos_more_info)
if videos_more_info.get('lineup'):
videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
return {'_type': 'playlist',
'id': embedCode,
'title': unescapeHTML(videos_more_info['title']),
'entries': videos,
}
else:
return self._extract_result(videos_info[0], videos_more_info)

View File

@ -0,0 +1,34 @@
import re
import json
from .common import InfoExtractor
class PBSIE(InfoExtractor):
_VALID_URL = r'https?://video.pbs.org/video/(?P<id>\d+)/?'
_TEST = {
u'url': u'http://video.pbs.org/video/2365006249/',
u'file': u'2365006249.mp4',
u'md5': 'ce1888486f0908d555a8093cac9a7362',
u'info_dict': {
u'title': u'A More Perfect Union',
u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
u'duration': 3190,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
info_page = self._download_webpage(info_url, video_id)
info =json.loads(info_page)
return {'id': video_id,
'title': info['title'],
'url': info['alternate_encoding']['url'],
'ext': 'mp4',
'description': info['program'].get('description'),
'thumbnail': info.get('image_url'),
'duration': info.get('duration'),
}

View File

@ -0,0 +1,49 @@
import re
import json
from .common import InfoExtractor
from ..utils import unified_strdate, determine_ext
class RoxwelIE(InfoExtractor):
_VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'
_TEST = {
u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html',
u'file': u'passionpittakeawalklive.flv',
u'md5': u'd9dea8360a1e7d485d2206db7fe13035',
u'info_dict': {
u'title': u'Take A Walk (live)',
u'uploader': u'Passion Pit',
u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
},
u'skip': u'Requires rtmpdump',
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
filename = mobj.group('filename')
info_url = 'http://www.roxwel.com/api/videos/%s' % filename
info_page = self._download_webpage(info_url, filename,
u'Downloading video info')
self.report_extraction(filename)
info = json.loads(info_page)
rtmp_rates = sorted([int(r.replace(