mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-12 10:25:48 +00:00
Merge branch 'master' into subtitles_rework
This commit is contained in:
commit
18b4e04f1c
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -9,7 +9,6 @@ build/
|
|||
dist/
|
||||
MANIFEST
|
||||
README.txt
|
||||
README.md
|
||||
youtube-dl.1
|
||||
youtube-dl.bash-completion
|
||||
youtube-dl
|
||||
|
|
|
@ -11,30 +11,42 @@ tests = [
|
|||
# 90
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
|
||||
"mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
|
||||
# 89
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
|
||||
"/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
|
||||
# 88
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
|
||||
"J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
|
||||
# 87
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
|
||||
"!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
|
||||
# 86 - vfl_ymO4Z 2013/06/27
|
||||
"uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
|
||||
# 86
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
|
||||
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
|
||||
# 85 - vflSAFCP9 2013/07/19
|
||||
"yuioplkjhgfdsazecvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"),
|
||||
# 85
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
|
||||
"ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
|
||||
".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
|
||||
# 84
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
|
||||
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
|
||||
# 83 - vflcaqGO8 2013/07/11
|
||||
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"),
|
||||
# 83
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
|
||||
"urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"),
|
||||
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
|
||||
# 82
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
|
||||
"Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
|
||||
# 81
|
||||
# 81 - vflLC8JvQ 2013/07/25
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
|
||||
"urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."),
|
||||
"C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
|
||||
# 79 - vflLC8JvQ 2013/07/25 (sporadic)
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/",
|
||||
"Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
|
||||
]
|
||||
|
||||
tests_age_gate = [
|
||||
# 86 - vflqinMWD
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
|
||||
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
|
||||
]
|
||||
|
||||
def find_matching(wrong, right):
|
||||
|
@ -87,6 +99,8 @@ def genall(tests):
|
|||
|
||||
def main():
|
||||
print(genall(tests))
|
||||
print(u' Age gate:')
|
||||
print(genall(tests_age_gate))
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
|
@ -50,6 +50,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||
self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
|
||||
def test_no_duplicates(self):
|
||||
ies = gen_extractors()
|
||||
|
|
38
test/test_playlists.py
Normal file
38
test/test_playlists.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
import unittest
|
||||
import json
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
|
||||
from youtube_dl.utils import *
|
||||
|
||||
from helper import FakeYDL
|
||||
|
||||
class TestPlaylists(unittest.TestCase):
|
||||
def assertIsPlaylist(self, info):
|
||||
"""Make sure the info has '_type' set to 'playlist'"""
|
||||
self.assertEqual(info['_type'], 'playlist')
|
||||
|
||||
def test_dailymotion_playlist(self):
|
||||
dl = FakeYDL()
|
||||
ie = DailymotionPlaylistIE(dl)
|
||||
result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], u'SPORT')
|
||||
self.assertTrue(len(result['entries']) > 20)
|
||||
|
||||
def test_vimeo_channel(self):
|
||||
dl = FakeYDL()
|
||||
ie = VimeoChannelIE(dl)
|
||||
result = ie.extract('http://vimeo.com/channels/tributes')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], u'Vimeo Tributes')
|
||||
self.assertTrue(len(result['entries']) > 24)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
|
@ -1,67 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import unittest
|
||||
import sys
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.extractor.youtube import YoutubeIE
|
||||
from helper import FakeYDL
|
||||
|
||||
sig = YoutubeIE(FakeYDL())._decrypt_signature
|
||||
|
||||
class TestYoutubeSig(unittest.TestCase):
|
||||
def test_92(self):
|
||||
wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8"
|
||||
right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_90(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`"
|
||||
right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_88(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<"
|
||||
right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_87(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"
|
||||
right = "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_86(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"
|
||||
right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_85(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
|
||||
right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_84(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"
|
||||
right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_83(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"
|
||||
right = "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_82(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<"
|
||||
right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_81(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>."
|
||||
right = "urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
|
@ -79,9 +79,13 @@ class FileDownloader(object):
|
|||
rate = float(current) / dif
|
||||
eta = int((float(total) - float(current)) / rate)
|
||||
(eta_mins, eta_secs) = divmod(eta, 60)
|
||||
if eta_mins > 99:
|
||||
return '--:--'
|
||||
return '%02d:%02d' % (eta_mins, eta_secs)
|
||||
(eta_hours, eta_mins) = divmod(eta_mins, 60)
|
||||
if eta_hours > 99:
|
||||
return '--:--:--'
|
||||
if eta_hours == 0:
|
||||
return '%02d:%02d' % (eta_mins, eta_secs)
|
||||
else:
|
||||
return '%02d:%02d:%02d' % (eta_hours, eta_mins, eta_secs)
|
||||
|
||||
@staticmethod
|
||||
def calc_speed(start, now, bytes):
|
||||
|
@ -329,6 +333,35 @@ class FileDownloader(object):
|
|||
self.report_error(u'mplayer exited with code %d' % retval)
|
||||
return False
|
||||
|
||||
def _download_m3u8_with_ffmpeg(self, filename, url):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename]
|
||||
# Check for ffmpeg first
|
||||
try:
|
||||
subprocess.call(['ffmpeg', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0] )
|
||||
return False
|
||||
|
||||
retval = subprocess.call(args)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'ffmpeg exited with code %d' % retval)
|
||||
return False
|
||||
|
||||
|
||||
def _do_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
|
@ -354,6 +387,10 @@ class FileDownloader(object):
|
|||
if url.startswith('mms') or url.startswith('rtsp'):
|
||||
return self._download_with_mplayer(filename, url)
|
||||
|
||||
# m3u8 manifest are downloaded with ffmpeg
|
||||
if determine_ext(url) == u'm3u8':
|
||||
return self._download_m3u8_with_ffmpeg(filename, url)
|
||||
|
||||
tmpfilename = self.temp_name(filename)
|
||||
stream = None
|
||||
|
||||
|
|
|
@ -100,7 +100,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
|||
self._nopostoverwrites = nopostoverwrites
|
||||
|
||||
def get_audio_codec(self, path):
|
||||
if not self._exes['ffprobe'] and not self._exes['avprobe']: return None
|
||||
if not self._exes['ffprobe'] and not self._exes['avprobe']:
|
||||
raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
|
||||
try:
|
||||
cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
|
||||
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
|
||||
|
@ -208,7 +209,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
|||
try:
|
||||
os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
|
||||
except:
|
||||
self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
|
||||
self._downloader.report_warning(u'Cannot update utime of audio file')
|
||||
|
||||
information['filepath'] = new_path
|
||||
return self._nopostoverwrites,information
|
||||
|
|
|
@ -261,7 +261,7 @@ class YoutubeDL(object):
|
|||
self.report_error(u'Erroneous output template')
|
||||
return None
|
||||
except ValueError as err:
|
||||
self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
|
||||
self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
|
||||
return None
|
||||
|
||||
def _match_entry(self, info_dict):
|
||||
|
@ -535,7 +535,7 @@ class YoutubeDL(object):
|
|||
try:
|
||||
success = self.fd._do_download(filename, info_dict)
|
||||
except (OSError, IOError) as err:
|
||||
raise UnavailableVideoError()
|
||||
raise UnavailableVideoError(err)
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self.report_error(u'unable to download video data: %s' % str(err))
|
||||
return
|
||||
|
@ -582,7 +582,7 @@ class YoutubeDL(object):
|
|||
# No clear decision yet, let IE decide
|
||||
keep_video = keep_video_wish
|
||||
except PostProcessingError as e:
|
||||
self.to_stderr(u'ERROR: ' + e.msg)
|
||||
self.report_error(e.msg)
|
||||
if keep_video is False and not self.params.get('keepvideo', False):
|
||||
try:
|
||||
self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
|
||||
|
|
|
@ -27,6 +27,7 @@ __authors__ = (
|
|||
'Johny Mo Swag',
|
||||
'Axel Noack',
|
||||
'Albert Kim',
|
||||
'Pierre Rudloff',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
|
@ -343,7 +344,7 @@ def parseOpts(overrideArguments=None):
|
|||
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
|
||||
systemConf = _readOptions('/etc/youtube-dl.conf')
|
||||
userConf = _readOptions(userConfFile)
|
||||
commandLineConf = sys.argv[1:]
|
||||
commandLineConf = sys.argv[1:]
|
||||
argv = systemConf + userConf + commandLineConf
|
||||
opts, args = parser.parse_args(argv)
|
||||
if opts.verbose:
|
||||
|
@ -377,7 +378,7 @@ def _real_main(argv=None):
|
|||
# Set user agent
|
||||
if opts.user_agent is not None:
|
||||
std_headers['User-Agent'] = opts.user_agent
|
||||
|
||||
|
||||
# Set referer
|
||||
if opts.referer is not None:
|
||||
std_headers['Referer'] = opts.referer
|
||||
|
@ -398,6 +399,8 @@ def _real_main(argv=None):
|
|||
batchurls = batchfd.readlines()
|
||||
batchurls = [x.strip() for x in batchurls]
|
||||
batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
|
||||
if opts.verbose:
|
||||
sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
|
||||
except IOError:
|
||||
sys.exit(u'ERROR: batch file could not be read')
|
||||
all_urls = batchurls + args
|
||||
|
|
|
@ -12,7 +12,7 @@ from .comedycentral import ComedyCentralIE
|
|||
from .condenast import CondeNastIE
|
||||
from .criterion import CriterionIE
|
||||
from .cspan import CSpanIE
|
||||
from .dailymotion import DailymotionIE
|
||||
from .dailymotion import DailymotionIE, DailymotionPlaylistIE
|
||||
from .depositfiles import DepositFilesIE
|
||||
from .dotsub import DotsubIE
|
||||
from .dreisat import DreiSatIE
|
||||
|
@ -36,23 +36,31 @@ from .ign import IGNIE, OneUPIE
|
|||
from .ina import InaIE
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jukebox import JukeboxIE
|
||||
from .justintv import JustinTVIE
|
||||
from .kankan import KankanIE
|
||||
from .keek import KeekIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .livestream import LivestreamIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .mixcloud import MixcloudIE
|
||||
from .mtv import MTVIE
|
||||
from .muzu import MuzuTVIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .nba import NBAIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .pbs import PBSIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pornotube import PornotubeIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .redtube import RedTubeIE
|
||||
from .ringtv import RingTVIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtlnow import RTLnowIE
|
||||
from .sina import SinaIE
|
||||
from .slashdot import SlashdotIE
|
||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE
|
||||
from .spiegel import SpiegelIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
|
@ -67,10 +75,12 @@ from .tudou import TudouIE
|
|||
from .tumblr import TumblrIE
|
||||
from .tutv import TutvIE
|
||||
from .ustream import UstreamIE
|
||||
from .unistra import UnistraIE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veoh import VeohIE
|
||||
from .vevo import VevoIE
|
||||
from .vimeo import VimeoIE
|
||||
from .videofyme import VideofyMeIE
|
||||
from .vimeo import VimeoIE, VimeoChannelIE
|
||||
from .vine import VineIE
|
||||
from .c56 import C56IE
|
||||
from .wat import WatIE
|
||||
|
@ -92,6 +102,9 @@ from .youtube import (
|
|||
YoutubeChannelIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeWatchLaterIE,
|
||||
YoutubeFavouritesIE,
|
||||
)
|
||||
from .zdf import ZDFIE
|
||||
|
||||
|
|
|
@ -17,13 +17,14 @@ class ArteTvIE(InfoExtractor):
|
|||
"""
|
||||
_EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
|
||||
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
|
||||
_LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
|
||||
_LIVE_URL = r'index-[0-9]+\.html$'
|
||||
|
||||
IE_NAME = u'arte.tv'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL))
|
||||
return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL))
|
||||
|
||||
# TODO implement Live Stream
|
||||
# from ..utils import compat_urllib_parse
|
||||
|
@ -68,6 +69,12 @@ class ArteTvIE(InfoExtractor):
|
|||
lang = mobj.group('lang')
|
||||
return self._extract_video(url, id, lang)
|
||||
|
||||
mobj = re.match(self._LIVEWEB_URL, url)
|
||||
if mobj is not None:
|
||||
name = mobj.group('name')
|
||||
lang = mobj.group('lang')
|
||||
return self._extract_liveweb(url, name, lang)
|
||||
|
||||
if re.search(self._LIVE_URL, video_id) is not None:
|
||||
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
|
||||
# self.extractLiveStream(url)
|
||||
|
@ -85,7 +92,7 @@ class ArteTvIE(InfoExtractor):
|
|||
|
||||
info_dict = {'id': player_info['VID'],
|
||||
'title': player_info['VTI'],
|
||||
'description': player_info['VDE'],
|
||||
'description': player_info.get('VDE'),
|
||||
'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
|
||||
'thumbnail': player_info['programImage'],
|
||||
'ext': 'flv',
|
||||
|
@ -98,12 +105,14 @@ class ArteTvIE(InfoExtractor):
|
|||
l = 'F'
|
||||
elif lang == 'de':
|
||||
l = 'A'
|
||||
regexes = [r'VO?%s' % l, r'V%s-ST.' % l]
|
||||
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
|
||||
return any(re.match(r, f['versionCode']) for r in regexes)
|
||||
# Some formats may not be in the same language as the url
|
||||
formats = filter(_match_lang, formats)
|
||||
# We order the formats by quality
|
||||
formats = sorted(formats, key=lambda f: int(f['height']))
|
||||
# Prefer videos without subtitles in the same language
|
||||
formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None)
|
||||
# Pick the best quality
|
||||
format_info = formats[-1]
|
||||
if format_info['mediaType'] == u'rtmp':
|
||||
|
@ -144,3 +153,22 @@ class ArteTvIE(InfoExtractor):
|
|||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
}
|
||||
|
||||
def _extract_liveweb(self, url, name, lang):
|
||||
"""Extract form http://liveweb.arte.tv/"""
|
||||
webpage = self._download_webpage(url, name)
|
||||
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
|
||||
config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
|
||||
video_id, u'Downloading information')
|
||||
config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
||||
event_doc = config_doc.find('event')
|
||||
url_node = event_doc.find('video').find('urlHd')
|
||||
if url_node is None:
|
||||
url_node = video_doc.find('urlSd')
|
||||
|
||||
return {'id': video_id,
|
||||
'title': event_doc.find('name%s' % lang.capitalize()).text,
|
||||
'url': url_node.text.replace('MP4', 'mp4'),
|
||||
'ext': 'flv',
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
|
||||
class BreakIE(InfoExtractor):
|
||||
|
@ -17,17 +19,20 @@ class BreakIE(InfoExtractor):
|
|||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1).split("-")[-1]
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = re.search(r"videoPath: '(.+?)',",webpage).group(1)
|
||||
key = re.search(r"icon: '(.+?)',",webpage).group(1)
|
||||
final_url = str(video_url)+"?"+str(key)
|
||||
thumbnail_url = re.search(r"thumbnailURL: '(.+?)'",webpage).group(1)
|
||||
title = re.search(r"sVidTitle: '(.+)',",webpage).group(1)
|
||||
ext = video_url.split('.')[-1]
|
||||
embed_url = 'http://www.break.com/embed/%s' % video_id
|
||||
webpage = self._download_webpage(embed_url, video_id)
|
||||
info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
|
||||
u'info json', flags=re.DOTALL)
|
||||
info = json.loads(info_json)
|
||||
video_url = info['videoUri']
|
||||
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
|
||||
if m_youtube is not None:
|
||||
return self.url_result(m_youtube.group(1), 'Youtube')
|
||||
final_url = video_url + '?' + info['AuthToken']
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': final_url,
|
||||
'ext': ext,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail_url,
|
||||
'ext': determine_ext(final_url),
|
||||
'title': info['contentName'],
|
||||
'thumbnail': info['thumbUri'],
|
||||
}]
|
||||
|
|
|
@ -1,26 +1,36 @@
|
|||
import re
|
||||
import socket
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
determine_ext,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class CollegeHumorIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
|
||||
|
||||
def report_manifest(self, video_id):
|
||||
"""Report information extraction."""
|
||||
self.to_screen(u'%s: Downloading XML manifest' % video_id)
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
||||
u'file': u'6902724.mp4',
|
||||
u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
|
||||
u'info_dict': {
|
||||
u'title': u'Comic-Con Cosplay Catastrophe',
|
||||
u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.',
|
||||
},
|
||||
},
|
||||
{
|
||||
u'url': u'http://www.collegehumor.com/video/3505939/font-conference',
|
||||
u'file': u'3505939.mp4',
|
||||
u'md5': u'c51ca16b82bb456a4397987791a835f5',
|
||||
u'info_dict': {
|
||||
u'title': u'Font Conference',
|
||||
u'description': u'This video wasn\'t long enough, so we made it double-spaced.',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@ -36,39 +46,42 @@ class CollegeHumorIE(InfoExtractor):
|
|||
|
||||
self.report_extraction(video_id)
|
||||
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
|
||||
try:
|
||||
metaXml = compat_urllib_request.urlopen(xmlUrl).read()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
|
||||
metaXml = self._download_webpage(xmlUrl, video_id,
|
||||
u'Downloading info XML',
|
||||
u'Unable to download video info XML')
|
||||
|
||||
mdoc = xml.etree.ElementTree.fromstring(metaXml)
|
||||
try:
|
||||
videoNode = mdoc.findall('./video')[0]
|
||||
youtubeIdNode = videoNode.find('./youtubeID')
|
||||
if youtubeIdNode is not None:
|
||||
return self.url_result(youtubeIdNode.text, 'Youtube')
|
||||
info['description'] = videoNode.findall('./description')[0].text
|
||||
info['title'] = videoNode.findall('./caption')[0].text
|
||||
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
|
||||
manifest_url = videoNode.findall('./file')[0].text
|
||||
next_url = videoNode.findall('./file')[0].text
|
||||
except IndexError:
|
||||
raise ExtractorError(u'Invalid metadata XML file')
|
||||
|
||||
manifest_url += '?hdcore=2.10.3'
|
||||
self.report_manifest(video_id)
|
||||
try:
|
||||
manifestXml = compat_urllib_request.urlopen(manifest_url).read()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
|
||||
if next_url.endswith(u'manifest.f4m'):
|
||||
manifest_url = next_url + '?hdcore=2.10.3'
|
||||
manifestXml = self._download_webpage(manifest_url, video_id,
|
||||
u'Downloading XML manifest',
|
||||
u'Unable to download video info XML')
|
||||
|
||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
||||
try:
|
||||
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
|
||||
node_id = media_node.attrib['url']
|
||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||
except IndexError as err:
|
||||
raise ExtractorError(u'Invalid manifest file')
|
||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
||||
try:
|
||||
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
|
||||
node_id = media_node.attrib['url']
|
||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||
except IndexError as err:
|
||||
raise ExtractorError(u'Invalid manifest file')
|
||||
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
||||
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
||||
info['ext'] = 'mp4'
|
||||
else:
|
||||
# Old-style direct links
|
||||
info['url'] = next_url
|
||||
info['ext'] = determine_ext(info['url'])
|
||||
|
||||
url_pr = compat_urllib_parse_urlparse(manifest_url)
|
||||
url = url_pr.scheme + '://' + url_pr.netloc + '/z' + video_id[:-2] + '/' + node_id + 'Seg1-Frag1'
|
||||
|
||||
info['url'] = url
|
||||
info['ext'] = 'f4f'
|
||||
return [info]
|
||||
return info
|
||||
|
|
|
@ -24,7 +24,9 @@ class ComedyCentralIE(InfoExtractor):
|
|||
(full-episodes/(?P<episode>.*)|
|
||||
(?P<clip>
|
||||
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
|
||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
|
||||
(?P<interview>
|
||||
extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
|
||||
$"""
|
||||
_TEST = {
|
||||
u'url': u'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
|
||||
|
@ -87,6 +89,9 @@ class ComedyCentralIE(InfoExtractor):
|
|||
else:
|
||||
epTitle = mobj.group('cntitle')
|
||||
dlNewest = False
|
||||
elif mobj.group('interview'):
|
||||
epTitle = mobj.group('interview_title')
|
||||
dlNewest = False
|
||||
else:
|
||||
dlNewest = not mobj.group('episode')
|
||||
if dlNewest:
|
||||
|
|
|
@ -78,7 +78,13 @@ class InfoExtractor(object):
|
|||
@classmethod
|
||||
def suitable(cls, url):
|
||||
"""Receives a URL and returns True if suitable for this IE."""
|
||||
return re.match(cls._VALID_URL, url) is not None
|
||||
|
||||
# This does not use has/getattr intentionally - we want to know whether
|
||||
# we have cached the regexp for *this* class, whereas getattr would also
|
||||
# match the superclass
|
||||
if '_VALID_URL_RE' not in cls.__dict__:
|
||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||
return cls._VALID_URL_RE.match(url) is not None
|
||||
|
||||
@classmethod
|
||||
def working(cls):
|
||||
|
|
|
@ -82,8 +82,8 @@ class DailymotionIE(DailyMotionSubtitlesIE):
|
|||
|
||||
# TODO: support choosing qualities
|
||||
|
||||
for key in ['stream_h264_hd1080_url', 'stream_h264_hd_url',
|
||||
'stream_h264_hq_url', 'stream_h264_url',
|
||||
for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
|
||||
'stream_h264_hq_url','stream_h264_url',
|
||||
'stream_h264_ld_url']:
|
||||
if info.get(key): # key in info and info[key]:
|
||||
max_quality = key
|
||||
|
@ -116,3 +116,31 @@ class DailymotionIE(DailyMotionSubtitlesIE):
|
|||
'subtitles': video_subtitles,
|
||||
'thumbnail': info['thumbnail_url']
|
||||
}]
|
||||
|
||||
|
||||
class DailymotionPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
|
||||
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
video_ids = []
|
||||
|
||||
for pagenum in itertools.count(1):
|
||||
webpage = self._download_webpage('https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum),
|
||||
playlist_id, u'Downloading page %s' % pagenum)
|
||||
|
||||
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
|
||||
video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el))
|
||||
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
||||
break
|
||||
|
||||
entries = [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
|
||||
for video_id in video_ids]
|
||||
return {'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': get_element_by_id(u'playlist_name', webpage),
|
||||
'entries': entries,
|
||||
}
|
||||
|
|
|
@ -8,17 +8,30 @@ class ExfmIE(InfoExtractor):
|
|||
IE_NAME = u'exfm'
|
||||
IE_DESC = u'ex.fm'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
|
||||
_SOUNDCLOUD_URL_ = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
|
||||
_TEST = {
|
||||
u'url': u'http://ex.fm/song/1bgtzg',
|
||||
u'file': u'1bgtzg.mp3',
|
||||
u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
|
||||
u'info_dict': {
|
||||
u"title": u"We Can't Stop",
|
||||
u"uploader": u"Miley Cyrus",
|
||||
u'thumbnail': u'http://i1.sndcdn.com/artworks-000049666230-w9i7ef-t500x500.jpg?9d68d37'
|
||||
}
|
||||
}
|
||||
_SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
|
||||
_TESTS = [
|
||||
{
|
||||
u'url': u'http://ex.fm/song/1bgtzg',
|
||||
u'file': u'95223130.mp3',
|
||||
u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
|
||||
u'info_dict': {
|
||||
u"title": u"We Can't Stop - Miley Cyrus",
|
||||
u"uploader": u"Miley Cyrus",
|
||||
u'upload_date': u'20130603',
|
||||
u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC',
|
||||
},
|
||||
u'note': u'Soundcloud song',
|
||||
},
|
||||
{
|
||||
u'url': u'http://ex.fm/song/wddt8',
|
||||
u'file': u'wddt8.mp3',
|
||||
u'md5': u'966bd70741ac5b8570d8e45bfaed3643',
|
||||
u'info_dict': {
|
||||
u'title': u'Safe and Sound',
|
||||
u'uploader': u'Capital Cities',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@ -26,11 +39,10 @@ class ExfmIE(InfoExtractor):
|
|||
info_url = "http://ex.fm/api/v3/song/%s" %(song_id)
|
||||
webpage = self._download_webpage(info_url, song_id)
|
||||
info = json.loads(webpage)
|
||||
song_url = re.match(self._SOUNDCLOUD_URL_,info['song']['url'])
|
||||
if song_url is not None:
|
||||
song_url = song_url.group() + "?client_id=b45b1aa10f1ac2941910a7f0d10f8e28"
|
||||
else:
|
||||
song_url = info['song']['url']
|
||||
song_url = info['song']['url']
|
||||
if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
|
||||
self.to_screen('Soundcloud song detected')
|
||||
return self.url_result(song_url.replace('/stream',''), 'Soundcloud')
|
||||
return [{
|
||||
'id': song_id,
|
||||
'url': song_url,
|
||||
|
|
|
@ -21,17 +21,14 @@ class FunnyOrDieIE(InfoExtractor):
|
|||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
|
||||
video_url = self._search_regex(r'type: "video/mp4", src: "(.*?)"',
|
||||
webpage, u'video URL', flags=re.DOTALL)
|
||||
|
||||
title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
|
||||
r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': title,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
||||
return [info]
|
||||
|
|
|
@ -107,8 +107,13 @@ class GenericIE(InfoExtractor):
|
|||
return new_url
|
||||
|
||||
def _real_extract(self, url):
|
||||
new_url = self._test_redirect(url)
|
||||
if new_url: return [self.url_result(new_url)]
|
||||
try:
|
||||
new_url = self._test_redirect(url)
|
||||
if new_url:
|
||||
return [self.url_result(new_url)]
|
||||
except compat_urllib_error.HTTPError:
|
||||
# This may be a stupid server that doesn't like HEAD, our UA, or so
|
||||
pass
|
||||
|
||||
video_id = url.split('/')[-1]
|
||||
try:
|
||||
|
@ -144,6 +149,9 @@ class GenericIE(InfoExtractor):
|
|||
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
||||
if m_video_type is not None:
|
||||
mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
|
||||
if mobj is None:
|
||||
# HTML5 video
|
||||
mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ from .common import InfoExtractor
|
|||
|
||||
class InaIE(InfoExtractor):
|
||||
"""Information Extractor for Ina.fr"""
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*'
|
||||
_TEST = {
|
||||
u'url': u'www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
||||
u'file': u'I12055569.mp4',
|
||||
|
|
47
youtube_dl/extractor/jeuxvideo.py
Normal file
47
youtube_dl/extractor/jeuxvideo.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
# coding: utf-8
|
||||
|
||||
import json
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
class JeuxVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||
u'file': u'5182.mp4',
|
||||
u'md5': u'e0fdb0cd3ce98713ef9c1e1e025779d0',
|
||||
u'info_dict': {
|
||||
u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||
u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = re.match(self._VALID_URL, url).group(1)
|
||||
webpage = self._download_webpage(url, title)
|
||||
m_download = re.search(r'<param name="flashvars" value="config=(.*?)" />', webpage)
|
||||
|
||||
xml_link = m_download.group(1)
|
||||
|
||||
id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1)
|
||||
|
||||
xml_config = self._download_webpage(xml_link, title,
|
||||
'Downloading XML config')
|
||||
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
|
||||
info = re.search(r'<format\.json>(.*?)</format\.json>',
|
||||
xml_config, re.MULTILINE|re.DOTALL).group(1)
|
||||
info = json.loads(info)['versions'][0]
|
||||
|
||||
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
||||
|
||||
return {'id': id,
|
||||
'title' : config.find('titre_video').text,
|
||||
'ext' : 'mp4',
|
||||
'url' : video_url,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': config.find('image').text,
|
||||
}
|
37
youtube_dl/extractor/kankan.py
Normal file
37
youtube_dl/extractor/kankan.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
|
||||
class KankanIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://yinyue.kankan.com/vod/48/48863.shtml',
|
||||
u'file': u'48863.flv',
|
||||
u'md5': u'29aca1e47ae68fc28804aca89f29507e',
|
||||
u'info_dict': {
|
||||
u'title': u'Ready To Go',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', webpage, u'video title')
|
||||
gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', webpage, u'gcid')
|
||||
|
||||
video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
|
||||
video_id, u'Downloading video url info')
|
||||
ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
|
||||
path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
|
||||
video_url = 'http://%s%s' % (ip, path)
|
||||
|
||||
return {'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'ext': determine_ext(video_url),
|
||||
}
|
|
@ -4,10 +4,10 @@ from .common import InfoExtractor
|
|||
|
||||
|
||||
class KeekIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
|
||||
IE_NAME = u'keek'
|
||||
_TEST = {
|
||||
u'url': u'http://www.keek.com/ytdl/keeks/NODfbab',
|
||||
u'url': u'https://www.keek.com/ytdl/keeks/NODfbab',
|
||||
u'file': u'NODfbab.mp4',
|
||||
u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83',
|
||||
u'info_dict': {
|
||||
|
|
64
youtube_dl/extractor/muzu.py
Normal file
64
youtube_dl/extractor/muzu.py
Normal file
|
@ -0,0 +1,64 @@
|
|||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class MuzuTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.muzu.tv/(.+?)/(.+?)/(?P<id>\d+)'
|
||||
IE_NAME = u'muzu.tv'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
|
||||
u'file': u'1981454.mp4',
|
||||
u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000',
|
||||
u'info_dict': {
|
||||
u'title': u'Cat Walk (Original Mix)',
|
||||
u'description': u'md5:90e868994de201b2570e4e5854e19420',
|
||||
u'uploader': u'MarcAshken featuring SOS',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
info_data = compat_urllib_parse.urlencode({'format': 'json',
|
||||
'url': url,
|
||||
})
|
||||
video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data,
|
||||
video_id, u'Downloading video info')
|
||||
info = json.loads(video_info_page)
|
||||
|
||||
player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
|
||||
video_id, u'Downloading player info')
|
||||
video_info = json.loads(player_info_page)['videos'][0]
|
||||
for quality in ['1080' , '720', '480', '360']:
|
||||
if video_info.get('v%s' % quality):
|
||||
break
|
||||
|
||||
data = compat_urllib_parse.urlencode({'ai': video_id,
|
||||
# Even if each time you watch a video the hash changes,
|
||||
# it seems to work for different videos, and it will work
|
||||
# even if you use any non empty string as a hash
|
||||
'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
|
||||
'device': 'web',
|
||||
'qv': quality,
|
||||
})
|
||||
video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data,
|
||||
video_id, u'Downloading video url')
|
||||
video_url_info = json.loads(video_url_page)
|
||||
video_url = video_url_info['url']
|
||||
|
||||
return {'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': video_url,
|
||||
'ext': determine_ext(video_url),
|
||||
'thumbnail': info['thumbnail_url'],
|
||||
'description': info['description'],
|
||||
'uploader': info['author_name'],
|
||||
}
|
|
@ -2,11 +2,13 @@ import binascii
|
|||
import base64
|
||||
import hashlib
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_ord,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
@ -16,7 +18,7 @@ from ..utils import (
|
|||
class MyVideoIE(InfoExtractor):
|
||||
"""Information Extractor for myvideo.de."""
|
||||
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
|
||||
IE_NAME = u'myvideo'
|
||||
_TEST = {
|
||||
u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
|
||||
|
@ -85,6 +87,20 @@ class MyVideoIE(InfoExtractor):
|
|||
'ext': video_ext,
|
||||
}]
|
||||
|
||||
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
|
||||
if mobj is not None:
|
||||
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
|
||||
response = self._download_webpage(request, video_id,
|
||||
u'Downloading video info')
|
||||
info = json.loads(base64.b64decode(response).decode('utf-8'))
|
||||
return {'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
|
||||
'play_path': info['filename'],
|
||||
'ext': 'flv',
|
||||
'thumbnail': info['thumbnail'][0]['url'],
|
||||
}
|
||||
|
||||
# try encxml
|
||||
mobj = re.search('var flashvars={(.+?)}', webpage)
|
||||
if mobj is None:
|
||||
|
|
52
youtube_dl/extractor/ooyala.py
Normal file
52
youtube_dl/extractor/ooyala.py
Normal file
|
@ -0,0 +1,52 @@
|
|||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unescapeHTML
|
||||
|
||||
class OoyalaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'
|
||||
|
||||
_TEST = {
|
||||
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
|
||||
u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
|
||||
u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
|
||||
u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
|
||||
u'info_dict': {
|
||||
u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
|
||||
u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_result(self, info, more_info):
|
||||
return {'id': info['embedCode'],
|
||||
'ext': 'mp4',
|
||||
'title': unescapeHTML(info['title']),
|
||||
'url': info['url'],
|
||||
'description': unescapeHTML(more_info['description']),
|
||||
'thumbnail': more_info['promo'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
embedCode = mobj.group('id')
|
||||
player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode
|
||||
player = self._download_webpage(player_url, embedCode)
|
||||
mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
|
||||
player, u'mobile player url')
|
||||
mobile_player = self._download_webpage(mobile_url, embedCode)
|
||||
videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"')
|
||||
videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')
|
||||
videos_info = json.loads(videos_info)
|
||||
videos_more_info =json.loads(videos_more_info)
|
||||
|
||||
if videos_more_info.get('lineup'):
|
||||
videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
|
||||
return {'_type': 'playlist',
|
||||
'id': embedCode,
|
||||
'title': unescapeHTML(videos_more_info['title']),
|
||||
'entries': videos,
|
||||
}
|
||||
else:
|
||||
return self._extract_result(videos_info[0], videos_more_info)
|
||||
|
34
youtube_dl/extractor/pbs.py
Normal file
34
youtube_dl/extractor/pbs.py
Normal file
|
@ -0,0 +1,34 @@
|
|||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class PBSIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video.pbs.org/video/(?P<id>\d+)/?'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://video.pbs.org/video/2365006249/',
|
||||
u'file': u'2365006249.mp4',
|
||||
u'md5': 'ce1888486f0908d555a8093cac9a7362',
|
||||
u'info_dict': {
|
||||
u'title': u'A More Perfect Union',
|
||||
u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||
u'duration': 3190,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
||||
info_page = self._download_webpage(info_url, video_id)
|
||||
info =json.loads(info_page)
|
||||
return {'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': info['alternate_encoding']['url'],
|
||||
'ext': 'mp4',
|
||||
'description': info['program'].get('description'),
|
||||
'thumbnail': info.get('image_url'),
|
||||
'duration': info.get('duration'),
|
||||
}
|
49
youtube_dl/extractor/roxwel.py
Normal file
49
youtube_dl/extractor/roxwel.py
Normal file
|
@ -0,0 +1,49 @@
|
|||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate, determine_ext
|
||||
|
||||
|
||||
class RoxwelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html',
|
||||
u'file': u'passionpittakeawalklive.flv',
|
||||
u'md5': u'd9dea8360a1e7d485d2206db7fe13035',
|
||||
u'info_dict': {
|
||||
u'title': u'Take A Walk (live)',
|
||||
u'uploader': u'Passion Pit',
|
||||
u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
|
||||
},
|
||||
u'skip': u'Requires rtmpdump',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
filename = mobj.group('filename')
|
||||
info_url = 'http://www.roxwel.com/api/videos/%s' % filename
|
||||
info_page = self._download_webpage(info_url, filename,
|
||||
u'Downloading video info')
|
||||
|
||||
self.report_extraction(filename)
|
||||
info = json.loads(info_page)
|
||||
rtmp_rates = sorted([int(r.replace( |