Merge branch 'ytdl-org:master' into df-bitchute-ovrhaul

This commit is contained in:
dirkf 2023-02-04 14:41:35 +00:00 committed by GitHub
commit 7fb0a87d7c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
57 changed files with 5902 additions and 806 deletions

View File

@ -15,12 +15,12 @@ jobs:
run-tests-ext: [sh]
include:
# python 3.2 is only available on windows via setup-python
- os: windows-latest
- os: windows-2019
python-version: 3.2
python-impl: cpython
ytdl-test-set: core
run-tests-ext: bat
- os: windows-latest
- os: windows-2019
python-version: 3.2
python-impl: cpython
ytdl-test-set: download

View File

@ -632,7 +632,7 @@ To use percent literals in an output template use `%%`. To output to stdout use
The current default template is `%(title)s-%(id)s.%(ext)s`.
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.
#### Output template and Windows batch files

View File

@ -128,6 +128,12 @@ def expect_value(self, got, expected, field):
self.assertTrue(
contains_str in got,
'field %s (value: %r) should contain %r' % (field, got, contains_str))
elif isinstance(expected, compat_str) and re.match(r'^lambda \w+:', expected):
fn = eval(expected)
suite = expected.split(':', 1)[1].strip()
self.assertTrue(
fn(got),
'Expected field %s to meet condition %s, but value %r failed ' % (field, suite, got))
elif isinstance(expected, type):
self.assertTrue(
isinstance(got, expected),
@ -137,7 +143,7 @@ def expect_value(self, got, expected, field):
elif isinstance(expected, list) and isinstance(got, list):
self.assertEqual(
len(expected), len(got),
'Expect a list of length %d, but got a list of length %d for field %s' % (
'Expected a list of length %d, but got a list of length %d for field %s' % (
len(expected), len(got), field))
for index, (item_got, item_expected) in enumerate(zip(got, expected)):
type_got = type(item_got)

View File

@ -8,7 +8,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text
from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text, aes_ecb_encrypt
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
import base64
@ -58,6 +58,13 @@ class TestAES(unittest.TestCase):
decrypted = (aes_decrypt_text(encrypted, password, 32))
self.assertEqual(decrypted, self.secret_msg)
def test_ecb_encrypt(self):
data = bytes_to_intlist(self.secret_msg)
encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key))
self.assertEqual(
encrypted,
b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
if __name__ == '__main__':
unittest.main()

View File

@ -3,17 +3,18 @@
from __future__ import unicode_literals
import shutil
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import shutil
from test.helper import FakeYDL
from youtube_dl.cache import Cache
from youtube_dl.utils import version_tuple
from youtube_dl.version import __version__
def _is_empty(d):
@ -54,6 +55,17 @@ class TestCache(unittest.TestCase):
self.assertFalse(os.path.exists(self.test_dir))
self.assertEqual(c.load('test_cache', 'k.'), None)
def test_cache_validation(self):
ydl = FakeYDL({
'cachedir': self.test_dir,
})
c = Cache(ydl)
obj = {'x': 1, 'y': ['ä', '\\a', True]}
c.store('test_cache', 'k.', obj)
self.assertEqual(c.load('test_cache', 'k.', min_ver='1970.01.01'), obj)
new_version = '.'.join(('%d' % ((v + 1) if i == 0 else v, )) for i, v in enumerate(version_tuple(__version__)))
self.assertIs(c.load('test_cache', 'k.', min_ver=new_version), None)
if __name__ == '__main__':
unittest.main()

View File

@ -11,6 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.compat import (
compat_casefold,
compat_getenv,
compat_setenv,
compat_etree_Element,
@ -118,9 +119,21 @@ class TestCompat(unittest.TestCase):
<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
compat_etree_fromstring(xml)
def test_struct_unpack(self):
def test_compat_struct_unpack(self):
self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
def test_compat_casefold(self):
if hasattr(compat_str, 'casefold'):
# don't bother to test str.casefold() (again)
return
# thanks https://bugs.python.org/file24232/casefolding.patch
self.assertEqual(compat_casefold('hello'), 'hello')
self.assertEqual(compat_casefold('hELlo'), 'hello')
self.assertEqual(compat_casefold('ß'), 'ss')
self.assertEqual(compat_casefold(''), 'fi')
self.assertEqual(compat_casefold('\u03a3'), '\u03c3')
self.assertEqual(compat_casefold('A\u0345\u03a3'), 'a\u03b9\u03c3')
if __name__ == '__main__':
unittest.main()

View File

@ -33,6 +33,7 @@ from youtube_dl.compat import (
from youtube_dl.utils import (
DownloadError,
ExtractorError,
error_to_compat_str,
format_bytes,
UnavailableVideoError,
)
@ -100,22 +101,22 @@ def generator(test_case, tname):
def print_skipping(reason):
print('Skipping %s: %s' % (test_case['name'], reason))
self.skipTest(reason)
if not ie.working():
print_skipping('IE marked as not _WORKING')
return
for tc in test_cases:
info_dict = tc.get('info_dict', {})
if not (info_dict.get('id') and info_dict.get('ext')):
raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
raise Exception('Test definition (%s) requires both \'id\' and \'ext\' keys present to define the output file' % (tname, ))
if 'skip' in test_case:
print_skipping(test_case['skip'])
return
for other_ie in other_ies:
if not other_ie.working():
print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
return
params = get_params(test_case.get('params', {}))
params['outtmpl'] = tname + '_' + params['outtmpl']
@ -161,7 +162,9 @@ def generator(test_case, tname):
except (DownloadError, ExtractorError) as err:
# Check if the exception is not a network related one
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
raise
msg = getattr(err, 'msg', error_to_compat_str(err))
err.msg = '%s (%s)' % (msg, tname, )
raise err
if try_num == RETRIES:
report_warning('%s failed due to network errors, skipping...' % tname)

View File

@ -8,7 +8,10 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.jsinterp import JSInterpreter
import math
import re
from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
class TestJSInterpreter(unittest.TestCase):
@ -19,6 +22,9 @@ class TestJSInterpreter(unittest.TestCase):
jsi = JSInterpreter('function x3(){return 42;}')
self.assertEqual(jsi.call_function('x3'), 42)
jsi = JSInterpreter('function x3(){42}')
self.assertEqual(jsi.call_function('x3'), None)
jsi = JSInterpreter('var x5 = function(){return 42;}')
self.assertEqual(jsi.call_function('x5'), 42)
@ -45,14 +51,32 @@ class TestJSInterpreter(unittest.TestCase):
jsi = JSInterpreter('function f(){return 1 << 5;}')
self.assertEqual(jsi.call_function('f'), 32)
jsi = JSInterpreter('function f(){return 2 ** 5}')
self.assertEqual(jsi.call_function('f'), 32)
jsi = JSInterpreter('function f(){return 19 & 21;}')
self.assertEqual(jsi.call_function('f'), 17)
jsi = JSInterpreter('function f(){return 11 >> 2;}')
self.assertEqual(jsi.call_function('f'), 2)
jsi = JSInterpreter('function f(){return []? 2+3: 4;}')
self.assertEqual(jsi.call_function('f'), 5)
jsi = JSInterpreter('function f(){return 1 == 2}')
self.assertEqual(jsi.call_function('f'), False)
jsi = JSInterpreter('function f(){return 0 && 1 || 2;}')
self.assertEqual(jsi.call_function('f'), 2)
jsi = JSInterpreter('function f(){return 0 ?? 42;}')
self.assertEqual(jsi.call_function('f'), 0)
jsi = JSInterpreter('function f(){return "life, the universe and everything" < 42;}')
self.assertFalse(jsi.call_function('f'))
def test_array_access(self):
jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;}')
jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}')
self.assertEqual(jsi.call_function('f'), [5, 2, 7])
def test_parens(self):
@ -62,6 +86,10 @@ class TestJSInterpreter(unittest.TestCase):
jsi = JSInterpreter('function f(){return (1 + 2) * 3;}')
self.assertEqual(jsi.call_function('f'), 9)
def test_quotes(self):
jsi = JSInterpreter(r'function f(){return "a\"\\("}')
self.assertEqual(jsi.call_function('f'), r'a"\(')
def test_assignments(self):
jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}')
self.assertEqual(jsi.call_function('f'), 31)
@ -104,18 +132,100 @@ class TestJSInterpreter(unittest.TestCase):
}''')
self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
def test_builtins(self):
jsi = JSInterpreter('''
function x() { return NaN }
''')
self.assertTrue(math.isnan(jsi.call_function('x')))
def test_Date(self):
jsi = JSInterpreter('''
function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
''')
self.assertEqual(jsi.call_function('x'), 86000)
jsi = JSInterpreter('''
function x(dt) { return new Date(dt) - 0; }
''')
self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
# date format m/d/y
jsi = JSInterpreter('''
function x() { return new Date('12/31/1969 18:01:26 MDT') - 0; }
''')
self.assertEqual(jsi.call_function('x'), 86000)
def test_call(self):
jsi = JSInterpreter('''
function x() { return 2; }
function y(a) { return x() + a; }
function y(a) { return x() + (a?a:0); }
function z() { return y(3); }
''')
self.assertEqual(jsi.call_function('z'), 5)
self.assertEqual(jsi.call_function('y'), 2)
def test_if(self):
jsi = JSInterpreter('''
function x() {
let a = 9;
if (0==0) {a++}
return a
}''')
self.assertEqual(jsi.call_function('x'), 10)
jsi = JSInterpreter('''
function x() {
if (0==0) {return 10}
}''')
self.assertEqual(jsi.call_function('x'), 10)
jsi = JSInterpreter('''
function x() {
if (0!=0) {return 1}
else {return 10}
}''')
self.assertEqual(jsi.call_function('x'), 10)
""" # Unsupported
jsi = JSInterpreter('''
function x() {
if (0!=0) return 1;
else {return 10}
}''')
self.assertEqual(jsi.call_function('x'), 10)
"""
def test_elseif(self):
jsi = JSInterpreter('''
function x() {
if (0!=0) {return 1}
else if (1==0) {return 2}
else {return 10}
}''')
self.assertEqual(jsi.call_function('x'), 10)
""" # Unsupported
jsi = JSInterpreter('''
function x() {
if (0!=0) return 1;
else if (1==0) {return 2}
else {return 10}
}''')
self.assertEqual(jsi.call_function('x'), 10)
# etc
"""
def test_for_loop(self):
# function x() { a=0; for (i=0; i-10; i++) {a++} a }
jsi = JSInterpreter('''
function x() { a=0; for (i=0; i-10; i = i + 1) {a++} a }
function x() { a=0; for (i=0; i-10; i++) {a++} return a }
''')
self.assertEqual(jsi.call_function('x'), 10)
def test_while_loop(self):
# function x() { a=0; while (a<10) {a++} a }
jsi = JSInterpreter('''
function x() { a=0; while (a<10) {a++} return a }
''')
self.assertEqual(jsi.call_function('x'), 10)
@ -154,21 +264,53 @@ class TestJSInterpreter(unittest.TestCase):
''')
self.assertEqual(jsi.call_function('x'), 10)
def test_catch(self):
jsi = JSInterpreter('''
function x() { try{throw 10} catch(e){return 5} }
''')
self.assertEqual(jsi.call_function('x'), 5)
def test_finally(self):
jsi = JSInterpreter('''
function x() { try{throw 10} finally {return 42} }
''')
self.assertEqual(jsi.call_function('x'), 42)
jsi = JSInterpreter('''
function x() { try{throw 10} catch(e){return 5} finally {return 42} }
''')
self.assertEqual(jsi.call_function('x'), 42)
def test_nested_try(self):
jsi = JSInterpreter('''
function x() {try {
try{throw 10} finally {throw 42}
} catch(e){return 5} }
''')
self.assertEqual(jsi.call_function('x'), 5)
def test_for_loop_continue(self):
jsi = JSInterpreter('''
function x() { a=0; for (i=0; i-10; i++) { continue; a++ } a }
function x() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }
''')
self.assertEqual(jsi.call_function('x'), 0)
def test_for_loop_break(self):
jsi = JSInterpreter('''
function x() { a=0; for (i=0; i-10; i++) { break; a++ } a }
function x() { a=0; for (i=0; i-10; i++) { break; a++ } return a }
''')
self.assertEqual(jsi.call_function('x'), 0)
def test_for_loop_try(self):
jsi = JSInterpreter('''
function x() {
for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} };
return 42 }
''')
self.assertEqual(jsi.call_function('x'), 42)
def test_literal_list(self):
jsi = JSInterpreter('''
function x() { [1, 2, "asdf", [5, 6, 7]][3] }
function x() { return [1, 2, "asdf", [5, 6, 7]][3] }
''')
self.assertEqual(jsi.call_function('x'), [5, 6, 7])
@ -177,6 +319,171 @@ class TestJSInterpreter(unittest.TestCase):
function x() { a=5; a -= 1, a+=3; return a }
''')
self.assertEqual(jsi.call_function('x'), 7)
jsi = JSInterpreter('''
function x() { a=5; return (a -= 1, a+=3, a); }
''')
self.assertEqual(jsi.call_function('x'), 7)
jsi = JSInterpreter('''
function x() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }
''')
self.assertEqual(jsi.call_function('x'), 5)
def test_void(self):
jsi = JSInterpreter('''
function x() { return void 42; }
''')
self.assertEqual(jsi.call_function('x'), None)
def test_return_function(self):
jsi = JSInterpreter('''
function x() { return [1, function(){return 1}][1] }
''')
self.assertEqual(jsi.call_function('x')([]), 1)
def test_null(self):
jsi = JSInterpreter('''
function x() { return null; }
''')
self.assertIs(jsi.call_function('x'), None)
jsi = JSInterpreter('''
function x() { return [null > 0, null < 0, null == 0, null === 0]; }
''')
self.assertEqual(jsi.call_function('x'), [False, False, False, False])
jsi = JSInterpreter('''
function x() { return [null >= 0, null <= 0]; }
''')
self.assertEqual(jsi.call_function('x'), [True, True])
def test_undefined(self):
jsi = JSInterpreter('''
function x() { return undefined === undefined; }
''')
self.assertTrue(jsi.call_function('x'))
jsi = JSInterpreter('''
function x() { return undefined; }
''')
self.assertIs(jsi.call_function('x'), JS_Undefined)
jsi = JSInterpreter('''
function x() { let v; return v; }
''')
self.assertIs(jsi.call_function('x'), JS_Undefined)
jsi = JSInterpreter('''
function x() { return [undefined === undefined, undefined == undefined, undefined < undefined, undefined > undefined]; }
''')
self.assertEqual(jsi.call_function('x'), [True, True, False, False])
jsi = JSInterpreter('''
function x() { return [undefined === 0, undefined == 0, undefined < 0, undefined > 0]; }
''')
self.assertEqual(jsi.call_function('x'), [False, False, False, False])
jsi = JSInterpreter('''
function x() { return [undefined >= 0, undefined <= 0]; }
''')
self.assertEqual(jsi.call_function('x'), [False, False])
jsi = JSInterpreter('''
function x() { return [undefined > null, undefined < null, undefined == null, undefined === null]; }
''')
self.assertEqual(jsi.call_function('x'), [False, False, True, False])
jsi = JSInterpreter('''
function x() { return [undefined === null, undefined == null, undefined < null, undefined > null]; }
''')
self.assertEqual(jsi.call_function('x'), [False, True, False, False])
jsi = JSInterpreter('''
function x() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; }
''')
for y in jsi.call_function('x'):
self.assertTrue(math.isnan(y))
jsi = JSInterpreter('''
function x() { let v; return v**0; }
''')
self.assertEqual(jsi.call_function('x'), 1)
jsi = JSInterpreter('''
function x() { let v; return [v>42, v<=42, v&&42, 42&&v]; }
''')
self.assertEqual(jsi.call_function('x'), [False, False, JS_Undefined, JS_Undefined])
jsi = JSInterpreter('function x(){return undefined ?? 42; }')
self.assertEqual(jsi.call_function('x'), 42)
def test_object(self):
jsi = JSInterpreter('''
function x() { return {}; }
''')
self.assertEqual(jsi.call_function('x'), {})
jsi = JSInterpreter('''
function x() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }
''')
self.assertEqual(jsi.call_function('x'), [42, 0])
jsi = JSInterpreter('''
function x() { let a; return a?.qq; }
''')
self.assertIs(jsi.call_function('x'), JS_Undefined)
jsi = JSInterpreter('''
function x() { let a = {m1: 42, m2: 0 }; return a?.qq; }
''')
self.assertIs(jsi.call_function('x'), JS_Undefined)
def test_regex(self):
jsi = JSInterpreter('''
function x() { let a=/,,[/,913,/](,)}/; }
''')
self.assertIs(jsi.call_function('x'), None)
jsi = JSInterpreter('''
function x() { let a=/,,[/,913,/](,)}/; return a; }
''')
attrs = set(('findall', 'finditer', 'flags', 'groupindex',
'groups', 'match', 'pattern', 'scanner',
'search', 'split', 'sub', 'subn'))
self.assertTrue(set(dir(jsi.call_function('x'))) > attrs)
jsi = JSInterpreter('''
function x() { let a=/,,[/,913,/](,)}/i; return a; }
''')
self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
jsi = JSInterpreter(r'''
function x() { let a=[/[)\\]/]; return a[0]; }
''')
self.assertEqual(jsi.call_function('x').pattern, r'[)\\]')
""" # fails
jsi = JSInterpreter(r'''
function x() { let a=100; a/=/[0-9]+/.exec('divide by 20 today')[0]; }
''')
self.assertEqual(jsi.call_function('x'), 5)
"""
def test_char_code_at(self):
jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
self.assertEqual(jsi.call_function('x', 0), 116)
self.assertEqual(jsi.call_function('x', 1), 101)
self.assertEqual(jsi.call_function('x', 2), 115)
self.assertEqual(jsi.call_function('x', 3), 116)
self.assertEqual(jsi.call_function('x', 4), None)
self.assertEqual(jsi.call_function('x', 'not_a_number'), 116)
def test_bitwise_operators_overflow(self):
jsi = JSInterpreter('function x(){return -524999584 << 5}')
self.assertEqual(jsi.call_function('x'), 379882496)
jsi = JSInterpreter('function x(){return 1236566549 << 5}')
self.assertEqual(jsi.call_function('x'), 915423904)
if __name__ == '__main__':

View File

@ -38,6 +38,9 @@ class BaseTestSubtitles(unittest.TestCase):
self.DL = FakeYDL()
self.ie = self.IE()
self.DL.add_info_extractor(self.ie)
if not self.IE.working():
print('Skipping: %s marked as not _WORKING' % self.IE.ie_key())
self.skipTest('IE marked as not _WORKING')
def getInfoDict(self):
info_dict = self.DL.extract_info(self.url, download=False)
@ -56,6 +59,21 @@ class BaseTestSubtitles(unittest.TestCase):
class TestYoutubeSubtitles(BaseTestSubtitles):
# Available subtitles for QRS8MkLhQmM:
# Language formats
# ru vtt, ttml, srv3, srv2, srv1, json3
# fr vtt, ttml, srv3, srv2, srv1, json3
# en vtt, ttml, srv3, srv2, srv1, json3
# nl vtt, ttml, srv3, srv2, srv1, json3
# de vtt, ttml, srv3, srv2, srv1, json3
# ko vtt, ttml, srv3, srv2, srv1, json3
# it vtt, ttml, srv3, srv2, srv1, json3
# zh-Hant vtt, ttml, srv3, srv2, srv1, json3
# hi vtt, ttml, srv3, srv2, srv1, json3
# pt-BR vtt, ttml, srv3, srv2, srv1, json3
# es-MX vtt, ttml, srv3, srv2, srv1, json3
# ja vtt, ttml, srv3, srv2, srv1, json3
# pl vtt, ttml, srv3, srv2, srv1, json3
url = 'QRS8MkLhQmM'
IE = YoutubeIE
@ -64,41 +82,60 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5')
self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9')
for lang in ['fr', 'de']:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
def test_youtube_subtitles_ttml_format(self):
def _test_subtitles_format(self, fmt, md5_hash, lang='en'):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'ttml'
self.DL.params['subtitlesformat'] = fmt
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54')
self.assertEqual(md5(subtitles[lang]), md5_hash)
def test_youtube_subtitles_ttml_format(self):
self._test_subtitles_format('ttml', 'c97ddf1217390906fa9fbd34901f3da2')
def test_youtube_subtitles_vtt_format(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'vtt'
self._test_subtitles_format('vtt', 'ae1bd34126571a77aabd4d276b28044d')
def test_youtube_subtitles_json3_format(self):
self._test_subtitles_format('json3', '688dd1ce0981683867e7fe6fde2a224b')
def _test_automatic_captions(self, url, lang):
self.url = url
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslangs'] = [lang]
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
self.assertTrue(subtitles[lang] is not None)
def test_youtube_automatic_captions(self):
self.url = '8YoUxe5ncPo'
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslangs'] = ['it']
subtitles = self.getSubtitles()
self.assertTrue(subtitles['it'] is not None)
# Available automatic captions for 8YoUxe5ncPo:
# Language formats (all in vtt, ttml, srv3, srv2, srv1, json3)
# gu, zh-Hans, zh-Hant, gd, ga, gl, lb, la, lo, tt, tr,
# lv, lt, tk, th, tg, te, fil, haw, yi, ceb, yo, de, da,
# el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv,
# bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy,
# hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur,
# mt, ms, mr, ug, ta, my, af, sw, is, am,
# *it*, iw, sv, ar,
# su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi,
# ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl,
# ky, sd
# ...
self._test_automatic_captions('8YoUxe5ncPo', 'it')
@unittest.skip('ASR subs all in all supported langs now')
def test_youtube_translated_subtitles(self):
# This video has a subtitles track, which can be translated
self.url = 'Ky9eprVWzlI'
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslangs'] = ['it']
subtitles = self.getSubtitles()
self.assertTrue(subtitles['it'] is not None)
# This video has a subtitles track, which can be translated (#4555)
self._test_automatic_captions('Ky9eprVWzlI', 'it')
def test_youtube_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
self.url = 'n5BB19UTcdA'
# Available automatic captions for 8YoUxe5ncPo:
# ...
# 8YoUxe5ncPo has no subtitles
self.url = '8YoUxe5ncPo'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
@ -128,6 +165,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
self.assertFalse(subtitles)
@unittest.skip('IE broken')
class TestTedSubtitles(BaseTestSubtitles):
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
IE = TEDIE
@ -152,18 +190,19 @@ class TestVimeoSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
self.assertEqual(md5(subtitles['en']), '386cbc9320b94e25cb364b97935e5dd1')
self.assertEqual(md5(subtitles['fr']), 'c9b69eef35bc6641c0d4da8a04f9dfac')
def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
self.url = 'http://vimeo.com/56015672'
self.url = 'http://vimeo.com/68093876'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertFalse(subtitles)
@unittest.skip('IE broken')
class TestWallaSubtitles(BaseTestSubtitles):
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
IE = WallaIE
@ -185,6 +224,7 @@ class TestWallaSubtitles(BaseTestSubtitles):
self.assertFalse(subtitles)
@unittest.skip('IE broken')
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
IE = CeskaTelevizeIE
@ -206,6 +246,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
self.assertFalse(subtitles)
@unittest.skip('IE broken')
class TestLyndaSubtitles(BaseTestSubtitles):
url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
IE = LyndaIE
@ -218,6 +259,7 @@ class TestLyndaSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
@unittest.skip('IE broken')
class TestNPOSubtitles(BaseTestSubtitles):
url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
IE = NPOIE
@ -230,6 +272,7 @@ class TestNPOSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
@unittest.skip('IE broken')
class TestMTVSubtitles(BaseTestSubtitles):
url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
IE = ComedyCentralIE
@ -253,8 +296,8 @@ class TestNRKSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['no']))
self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
self.assertEqual(set(subtitles.keys()), set(['nb-ttv']))
self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
class TestRaiPlaySubtitles(BaseTestSubtitles):
@ -277,6 +320,7 @@ class TestRaiPlaySubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd')
@unittest.skip('IE broken - DRM only')
class TestVikiSubtitles(BaseTestSubtitles):
url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
IE = VikiIE
@ -303,6 +347,7 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
@unittest.skip('IE broken')
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
IE = ThePlatformFeedIE
@ -338,7 +383,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['en']))
self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
def test_subtitles_in_page(self):
self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
@ -346,7 +391,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['en']))
self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
if __name__ == '__main__':

View File

@ -12,7 +12,9 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Various small unit tests
import io
import itertools
import json
import re
import xml.etree.ElementTree
from youtube_dl.utils import (
@ -40,11 +42,14 @@ from youtube_dl.utils import (
get_element_by_attribute,
get_elements_by_class,
get_elements_by_attribute,
get_first,
InAdvancePagedList,
int_or_none,
intlist_to_bytes,
is_html,
join_nonempty,
js_to_json,
LazyList,
limit_length,
merge_dicts,
mimetype2ext,
@ -79,6 +84,8 @@ from youtube_dl.utils import (
strip_or_none,
subtitles_filename,
timeconvert,
traverse_obj,
try_call,
unescapeHTML,
unified_strdate,
unified_timestamp,
@ -92,6 +99,7 @@ from youtube_dl.utils import (
urlencode_postdata,
urshift,
update_url_query,
variadic,
version_tuple,
xpath_with_ns,
xpath_element,
@ -112,12 +120,18 @@ from youtube_dl.compat import (
compat_getenv,
compat_os_name,
compat_setenv,
compat_str,
compat_urlparse,
compat_parse_qs,
)
class TestUtil(unittest.TestCase):
# yt-dlp shim
def assertCountEqual(self, expected, got, msg='count should be the same'):
return self.assertEqual(len(tuple(expected)), len(tuple(got)), msg=msg)
def test_timeconvert(self):
self.assertTrue(timeconvert('') is None)
self.assertTrue(timeconvert('bougrg') is None)
@ -370,6 +384,9 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1)
self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78)
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
@ -1475,6 +1492,315 @@ Line 1
self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
def test_LazyList(self):
it = list(range(10))
self.assertEqual(list(LazyList(it)), it)
self.assertEqual(LazyList(it).exhaust(), it)
self.assertEqual(LazyList(it)[5], it[5])
self.assertEqual(LazyList(it)[5:], it[5:])
self.assertEqual(LazyList(it)[:5], it[:5])
self.assertEqual(LazyList(it)[::2], it[::2])
self.assertEqual(LazyList(it)[1::2], it[1::2])
self.assertEqual(LazyList(it)[5::-1], it[5::-1])
self.assertEqual(LazyList(it)[6:2:-2], it[6:2:-2])
self.assertEqual(LazyList(it)[::-1], it[::-1])
self.assertTrue(LazyList(it))
self.assertFalse(LazyList(range(0)))
self.assertEqual(len(LazyList(it)), len(it))
self.assertEqual(repr(LazyList(it)), repr(it))
self.assertEqual(compat_str(LazyList(it)), compat_str(it))
self.assertEqual(list(LazyList(it, reverse=True)), it[::-1])
self.assertEqual(list(reversed(LazyList(it))[::-1]), it)
self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7])
def test_LazyList_laziness(self):
def test(ll, idx, val, cache):
self.assertEqual(ll[idx], val)
self.assertEqual(ll._cache, list(cache))
ll = LazyList(range(10))
test(ll, 0, 0, range(1))
test(ll, 5, 5, range(6))
test(ll, -3, 7, range(10))
ll = LazyList(range(10), reverse=True)
test(ll, -1, 0, range(1))
test(ll, 3, 6, range(10))
ll = LazyList(itertools.count())
test(ll, 10, 10, range(11))
ll = reversed(ll)
test(ll, -15, 14, range(15))
def test_try_call(self):
def total(*x, **kwargs):
return sum(x) + sum(kwargs.values())
self.assertEqual(try_call(None), None,
msg='not a fn should give None')
self.assertEqual(try_call(lambda: 1), 1,
msg='int fn with no expected_type should give int')
self.assertEqual(try_call(lambda: 1, expected_type=int), 1,
msg='int fn with expected_type int should give int')
self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
msg='int fn with wrong expected_type should give None')
self.assertEqual(try_call(total, args=(0, 1, 0, ), expected_type=int), 1,
msg='fn should accept arglist')
self.assertEqual(try_call(total, kwargs={'a': 0, 'b': 1, 'c': 0}, expected_type=int), 1,
msg='fn should accept kwargs')
self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
msg='int fn with no expected_type should give None')
self.assertEqual(try_call(lambda x: {}, total, args=(42, ), expected_type=int), 42,
msg='expect first int result with expected_type int')
def test_variadic(self):
self.assertEqual(variadic(None), (None, ))
self.assertEqual(variadic('spam'), ('spam', ))
self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
def test_traverse_obj(self):
_TEST_DATA = {
100: 100,
1.2: 1.2,
'str': 'str',
'None': None,
'...': Ellipsis,
'urls': [
{'index': 0, 'url': 'https://www.example.com/0'},
{'index': 1, 'url': 'https://www.example.com/1'},
],
'data': (
{'index': 2},
{'index': 3},
),
'dict': {},
}
# Test base functionality
self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
msg='allow tuple path')
self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
msg='allow list path')
self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
msg='allow iterable path')
self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
msg='single items should be treated as a path')
self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)
# Test Ellipsis behavior
self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
(item for item in _TEST_DATA.values() if item is not None),
msg='`...` should give all values except `None`')
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
msg='`...` selection for dicts should select all values')
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
['https://www.example.com/0', 'https://www.example.com/1'],
msg='nested `...` queries should work')
self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), range(4),
msg='`...` query result should be flattened')
# Test function as key
self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
[_TEST_DATA['urls']],
msg='function as query key should perform a filter based on (key, value)')
self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), {'str'},
msg='exceptions in the query function should be caught')
# Test alternative paths
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
msg='multiple `paths` should be treated as alternative paths')
self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
msg='alternatives should exit early')
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
msg='alternatives should return `default` if exhausted')
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, 'fail'), 100), 100,
msg='alternatives should track their own branching return')
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', Ellipsis), ('data', Ellipsis)), list(_TEST_DATA['data']),
msg='alternatives on empty objects should search further')
# Test branch and path nesting
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
msg='tuple as key should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
msg='list as key should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
msg='double nesting in path should be treated as paths')
self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
msg='do not fail early on branching')
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
['https://www.example.com/0', 'https://www.example.com/1'],
msg='triple nesting in path should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (Ellipsis, 'url')))),
['https://www.example.com/0', 'https://www.example.com/1'],
msg='ellipsis as branch path start gets flattened')
# Test dictionary as key
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
msg='dict key should result in a dict with the same keys')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
{0: 'https://www.example.com/0'},
msg='dict key should allow paths')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
{0: ['https://www.example.com/0']},
msg='tuple in dict path should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
{0: ['https://www.example.com/0']},
msg='double nesting in dict path should be treated as paths')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
{0: ['https://www.example.com/1', 'https://www.example.com/0']},
msg='triple nesting in dict path should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
msg='remove `None` values when dict key')
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
msg='do not remove `None` values if `default`')
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {0: {}},
msg='do not remove empty values when dict key')
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: {}},
msg='do not remove empty values when dict key and a default')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {0: []},
msg='if branch in dict key not successful, return `[]`')
# Testing default parameter behavior
_DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
msg='default value should be `None`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=Ellipsis), Ellipsis,
msg='chained fails should result in default')
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
msg='should not short cirquit on `None`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
msg='invalid dict key should result in `default`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
msg='`None` is a deliberate sentinel and should become `default`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
msg='`IndexError` should result in `default`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=1), 1,
msg='if branched but not successful return `default` if defined, not `[]`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=None), None,
msg='if branched but not successful return `default` even if `default` is `None`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail')), [],
msg='if branched but not successful return `[]`, not `default`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [],
msg='if branched but object is empty return `[]`, not `default`')
# Testing expected_type behavior
_EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=compat_str), 'str',
msg='accept matching `expected_type` type')
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), None,
msg='reject non matching `expected_type` type')
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: compat_str(x)), '0',
msg='transform type using type function')
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str',
expected_type=lambda _: 1 / 0), None,
msg='wrap expected_type function in try_call')
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=compat_str), ['str'],
msg='eliminate items that expected_type fails on')
# Test get_all behavior
_GET_ALL_DATA = {'key': [0, 1, 2]}
self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', Ellipsis), get_all=False), 0,
msg='if not `get_all`, return only first matching value')
self.assertEqual(traverse_obj(_GET_ALL_DATA, Ellipsis, get_all=False), [0, 1, 2],
msg='do not overflatten if not `get_all`')
# Test casesense behavior
_CASESENSE_DATA = {
'KeY': 'value0',
0: {
'KeY': 'value1',
0: {'KeY': 'value2'},
},
# FULLWIDTH LATIN CAPITAL LETTER K
'\uff2bey': 'value3',
}
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None,
msg='dict keys should be case sensitive unless `casesense`')
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY',
casesense=False), 'value0',
msg='allow non matching key case if `casesense`')
self.assertEqual(traverse_obj(_CASESENSE_DATA, '\uff4bey', # FULLWIDTH LATIN SMALL LETTER K
casesense=False), 'value3',
msg='allow non matching Unicode key case if `casesense`')
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)),
casesense=False), ['value1'],
msg='allow non matching key case in branch if `casesense`')
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)),
casesense=False), ['value2'],
msg='allow non matching key case in branch path if `casesense`')
# Test traverse_string behavior
_TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None,
msg='do not traverse into string if not `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0),
_traverse_string=True), 's',
msg='traverse into string if `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1),
_traverse_string=True), '.',
msg='traverse into converted data if `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis),
_traverse_string=True), list('str'),
msg='`...` branching into string should result in list')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
_traverse_string=True), ['s', 'r'],
msg='branching into string should result in list')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda _, x: x),
_traverse_string=True), list('str'),
msg='function branching into string should result in list')
# Test is_user_input behavior
_IS_USER_INPUT_DATA = {'range8': list(range(8))}
self.assertEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3'),
_is_user_input=True), 3,
msg='allow for string indexing if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3:'),
_is_user_input=True), tuple(range(8))[3:],
msg='allow for string slice if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':4:2'),
_is_user_input=True), tuple(range(8))[:4:2],
msg='allow step in string slice if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':'),
_is_user_input=True), range(8),
msg='`:` should be treated as `...` if `is_user_input`')
with self.assertRaises(TypeError, msg='too many params should result in error'):
traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), _is_user_input=True)
# Test re.Match as input obj
mobj = re.match(r'^0(12)(?P<group>3)(4)?$', '0123')
self.assertEqual(traverse_obj(mobj, Ellipsis), [x for x in mobj.groups() if x is not None],
msg='`...` on a `re.Match` should give its `groups()`')
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
msg='function on a `re.Match` should give groupno, value starting at 0')
self.assertEqual(traverse_obj(mobj, 'group'), '3',
msg='str key on a `re.Match` should give group with that name')
self.assertEqual(traverse_obj(mobj, 2), '3',
msg='int key on a `re.Match` should give group with that name')
self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
msg='str key on a `re.Match` should respect casesense')
self.assertEqual(traverse_obj(mobj, 'fail'), None,
msg='failing str key on a `re.Match` should return `default`')
self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
msg='failing str key on a `re.Match` should return `default`')
self.assertEqual(traverse_obj(mobj, 8), None,
msg='failing int key on a `re.Match` should return `default`')
def test_get_first(self):
self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam')
def test_join_nonempty(self):
self.assertEqual(join_nonempty('a', 'b'), 'a-b')
self.assertEqual(join_nonempty(
'a', 'b', 'c', 'd',
from_dict={'a': 'c', 'c': [], 'b': 'd', 'd': None}), 'c-d')
if __name__ == '__main__':
unittest.main()

View File

@ -12,10 +12,11 @@ import io
import re
import string
from youtube_dl.compat import compat_str, compat_urlretrieve
from test.helper import FakeYDL
from youtube_dl.extractor import YoutubeIE
from youtube_dl.jsinterp import JSInterpreter
from youtube_dl.compat import compat_str, compat_urlretrieve
_SIG_TESTS = [
(
@ -90,12 +91,61 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
),
(
'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js',
'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA',
),
(
'https://www.youtube.com/s/player/324f67b9/player_ias.vflset/en_US/base.js',
'xdftNy7dh9QGnhW', '22qLGxrmX8F1rA',
),
(
'https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js',
'TDCstCG66tEAO5pR9o', 'dbxNtZ14c-yWyw',
),
(
'https://www.youtube.com/s/player/c81bbb4a/player_ias.vflset/en_US/base.js',
'gre3EcLurNY2vqp94', 'Z9DfGxWP115WTg',
),
(
'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js',
'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
),
(
'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js',
'5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw',
),
(
'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js',
'5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ',
),
(
'https://www.youtube.com/s/player/c2199353/player_ias.vflset/en_US/base.js',
'5EHDMgYLV6HPGk_Mu-kk', 'AD5rgS85EkrE7',
),
(
'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js',
'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg',
),
(
'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
'-Txvy6bT5R6LqgnQNx', 'dcklJCnRUHbgSg',
),
(
'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
),
(
'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
),
]
class TestPlayerInfo(unittest.TestCase):
def test_youtube_extract_player_info(self):
PLAYER_URLS = (
('https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', '4c3f79c5'),
('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'),
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),

View File

@ -73,6 +73,7 @@ from .utils import (
PostProcessingError,
preferredencoding,
prepend_extension,
process_communicate_or_kill,
register_socks_protocols,
render_table,
replace_extension,
@ -720,7 +721,7 @@ class YoutubeDL(object):
filename = encodeFilename(filename, True).decode(preferredencoding())
return sanitize_path(filename)
except ValueError as err:
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
self.report_error('Error in output template: ' + error_to_compat_str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None
def _match_entry(self, info_dict, incomplete):
@ -1569,9 +1570,6 @@ class YoutubeDL(object):
else:
formats = info_dict['formats']
if not formats:
raise ExtractorError('No video formats found!')
def is_wellformed(f):
url = f.get('url')
if not url:
@ -1584,7 +1582,10 @@ class YoutubeDL(object):
return True
# Filter out malformed formats for better extraction robustness
formats = list(filter(is_wellformed, formats))
formats = list(filter(is_wellformed, formats or []))
if not formats:
raise ExtractorError('No video formats found!')
formats_dict = {}
@ -1778,10 +1779,9 @@ class YoutubeDL(object):
assert info_dict.get('_type', 'video') == 'video'
max_downloads = self.params.get('max_downloads')
if max_downloads is not None:
if self._num_downloads >= int(max_downloads):
raise MaxDownloadsReached()
max_downloads = int_or_none(self.params.get('max_downloads')) or float('inf')
if self._num_downloads >= max_downloads:
raise MaxDownloadsReached()
# TODO: backward compatibility, to be removed
info_dict['fulltitle'] = info_dict['title']
@ -2058,9 +2058,12 @@ class YoutubeDL(object):
try:
self.post_process(filename, info_dict)
except (PostProcessingError) as err:
self.report_error('postprocessing: %s' % str(err))
self.report_error('postprocessing: %s' % error_to_compat_str(err))
return
self.record_download_archive(info_dict)
# avoid possible nugatory search for further items (PR #26638)
if self._num_downloads >= max_downloads:
raise MaxDownloadsReached()
def download(self, url_list):
"""Download a given list of URLs."""
@ -2323,7 +2326,7 @@ class YoutubeDL(object):
['git', 'rev-parse', '--short', 'HEAD'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
cwd=os.path.dirname(os.path.abspath(__file__)))
out, err = sp.communicate()
out, err = process_communicate_or_kill(sp)
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
self._write_string('[debug] Git HEAD: ' + out + '\n')

View File

@ -8,6 +8,18 @@ from .utils import bytes_to_intlist, intlist_to_bytes
BLOCK_SIZE_BYTES = 16
def pkcs7_padding(data):
"""
PKCS#7 padding
@param {int[]} data cleartext
@returns {int[]} padding data
"""
remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES
return data + [remaining_length] * remaining_length
def aes_ctr_decrypt(data, key, counter):
"""
Decrypt with aes in counter mode
@ -76,8 +88,7 @@ def aes_cbc_encrypt(data, key, iv):
previous_cipher_block = iv
for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
remaining_length = BLOCK_SIZE_BYTES - len(block)
block += [remaining_length] * remaining_length
block = pkcs7_padding(block)
mixed_block = xor(block, previous_cipher_block)
encrypted_block = aes_encrypt(mixed_block, expanded_key)
@ -88,6 +99,28 @@ def aes_cbc_encrypt(data, key, iv):
return encrypted_data
def aes_ecb_encrypt(data, key):
"""
Encrypt with aes in ECB mode. Using PKCS#7 padding
@param {int[]} data cleartext
@param {int[]} key 16/24/32-Byte cipher key
@returns {int[]} encrypted data
"""
expanded_key = key_expansion(key)
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
encrypted_data = []
for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
block = pkcs7_padding(block)
encrypted_block = aes_encrypt(block, expanded_key)
encrypted_data += encrypted_block
return encrypted_data
def key_expansion(data):
"""
Generate key schedule
@ -303,7 +336,7 @@ def xor(data1, data2):
def rijndael_mul(a, b):
if(a == 0 or b == 0):
if (a == 0 or b == 0):
return 0
return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]

View File

@ -10,12 +10,21 @@ import traceback
from .compat import compat_getenv
from .utils import (
error_to_compat_str,
expand_path,
is_outdated_version,
try_get,
write_json_file,
)
from .version import __version__
class Cache(object):
_YTDL_DIR = 'youtube-dl'
_VERSION_KEY = _YTDL_DIR + '_version'
_DEFAULT_VERSION = '2021.12.17'
def __init__(self, ydl):
self._ydl = ydl
@ -23,7 +32,7 @@ class Cache(object):
res = self._ydl.params.get('cachedir')
if res is None:
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
res = os.path.join(cache_root, 'youtube-dl')
res = os.path.join(cache_root, self._YTDL_DIR)
return expand_path(res)
def _get_cache_fn(self, section, key, dtype):
@ -50,13 +59,22 @@ class Cache(object):
except OSError as ose:
if ose.errno != errno.EEXIST:
raise
write_json_file(data, fn)
write_json_file({self._VERSION_KEY: __version__, 'data': data}, fn)
except Exception:
tb = traceback.format_exc()
self._ydl.report_warning(
'Writing cache to %r failed: %s' % (fn, tb))
def load(self, section, key, dtype='json', default=None):
def _validate(self, data, min_ver):
version = try_get(data, lambda x: x[self._VERSION_KEY])
if not version: # Backward compatibility
data, version = {'data': data}, self._DEFAULT_VERSION
if not is_outdated_version(version, min_ver or '0', assume_new=False):
return data['data']
self._ydl.to_screen(
'Discarding old cache from version {version} (needs {min_ver})'.format(**locals()))
def load(self, section, key, dtype='json', default=None, min_ver=None):
assert dtype in ('json',)
if not self.enabled:
@ -66,12 +84,12 @@ class Cache(object):
try:
try:
with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
return json.load(cachef)
return self._validate(json.load(cachef), min_ver)
except ValueError:
try:
file_size = os.path.getsize(cache_fn)
except (OSError, IOError) as oe:
file_size = str(oe)
file_size = error_to_compat_str(oe)
self._ydl.report_warning(
'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
except IOError:

1667
youtube_dl/casefold.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -21,6 +21,19 @@ import subprocess
import sys
import xml.etree.ElementTree
# deal with critical unicode/str things first
try:
# Python 2
compat_str, compat_basestring, compat_chr = (
unicode, basestring, unichr
)
from .casefold import casefold as compat_casefold
except NameError:
compat_str, compat_basestring, compat_chr = (
str, str, chr
)
compat_casefold = lambda s: s.casefold()
try:
import collections.abc as compat_collections_abc
except ImportError:
@ -2373,11 +2386,6 @@ try:
except ImportError:
import BaseHTTPServer as compat_http_server
try:
compat_str = unicode # Python 2
except NameError:
compat_str = str
try:
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
from urllib.parse import unquote as compat_urllib_parse_unquote
@ -2508,22 +2516,11 @@ except ImportError: # Python < 3.4
return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
try:
compat_basestring = basestring # Python 2
except NameError:
compat_basestring = str
try:
compat_chr = unichr # Python 2
except NameError:
compat_chr = chr
try:
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
except ImportError: # Python 2.6
from xml.parsers.expat import ExpatError as compat_xml_parse_error
etree = xml.etree.ElementTree
@ -2890,6 +2887,7 @@ else:
_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
def compat_get_terminal_size(fallback=(80, 24)):
from .utils import process_communicate_or_kill
columns = compat_getenv('COLUMNS')
if columns:
columns = int(columns)
@ -2906,7 +2904,7 @@ else:
sp = subprocess.Popen(
['stty', 'size'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = sp.communicate()
out, err = process_communicate_or_kill(sp)
_lines, _columns = map(int, out.split())
except Exception:
_columns, _lines = _terminal_size(*fallback)
@ -2984,7 +2982,6 @@ except ImportError:
except ImportError:
compat_filter = filter
try:
from future_builtins import zip as compat_zip
except ImportError: # not 2.6+ or is 3.x
@ -2994,6 +2991,82 @@ except ImportError: # not 2.6+ or is 3.x
compat_zip = zip
# method renamed between Py2/3
try:
from itertools import zip_longest as compat_itertools_zip_longest
except ImportError:
from itertools import izip_longest as compat_itertools_zip_longest
# new class in collections
try:
from collections import ChainMap as compat_collections_chain_map
# Py3.3's ChainMap is deficient
if sys.version_info < (3, 4):
raise ImportError
except ImportError:
# Py <= 3.3
class compat_collections_chain_map(compat_collections_abc.MutableMapping):
maps = [{}]
def __init__(self, *maps):
self.maps = list(maps) or [{}]
def __getitem__(self, k):
for m in self.maps:
if k in m:
return m[k]
raise KeyError(k)
def __setitem__(self, k, v):
self.maps[0].__setitem__(k, v)
return
def __contains__(self, k):
return any((k in m) for m in self.maps)
def __delitem(self, k):
if k in self.maps[0]:
del self.maps[0][k]
return
raise KeyError(k)
def __delitem__(self, k):
self.__delitem(k)
def __iter__(self):
return itertools.chain(*reversed(self.maps))
def __len__(self):
return len(iter(self))
# to match Py3, don't del directly
def pop(self, k, *args):
if self.__contains__(k):
off = self.__getitem__(k)
self.__delitem(k)
return off
elif len(args) > 0:
return args[0]
raise KeyError(k)
def new_child(self, m=None, **kwargs):
m = m or {}
m.update(kwargs)
return compat_collections_chain_map(m, *self.maps)
@property
def parents(self):
return compat_collections_chain_map(*(self.maps[1:]))
# Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?)
compat_re_Pattern = type(re.compile(''))
# and on the type of a match
compat_re_Match = type(re.match('a', 'a'))
if sys.version_info < (3, 3):
def compat_b64decode(s, *args, **kwargs):
if isinstance(s, compat_str):
@ -3028,8 +3101,10 @@ __all__ = [
'compat_Struct',
'compat_b64decode',
'compat_basestring',
'compat_casefold',
'compat_chr',
'compat_collections_abc',
'compat_collections_chain_map',
'compat_cookiejar',
'compat_cookiejar_Cookie',
'compat_cookies',
@ -3050,6 +3125,7 @@ __all__ = [
'compat_input',
'compat_integer_types',
'compat_itertools_count',
'compat_itertools_zip_longest',
'compat_kwargs',
'compat_map',
'compat_numeric_types',
@ -3057,6 +3133,8 @@ __all__ = [
'compat_os_name',
'compat_parse_qs',
'compat_print',
'compat_re_Match',
'compat_re_Pattern',
'compat_realpath',
'compat_setenv',
'compat_shlex_quote',

View File

@ -22,6 +22,7 @@ from ..utils import (
handle_youtubedl_headers,
check_executable,
is_outdated_version,
process_communicate_or_kill,
)
@ -104,7 +105,7 @@ class ExternalFD(FileDownloader):
p = subprocess.Popen(
cmd, stderr=subprocess.PIPE)
_, stderr = p.communicate()
_, stderr = process_communicate_or_kill(p)
if p.returncode != 0:
self.to_stderr(stderr.decode('utf-8', 'replace'))
return p.returncode
@ -141,7 +142,7 @@ class CurlFD(ExternalFD):
# curl writes the progress to stderr so don't capture it.
p = subprocess.Popen(cmd)
p.communicate()
process_communicate_or_kill(p)
return p.returncode
@ -336,14 +337,17 @@ class FFmpegFD(ExternalFD):
proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
try:
retval = proc.wait()
except KeyboardInterrupt:
# subprocces.run would send the SIGKILL signal to ffmpeg and the
except BaseException as e:
# subprocess.run would send the SIGKILL signal to ffmpeg and the
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
# produces a file that is playable (this is mostly useful for live
# streams). Note that Windows is not affected and produces playable
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
if sys.platform != 'win32':
proc.communicate(b'q')
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32':
process_communicate_or_kill(proc, b'q')
else:
proc.kill()
proc.wait()
raise
return retval

View File

@ -89,11 +89,13 @@ class RtmpFD(FileDownloader):
self.to_screen('')
cursor_in_new_line = True
self.to_screen('[rtmpdump] ' + line)
finally:
if not cursor_in_new_line:
self.to_screen('')
return proc.wait()
except BaseException: # Including KeyboardInterrupt
proc.kill()
proc.wait()
if not cursor_in_new_line:
self.to_screen('')
return proc.returncode
raise
url = info_dict['url']
player_url = info_dict.get('player_url')

View File

@ -31,30 +31,34 @@ from ..utils import (
class ADNIE(InfoExtractor):
IE_DESC = 'Anime Digital Network'
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
_TEST = {
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
'md5': '0319c99885ff5547565cacb4f3f9348d',
IE_DESC = 'Animation Digital Network'
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
_TESTS = [{
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
'md5': '1c9ef066ceb302c86f80c2b371615261',
'info_dict': {
'id': '7778',
'id': '9841',
'ext': 'mp4',
'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
'series': 'Blue Exorcist - Kyôto Saga',
'duration': 1467,
'release_date': '20170106',
'title': 'Fruits Basket - Episode 1',
'description': 'md5:14be2f72c3c96809b0ca424b0097d336',
'series': 'Fruits Basket',
'duration': 1437,
'release_date': '20190405',
'comment_count': int,
'average_rating': float,
'season_number': 2,
'episode': 'Début des hostilités',
'season_number': 1,
'episode': 'À ce soir !',
'episode_number': 1,
}
}
},
'skip': 'Only available in region (FR, ...)',
}, {
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
'only_matching': True,
}]
_NETRC_MACHINE = 'animedigitalnetwork'
_BASE_URL = 'http://animedigitalnetwork.fr'
_API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
_NETRC_MACHINE = 'animationdigitalnetwork'
_BASE = 'animationdigitalnetwork.fr'
_API_BASE_URL = 'https://gw.api.' + _BASE + '/'
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
_HEADERS = {}
_LOGIN_ERR_MESSAGE = 'Unable to log in'
@ -82,14 +86,14 @@ class ADNIE(InfoExtractor):
if subtitle_location:
enc_subtitles = self._download_webpage(
subtitle_location, video_id, 'Downloading subtitles data',
fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
fatal=False, headers={'Origin': 'https://' + self._BASE})
if not enc_subtitles:
return None
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
# http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
bytes_to_intlist(binascii.unhexlify(self._K + '7fac1178830cfe0c')),
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
))
subtitles_json = self._parse_json(
@ -138,9 +142,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
if not username:
return
try:
url = self._API_BASE_URL + 'authentication/login'
access_token = (self._download_json(
self._API_BASE_URL + 'authentication/login', None,
'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
url, None, 'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
data=urlencode_postdata({
'password': password,
'rememberMe': False,
@ -153,7 +157,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
message = None
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
resp = self._parse_json(
e.cause.read().decode(), None, fatal=False) or {}
self._webpage_read_content(e.cause, url, username),
username, fatal=False) or {}
message = resp.get('message') or resp.get('code')
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
@ -211,7 +216,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
# This usually goes away with a different random pkcs1pad, so retry
continue
error = self._parse_json(e.cause.read(), video_id)
error = self._parse_json(
self._webpage_read_content(e.cause, links_url, video_id),
video_id, fatal=False) or {}
message = error.get('message')
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
self.raise_geo_restricted(msg=message)

View File

@ -20,8 +20,8 @@ class AENetworksBaseIE(ThePlatformIE):
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
fyi\.tv
)/'''
_THEPLATFORM_KEY = 'crazyjava'
_THEPLATFORM_SECRET = 's3cr3t'
_THEPLATFORM_KEY = '43jXaGRQud'
_THEPLATFORM_SECRET = 'S10BPXHMlb'
_DOMAIN_MAP = {
'history.com': ('HISTORY', 'history'),
'aetv.com': ('AETV', 'aetv'),

View File

@ -15,7 +15,7 @@ from ..utils import (
class AmericasTestKitchenIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
'md5': 'b861c3e365ac38ad319cfd509c30577f',
@ -23,15 +23,20 @@ class AmericasTestKitchenIE(InfoExtractor):
'id': '5b400b9ee338f922cb06450c',
'title': 'Japanese Suppers',
'ext': 'mp4',
'display_id': 'weeknight-japanese-suppers',
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
'thumbnail': r're:^https?://',
'timestamp': 1523318400,
'upload_date': '20180410',
'release_date': '20180410',
'timestamp': 1523304000,
'upload_date': '20180409',
'release_date': '20180409',
'series': "America's Test Kitchen",
'season': 'Season 18',
'season_number': 18,
'episode': 'Japanese Suppers',
'episode_number': 15,
'duration': 1376,
'thumbnail': r're:^https?://',
'average_rating': 0,
'view_count': int,
},
'params': {
'skip_download': True,
@ -44,15 +49,20 @@ class AmericasTestKitchenIE(InfoExtractor):
'id': '5fbe8c61bda2010001c6763b',
'title': 'Simple Chicken Dinner',
'ext': 'mp4',
'display_id': 'atktv_2103_simple-chicken-dinner_full-episode_web-mp4',
'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
'thumbnail': r're:^https?://',
'timestamp': 1610755200,
'upload_date': '20210116',
'release_date': '20210116',
'timestamp': 1610737200,
'upload_date': '20210115',
'release_date': '20210115',
'series': "America's Test Kitchen",
'season': 'Season 21',
'season_number': 21,
'episode': 'Simple Chicken Dinner',
'episode_number': 3,
'duration': 1397,
'thumbnail': r're:^https?://',
'view_count': int,
'average_rating': 0,
},
'params': {
'skip_download': True,
@ -60,6 +70,12 @@ class AmericasTestKitchenIE(InfoExtractor):
}, {
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
'only_matching': True,
}, {
'url': 'https://www.americastestkitchen.com/cookscountry/episode/564-when-only-chocolate-will-do',
'only_matching': True,
}, {
'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
'only_matching': True,
}, {
'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
'only_matching': True,
@ -94,7 +110,7 @@ class AmericasTestKitchenIE(InfoExtractor):
class AmericasTestKitchenSeasonIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|(?P<cooks>cooks(?:country|illustrated)))\.com(?:(?:/(?P<show2>cooks(?:country|illustrated)))?(?:/?$|(?<!ated)(?<!ated\.com)/episodes/browse/season_(?P<season>\d+)))'
_TESTS = [{
# ATK Season
'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
@ -105,48 +121,93 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
'playlist_count': 13,
}, {
# Cooks Country Season
'url': 'https://www.cookscountry.com/episodes/browse/season_12',
'url': 'https://www.americastestkitchen.com/cookscountry/episodes/browse/season_12',
'info_dict': {
'id': 'season_12',
'title': 'Season 12',
},
'playlist_count': 13,
}, {
# America's Test Kitchen Series
'url': 'https://www.americastestkitchen.com/',
'info_dict': {
'id': 'americastestkitchen',
'title': 'America\'s Test Kitchen',
},
'playlist_count': 558,
}, {
# Cooks Country Series
'url': 'https://www.americastestkitchen.com/cookscountry',
'info_dict': {
'id': 'cookscountry',
'title': 'Cook\'s Country',
},
'playlist_count': 199,
}, {
'url': 'https://www.americastestkitchen.com/cookscountry/',
'only_matching': True,
}, {
'url': 'https://www.cookscountry.com/episodes/browse/season_12',
'only_matching': True,
}, {
'url': 'https://www.cookscountry.com',
'only_matching': True,
}, {
'url': 'https://www.americastestkitchen.com/cooksillustrated/',
'only_matching': True,
}, {
'url': 'https://www.cooksillustrated.com',
'only_matching': True,
}]
def _real_extract(self, url):
show_name, season_number = re.match(self._VALID_URL, url).groups()
season_number = int(season_number)
match = re.match(self._VALID_URL, url).groupdict()
show = match.get('show2')
show_path = ('/' + show) if show else ''
show = show or match['show']
season_number = int_or_none(match.get('season'))
slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
slug, title = {
'americastestkitchen': ('atk', 'America\'s Test Kitchen'),
'cookscountry': ('cco', 'Cook\'s Country'),
'cooksillustrated': ('cio', 'Cook\'s Illustrated'),
}[show]
season = 'Season %d' % season_number
facet_filters = [
'search_document_klass:episode',
'search_show_slug:' + slug,
]
if season_number:
playlist_id = 'season_%d' % season_number
playlist_title = 'Season %d' % season_number
facet_filters.append('search_season_list:' + playlist_title)
else:
playlist_id = show
playlist_title = title
season_search = self._download_json(
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
season, headers={
'Origin': 'https://www.%s.com' % show_name,
playlist_id, headers={
'Origin': 'https://www.americastestkitchen.com',
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
'X-Algolia-Application-Id': 'Y1FNZXUI30',
}, query={
'facetFilters': json.dumps([
'search_season_list:' + season,
'search_document_klass:episode',
'search_show_slug:' + slug,
]),
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
'facetFilters': json.dumps(facet_filters),
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
'attributesToHighlight': '',
'hitsPerPage': 1000,
})
def entries():
for episode in (season_search.get('hits') or []):
search_url = episode.get('search_url')
search_url = episode.get('search_url') # always formatted like '/episode/123-title-of-episode'
if not search_url:
continue
yield {
'_type': 'url',
'url': 'https://www.%s.com%s' % (show_name, search_url),
'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
'url': 'https://www.americastestkitchen.com%s%s' % (show_path, search_url),
'id': try_get(episode, lambda e: e['objectID'].rsplit('_', 1)[-1]),
'title': episode.get('title'),
'description': episode.get('description'),
'timestamp': unified_timestamp(episode.get('search_document_date')),
@ -156,4 +217,4 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
}
return self.playlist_result(
entries(), 'season_%d' % season_number, season)
entries(), playlist_id, playlist_title)

View File

@ -0,0 +1,173 @@
# coding: utf-8
from __future__ import unicode_literals
import json
from ..utils import (
strip_or_none,
traverse_obj,
)
from .common import InfoExtractor
class BlerpIE(InfoExtractor):
IE_NAME = 'blerp'
_VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a',
'info_dict': {
'id': '6320fe8745636cb4dd677a5a',
'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016',
'uploader': 'luminousaj',
'uploader_id': '5fb81e51aa66ae000c395478',
'ext': 'mp3',
'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
}
}, {
'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
'info_dict': {
'id': '5bc94ef4796001000498429f',
'title': 'Yee',
'uploader': '179617322678353920',
'uploader_id': '5ba99cf71386730004552c42',
'ext': 'mp3',
'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
}
}]
_GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
_GRAPHQL_QUERY = (
'''query webBitePageGetBite($_id: MongoID!) {
web {
biteById(_id: $_id) {
...bitePageFrag
__typename
}
__typename
}
}
fragment bitePageFrag on Bite {
_id
title
userKeywords
keywords
color
visibility
isPremium
owned
price
extraReview
isAudioExists
image {
filename
original {
url
__typename
}
__typename
}
userReactions {
_id
reactions
createdAt
__typename
}
topReactions
totalSaveCount
saved
blerpLibraryType
license
licenseMetaData
playCount
totalShareCount
totalFavoriteCount
totalAddedToBoardCount
userCategory
userAudioQuality
audioCreationState
transcription
userTranscription
description
createdAt
updatedAt
author
listingType
ownerObject {
_id
username
profileImage {
filename
original {
url
__typename
}
__typename
}
__typename
}
transcription
favorited
visibility
isCurated
sourceUrl
audienceRating
strictAudienceRating
ownerId
reportObject {
reportedContentStatus
__typename
}
giphy {
mp4
gif
__typename
}
audio {
filename
original {
url
__typename
}
mp3 {
url
__typename
}
__typename
}
__typename
}
''')
def _real_extract(self, url):
audio_id = self._match_id(url)
data = {
'operationName': self._GRAPHQL_OPERATIONNAME,
'query': self._GRAPHQL_QUERY,
'variables': {
'_id': audio_id
}
}
headers = {
'Content-Type': 'application/json'
}
json_result = self._download_json('https://api.blerp.com/graphql',
audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)
bite_json = json_result['data']['web']['biteById']
info_dict = {
'id': bite_json['_id'],
'url': bite_json['audio']['mp3']['url'],
'title': bite_json['title'],
'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
'ext': 'mp3',
'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None)
}
return info_dict

View File

@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals
import re
@ -12,13 +13,28 @@ from ..utils import (
class BongaCamsIE(InfoExtractor):
_VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)'
_VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.(?:com|net))/(?P<id>[^/?&#]+)'
_TESTS = [{
'url': 'https://de.bongacams.com/azumi-8',
'only_matching': True,
}, {
'url': 'https://cn.bongacams.com/azumi-8',
'only_matching': True,
}, {
'url': 'https://de.bongacams.net/claireashton',
'info_dict': {
'id': 'claireashton',
'ext': 'mp4',
'title': r're:ClaireAshton \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
'age_limit': 18,
'uploader_id': 'ClaireAshton',
'uploader': 'ClaireAshton',
'like_count': int,
'is_live': True,
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):

View File

@ -0,0 +1,74 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
traverse_obj,
try_get,
)
class CallinIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?:[^/#?-]+-)*(?P<id>[^/#?-]+)'
_TESTS = [{
'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
'md5': '14ede27ee2c957b7e4db93140fc0745c',
'info_dict': {
'id': 'PrumRdSQJW',
'ext': 'mp4',
'title': 'FCC Commissioner Brendan Carr on Elons Starlink',
'description': 'Or, why the government doesnt like SpaceX',
'channel': 'The Pull Request',
'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa',
}
}, {
'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
'md5': '16f704ddbf82a27e3930533b12062f07',
'info_dict': {
'id': 'lzxMidUnjA',
'ext': 'mp4',
'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
'description': 'Lets talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.',
'channel': 'The DEBRIEF With Briahna Joy Gray',
'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm',
}
}]
def _search_nextjs_data(self, webpage, video_id, transform_source=None, fatal=True, **kw):
return self._parse_json(
self._search_regex(
r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
webpage, 'next.js data', fatal=fatal, **kw),
video_id, transform_source=transform_source, fatal=fatal)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
next_data = self._search_nextjs_data(webpage, video_id)
episode = traverse_obj(next_data, ('props', 'pageProps', 'episode'), expected_type=dict)
if not episode:
raise ExtractorError('Failed to find episode data')
title = episode.get('title') or self._og_search_title(webpage)
description = episode.get('description') or self._og_search_description(webpage)
formats = []
formats.extend(self._extract_m3u8_formats(
episode.get('m3u8'), video_id, 'mp4',
entry_protocol='m3u8_native', fatal=False))
self._sort_formats(formats)
channel = try_get(episode, lambda x: x['show']['title'], compat_str)
channel_url = try_get(episode, lambda x: x['show']['linkObj']['resourceUrl'], compat_str)
return {
'id': video_id,
'title': title,
'description': description,
'formats': formats,
'channel': channel,
'channel_url': channel_url,
}

View File

@ -3,7 +3,6 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
url_or_none,
)
@ -20,32 +19,11 @@ class CamModelsIE(InfoExtractor):
def _real_extract(self, url):
user_id = self._match_id(url)
webpage = self._download_webpage(
url, user_id, headers=self.geo_verification_headers())
manifest_root = self._html_search_regex(
r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
if not manifest_root:
ERRORS = (
("I'm offline, but let's stay connected", 'This user is currently offline'),
('in a private show', 'This user is in a private show'),
('is currently performing LIVE', 'This model is currently performing live'),
)
for pattern, message in ERRORS:
if pattern in webpage:
error = message
expected = True
break
else:
error = 'Unable to find manifest URL root'
expected = False
raise ExtractorError(error, expected=expected)
manifest = self._download_json(
'%s%s.json' % (manifest_root, user_id), user_id)
'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id)
formats = []
thumbnails = []
for format_id, format_dict in manifest['formats'].items():
if not isinstance(format_dict, dict):
continue
@ -85,6 +63,13 @@ class CamModelsIE(InfoExtractor):
'preference': -1,
})
else:
if format_id == 'jpeg':
thumbnails.append({
'url': f['url'],
'width': f['width'],
'height': f['height'],
'format_id': f['format_id'],
})
continue
formats.append(f)
self._sort_formats(formats)
@ -92,6 +77,7 @@ class CamModelsIE(InfoExtractor):
return {
'id': user_id,
'title': self._live_title(user_id),
'thumbnails': thumbnails,
'is_live': True,
'formats': formats,
'age_limit': 18

View File

@ -12,35 +12,21 @@ from ..utils import (
ExtractorError,
float_or_none,
sanitized_Request,
unescapeHTML,
update_url_query,
str_or_none,
traverse_obj,
urlencode_postdata,
USER_AGENTS,
)
class CeskaTelevizeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
_TESTS = [{
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
'info_dict': {
'id': '61924494877246241',
'ext': 'mp4',
'title': 'Hyde Park Civilizace: Život v Grónsku',
'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 3350,
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
'info_dict': {
'id': '61924494877028507',
'ext': 'mp4',
'title': 'Hyde Park Civilizace: Bonus 01 - En',
'title': 'Bonus 01 - En - Hyde Park Civilizace',
'description': 'English Subtittles',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 81.3,
@ -51,31 +37,111 @@ class CeskaTelevizeIE(InfoExtractor):
},
}, {
# live stream
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
'url': 'http://www.ceskatelevize.cz/zive/ct1/',
'info_dict': {
'id': 402,
'id': '102',
'ext': 'mp4',
'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'title': r'ČT1 - živé vysílání online',
'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.',
'is_live': True,
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'Georestricted to Czech Republic',
}, {
# another
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
'only_matching': True,
'info_dict': {
'id': 402,
'ext': 'mp4',
'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,
},
# 'skip': 'Georestricted to Czech Republic',
}, {
'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
'only_matching': True,
}, {
# video with 18+ caution trailer
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
'info_dict': {
'id': '215562210900007-bogotart',
'title': 'Bogotart - Queer',
'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti',
},
'playlist': [{
'info_dict': {
'id': '61924494877311053',
'ext': 'mp4',
'title': 'Bogotart - Queer (Varování 18+)',
'duration': 11.9,
},
}, {
'info_dict': {
'id': '61924494877068022',
'ext': 'mp4',
'title': 'Bogotart - Queer (Queer)',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 1558.3,
},
}],
'params': {
# m3u8 download
'skip_download': True,
},
}, {
# iframe embed
'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
'only_matching': True,
}]
def _search_nextjs_data(self, webpage, video_id, **kw):
return self._parse_json(
self._search_regex(
r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
webpage, 'next.js data', **kw),
video_id, **kw)
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage, urlh = self._download_webpage_handle(url, playlist_id)
parsed_url = compat_urllib_parse_urlparse(urlh.geturl())
site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
playlist_title = self._og_search_title(webpage, default=None)
if site_name and playlist_title:
playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0]
playlist_description = self._og_search_description(webpage, default=None)
if playlist_description:
playlist_description = playlist_description.replace('\xa0', ' ')
webpage = self._download_webpage(url, playlist_id)
type_ = 'IDEC'
if re.search(r'(^/porady|/zive)/', parsed_url.path):
next_data = self._search_nextjs_data(webpage, playlist_id)
if '/zive/' in parsed_url.path:
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False)
else:
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
if not idec:
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False)
if idec:
type_ = 'bonus'
if not idec:
raise ExtractorError('Failed to find IDEC id')
iframe_hash = self._download_webpage(
'https://www.ceskatelevize.cz/v-api/iframe-hash/',
playlist_id, note='Getting IFRAME hash')
query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, }
webpage = self._download_webpage(
'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php',
playlist_id, note='Downloading player', query=query)
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
self.raise_geo_restricted(NOT_AVAILABLE_STRING)
if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )):
raise ExtractorError('no video with IDEC available', video_id=idec, expected=True)
type_ = None
episode_id = None
@ -100,7 +166,7 @@ class CeskaTelevizeIE(InfoExtractor):
data = {
'playlist[0][type]': type_,
'playlist[0][id]': episode_id,
'requestUrl': compat_urllib_parse_urlparse(url).path,
'requestUrl': parsed_url.path,
'requestSource': 'iVysilani',
}
@ -108,7 +174,7 @@ class CeskaTelevizeIE(InfoExtractor):
for user_agent in (None, USER_AGENTS['Safari']):
req = sanitized_Request(
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
data=urlencode_postdata(data))
req.add_header('Content-type', 'application/x-www-form-urlencoded')
@ -130,9 +196,6 @@ class CeskaTelevizeIE(InfoExtractor):
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
req.add_header('Referer', url)
playlist_title = self._og_search_title(webpage, default=None)
playlist_description = self._og_search_description(webpage, default=None)
playlist = self._download_json(req, playlist_id, fatal=False)
if not playlist:
continue
@ -167,7 +230,7 @@ class CeskaTelevizeIE(InfoExtractor):
entries[num]['formats'].extend(formats)
continue
item_id = item.get('id') or item['assetId']
item_id = str_or_none(item.get('id') or item['assetId'])
title = item['title']
duration = float_or_none(item.get('duration'))
@ -181,8 +244,6 @@ class CeskaTelevizeIE(InfoExtractor):
if playlist_len == 1:
final_title = playlist_title or title
if is_live:
final_title = self._live_title(final_title)
else:
final_title = '%s (%s)' % (playlist_title, title)
@ -200,6 +261,8 @@ class CeskaTelevizeIE(InfoExtractor):
for e in entries:
self._sort_formats(e['formats'])
if len(entries) == 1:
return entries[0]
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
def _get_subtitles(self, episode_id, subs):
@ -236,54 +299,3 @@ class CeskaTelevizeIE(InfoExtractor):
yield line
return '\r\n'.join(_fix_subtitle(subtitles))
class CeskaTelevizePoradyIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
_TESTS = [{
# video with 18+ caution trailer
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
'info_dict': {
'id': '215562210900007-bogotart',
'title': 'Queer: Bogotart',
'description': 'Alternativní průvodce současným queer světem',
},
'playlist': [{
'info_dict': {
'id': '61924494876844842',
'ext': 'mp4',
'title': 'Queer: Bogotart (Varování 18+)',
'duration': 10.2,
},
}, {
'info_dict': {
'id': '61924494877068022',
'ext': 'mp4',
'title': 'Queer: Bogotart (Queer)',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 1558.3,
},
}],
'params': {
# m3u8 download
'skip_download': True,
},
}, {
# iframe embed
'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
data_url = update_url_query(unescapeHTML(self._search_regex(
(r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
webpage, 'iframe player url', group='url')), query={
'autoStart': 'true',
})
return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())

View File

@ -70,6 +70,7 @@ from ..utils import (
str_or_none,
str_to_int,
strip_or_none,
try_get,
unescapeHTML,
unified_strdate,
unified_timestamp,
@ -2713,7 +2714,7 @@ class InfoExtractor(object):
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
mobj = re.search(
r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)',
r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
webpage)
if mobj:
try:
@ -2734,9 +2735,14 @@ class InfoExtractor(object):
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
flat_pl = try_get(jwplayer_data, lambda x: x.get('playlist') or True)
if flat_pl is None:
# not even a dict
return []
# JWPlayer backward compatibility: flattened playlists
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
if 'playlist' not in jwplayer_data:
if flat_pl is True:
jwplayer_data = {'playlist': [jwplayer_data]}
entries = []
@ -2784,6 +2790,13 @@ class InfoExtractor(object):
'timestamp': int_or_none(video_data.get('pubdate')),
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
'subtitles': subtitles,
'alt_title': clean_html(video_data.get('subtitle')), # attributes used e.g. by Tele5 ...
'genre': clean_html(video_data.get('genre')),
'channel': clean_html(dict_get(video_data, ('category', 'channel'))),
'season_number': int_or_none(video_data.get('season')),
'episode_number': int_or_none(video_data.get('episode')),
'release_year': int_or_none(video_data.get('releasedate')),
'age_limit': int_or_none(video_data.get('age_restriction')),
}
# https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
@ -2792,7 +2805,9 @@ class InfoExtractor(object):
'url': formats[0]['url'],
})
else:
self._sort_formats(formats)
# avoid exception in case of only sttls
if formats:
self._sort_formats(formats)
entry['formats'] = formats
entries.append(entry)
if len(entries) == 1:
@ -2802,7 +2817,7 @@ class InfoExtractor(object):
def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
urls = []
urls = set()
formats = []
for source in jwplayer_sources_data:
if not isinstance(source, dict):
@ -2811,14 +2826,14 @@ class InfoExtractor(object):
base_url, self._proto_relative_url(source.get('file')))
if not source_url or source_url in urls:
continue
urls.append(source_url)
urls.add(source_url)
source_type = source.get('type') or ''
ext = mimetype2ext(source_type) or determine_ext(source_url)
if source_type == 'hls' or ext == 'm3u8':
if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=m3u8_id, fatal=False))
elif source_type == 'dash' or ext == 'mpd':
elif source_type == 'dash' or ext == 'mpd' or 'format=mpd-time-csf' in source_url:
formats.extend(self._extract_mpd_formats(
source_url, video_id, mpd_id=mpd_id, fatal=False))
elif ext == 'smil':
@ -2833,20 +2848,23 @@ class InfoExtractor(object):
'ext': ext,
})
else:
format_id = str_or_none(source.get('label'))
height = int_or_none(source.get('height'))
if height is None:
if height is None and format_id:
# Often no height is provided but there is a label in
# format like "1080p", "720p SD", or 1080.
height = int_or_none(self._search_regex(
r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
'height', default=None))
height = parse_resolution(format_id).get('height')
a_format = {
'url': source_url,
'width': int_or_none(source.get('width')),
'height': height,
'tbr': int_or_none(source.get('bitrate')),
'tbr': int_or_none(source.get('bitrate'), scale=1000),
'filesize': int_or_none(source.get('filesize')),
'ext': ext,
}
if format_id:
a_format['format_id'] = format_id
if source_url.startswith('rtmp'):
a_format['ext'] = 'flv'
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as

View File

@ -139,6 +139,7 @@ from .bleacherreport import (
BleacherReportIE,
BleacherReportCMSIE,
)
from .blerp import BlerpIE
from .bloomberg import BloombergIE
from .bokecc import BokeCCIE
from .bongacams import BongaCamsIE
@ -159,6 +160,7 @@ from .businessinsider import BusinessInsiderIE
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
from .callin import CallinIE
from .camdemy import (
CamdemyIE,
CamdemyFolderIE
@ -209,10 +211,7 @@ from .ccc import (
from .ccma import CCMAIE
from .cctv import CCTVIE
from .cda import CDAIE
from .ceskatelevize import (
CeskaTelevizeIE,
CeskaTelevizePoradyIE,
)
from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE
from .charlierose import CharlieRoseIE
from .chaturbate import ChaturbateIE
@ -378,6 +377,7 @@ from .fc2 import (
FC2EmbedIE,
)
from .fczenit import FczenitIE
from .fifa import FifaIE
from .filmon import (
FilmOnIE,
FilmOnChannelIE,
@ -729,6 +729,7 @@ from .myvi import (
MyviIE,
MyviEmbedIE,
)
from .myvideoge import MyVideoGeIE
from .myvidster import MyVidsterIE
from .nationalgeographic import (
NationalGeographicVideoIE,
@ -913,6 +914,10 @@ from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
from .pearvideo import PearVideoIE
from .peekvids import (
PeekVidsIE,
PlayVidsIE,
)
from .peertube import PeerTubeIE
from .people import PeopleIE
from .performgroup import PerformGroupIE
@ -1265,6 +1270,11 @@ from .theweatherchannel import TheWeatherChannelIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE
from .thisvid import (
ThisVidIE,
ThisVidMemberIE,
ThisVidPlaylistIE,
)
from .threeqsdn import ThreeQSDNIE
from .tiktok import (
TikTokIE,
@ -1662,3 +1672,7 @@ from .zingmp3 import (
)
from .zoom import ZoomIE
from .zype import ZypeIE
from .pr0gramm import (
Pr0grammIE,
Pr0grammStaticIE,
)

View File

@ -0,0 +1,101 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
traverse_obj,
unified_timestamp,
)
if not callable(getattr(InfoExtractor, '_match_valid_url', None)):
BaseInfoExtractor = InfoExtractor
import re
class InfoExtractor(BaseInfoExtractor):
@classmethod
def _match_valid_url(cls, url):
return re.match(cls._VALID_URL, url)
class FifaIE(InfoExtractor):
_VALID_URL = r'https?://www.fifa.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)'
_TESTS = [{
'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y',
'info_dict': {
'id': '7on10qPcnyLajDDU3ntg6y',
'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay',
'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b',
'ext': 'mp4',
'categories': ['FIFA Tournaments'],
'thumbnail': 'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero',
'duration': 8165,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV',
'info_dict': {
'id': '1cg5r5Qt6Qt12ilkDgb1sV',
'title': 'Brazil v Germany | Semi-finals | 2014 FIFA World Cup Brazil™ | Extended Highlights',
'description': 'md5:d908c74ee66322b804ae2e521b02a855',
'ext': 'mp4',
'categories': ['FIFA Tournaments', 'Highlights'],
'thumbnail': 'https://digitalhub.fifa.com/transform/d8fe6f61-276d-4a73-a7fe-6878a35fd082/FIFAPLS_100EXTHL_2014BRAvGER_TMB',
'duration': 902,
'release_timestamp': 1404777600,
'release_date': '20140708',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp',
'info_dict': {
'id': '3C6gQH9C2DLwzNx7BMRQdp',
'title': 'Josimar goal against Northern Ireland | Classic Goals',
'description': 'md5:cbe7e7bb52f603c9f1fe9a4780fe983b',
'ext': 'mp4',
'categories': ['FIFA Tournaments', 'Goal'],
'duration': 28,
'thumbnail': 'https://digitalhub.fifa.com/transform/f9301391-f8d9-48b5-823e-c093ac5e3e11/CG_MEN_1986_JOSIMAR',
},
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
video_id, locale = self._match_valid_url(url).group('id', 'locale')
webpage = self._download_webpage(url, video_id)
preconnect_link = self._search_regex(
r'<link\b[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link')
video_details = self._download_json(
'{preconnect_link}/sections/videoDetails/{video_id}'.format(**locals()), video_id, 'Downloading Video Details', fatal=False)
preplay_parameters = self._download_json(
'{preconnect_link}/videoPlayerData/{video_id}'.format(**locals()), video_id, 'Downloading Preplay Parameters')['preplayParameters']
content_data = self._download_json(
# 1. query string is expected to be sent as-is
# 2. `sig` must be appended
# 3. if absent, the call appears to work but the manifest is bad (404)
'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters),
video_id, 'Downloading Content Data')
# formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id)
formats, subtitles = self._extract_m3u8_formats(content_data['playURL'], video_id, ext='mp4', entry_protocol='m3u8_native'), None
self._sort_formats(formats)
return {
'id': video_id,
'title': video_details['title'],
'description': video_details.get('description'),
'duration': int_or_none(video_details.get('duration')),
'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')),
'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
'formats': formats,
'subtitles': subtitles,
}

View File

@ -28,6 +28,7 @@ from ..utils import (
mimetype2ext,
orderedSet,
parse_duration,
parse_resolution,
sanitized_Request,
smuggle_url,
unescapeHTML,
@ -35,6 +36,7 @@ from ..utils import (
unsmuggle_url,
UnsupportedError,
url_or_none,
urljoin,
xpath_attr,
xpath_text,
xpath_with_ns,
@ -2227,6 +2229,97 @@ class GenericIE(InfoExtractor):
# Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed)
'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
'only_matching': True,
}, {
# KVS Player
'url': 'https://www.kvs-demo.com/videos/105/kelis-4th-of-july/',
'info_dict': {
'id': '105',
'display_id': 'kelis-4th-of-july',
'ext': 'mp4',
'title': 'Kelis - 4th Of July',
'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
},
}, {
# KVS Player
'url': 'https://www.kvs-demo.com/embed/105/',
'info_dict': {
'id': '105',
'display_id': 'kelis-4th-of-july',
'ext': 'mp4',
'title': 'Kelis - 4th Of July / Embed Player',
'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
},
'params': {
'skip_download': True,
},
}, {
# KVS Player (tested also in thisvid.py)
'url': 'https://youix.com/video/leningrad-zoj/',
'md5': '94f96ba95706dc3880812b27b7d8a2b8',
'info_dict': {
'id': '18485',
'display_id': 'leningrad-zoj',
'ext': 'mp4',
'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com',
'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg',
},
}, {
# KVS Player
'url': 'https://youix.com/embed/18485',
'md5': '94f96ba95706dc3880812b27b7d8a2b8',
'info_dict': {
'id': '18485',
'display_id': 'leningrad-zoj',
'ext': 'mp4',
'title': 'Ленинград - ЗОЖ',
'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg',
},
}, {
# KVS Player
'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/',
'md5': '94166bdb26b4cb1fb9214319a629fc51',
'info_dict': {
'id': '21217',
'display_id': '40-nochey-2016',
'ext': 'mp4',
'title': '40 ночей (2016) - BogMedia.org',
'description': 'md5:4e6d7d622636eb7948275432eb256dc3',
'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
},
}, {
# KVS Player (for sites that serve kt_player.js via non-https urls)
'url': 'http://www.camhub.world/embed/389508',
'md5': 'fbe89af4cfb59c8fd9f34a202bb03e32',
'info_dict': {
'id': '389508',
'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source',
'ext': 'mp4',
'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер',
'thumbnail': r're:https?://www\.camhub\.world/contents/videos_screenshots/389000/389508/preview\.mp4\.jpg',
},
}, {
'url': 'https://mrdeepfakes.com/video/5/selena-gomez-pov-deep-fakes',
'md5': 'fec4ad5ec150f655e0c74c696a4a2ff4',
'info_dict': {
'id': '5',
'display_id': 'selena-gomez-pov-deep-fakes',
'ext': 'mp4',
'title': 'Selena Gomez POV (Deep Fakes) DeepFake Porn - MrDeepFakes',
'description': 'md5:17d1f84b578c9c26875ac5ef9a932354',
'height': 720,
'age_limit': 18,
},
}, {
'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
'md5': 'e2f0a4c329f7986280b7328e24036d60',
'info_dict': {
'id': '284002',
'display_id': 'just-out-of-the-shower-joi',
'ext': 'mp4',
'title': 'Just Out Of The Shower JOI - Shooshtime',
'height': 720,
'age_limit': 18,
},
},
]
@ -2332,6 +2425,88 @@ class GenericIE(InfoExtractor):
'title': title,
}
def _extract_kvs(self, url, webpage, video_id):
def getlicensetoken(license):
modlicense = license.replace('$', '').replace('0', '1')
center = int(len(modlicense) / 2)
fronthalf = int(modlicense[:center + 1])
backhalf = int(modlicense[center:])
modlicense = compat_str(4 * abs(fronthalf - backhalf))
def parts():
for o in range(0, center + 1):
for i in range(1, 5):
yield compat_str((int(license[o + i]) + int(modlicense[o])) % 10)
return ''.join(parts())
def getrealurl(video_url, license_code):
if not video_url.startswith('function/0/'):
return video_url # not obfuscated
url_path, _, url_query = video_url.partition('?')
urlparts = url_path.split('/')[2:]
license = getlicensetoken(license_code)
newmagic = urlparts[5][:32]
def spells(x, o):
l = (o + sum(int(n) for n in license[o:])) % 32
for i in range(0, len(x)):
yield {l: x[o], o: x[l]}.get(i, x[i])
for o in range(len(newmagic) - 1, -1, -1):
newmagic = ''.join(spells(newmagic, o))
urlparts[5] = newmagic + urlparts[5][32:]
return '/'.join(urlparts) + '?' + url_query
flashvars = self._search_regex(
r'(?s)<script\b[^>]*>.*?var\s+flashvars\s*=\s*(\{.+?\});.*?</script>',
webpage, 'flashvars')
flashvars = self._parse_json(flashvars, video_id, transform_source=js_to_json)
# extract the part after the last / as the display_id from the
# canonical URL.
display_id = self._search_regex(
r'(?:<link href="https?://[^"]+/(.+?)/?" rel="canonical"\s*/?>'
r'|<link rel="canonical" href="https?://[^"]+/(.+?)/?"\s*/?>)',
webpage, 'display_id', fatal=False
)
title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title')
thumbnail = flashvars['preview_url']
if thumbnail.startswith('//'):
protocol, _, _ = url.partition('/')
thumbnail = protocol + thumbnail
url_keys = list(filter(re.compile(r'^video_(?:url|alt_url\d*)$').match, flashvars.keys()))
formats = []
for key in url_keys:
if '/get_file/' not in flashvars[key]:
continue
format_id = flashvars.get(key + '_text', key)
formats.append(merge_dicts(
parse_resolution(format_id) or parse_resolution(flashvars[key]), {
'url': urljoin(url, getrealurl(flashvars[key], flashvars['license_code'])),
'format_id': format_id,
'ext': 'mp4',
'http_headers': {'Referer': url},
}))
if not formats[-1].get('height'):
formats[-1]['quality'] = 1
self._sort_formats(formats)
return {
'id': flashvars['video_id'],
'display_id': display_id,
'title': title,
'thumbnail': thumbnail,
'formats': formats,
}
def _real_extract(self, url):
if url.startswith('//'):
return self.url_result(self.http_scheme() + url)
@ -2540,9 +2715,16 @@ class GenericIE(InfoExtractor):
# but actually don't.
AGE_LIMIT_MARKERS = [
r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
r'>[^<]*you acknowledge you are at least (\d+) years old',
r'>\s*(?:18\s+U(?:\.S\.C\.|SC)\s+)?(?:§+\s*)?2257\b',
]
if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
age_limit = 18
for marker in AGE_LIMIT_MARKERS:
m = re.search(marker, webpage)
if not m:
continue
age_limit = max(
age_limit or 0,
int_or_none(m.groups() and m.group(1), default=18))
# video uploader is domain name
video_uploader = self._search_regex(
@ -3389,6 +3571,20 @@ class GenericIE(InfoExtractor):
info_dict['formats'] = formats
return info_dict
# Look for generic KVS player (before ld+json for tests)
found = self._search_regex(
(r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P<ver>\d+(?:\.\d+)+)\1[^>]*>',
# kt_player('kt_player', 'https://i.shoosh.co/player/kt_player.swf?v=5.5.1', ...
r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P<ver>\d+(?:\.\d+)+)\2\s*,',
), webpage, 'KVS player', group='ver', default=False)
if found:
self.report_extraction('%s: KVS Player' % (video_id, ))
if found.split('.')[0] not in ('4', '5', '6'):
self.report_warning('Untested major version (%s) in player engine - download may fail.' % (found, ))
return merge_dicts(
self._extract_kvs(url, webpage, video_id),
info_dict)
# Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject')

View File

@ -1,6 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
from ..utils import (
ExtractorError,
)
from ..compat import (
compat_b64decode,
@ -90,7 +93,11 @@ class InfoQIE(BokeCCBaseIE):
}]
def _extract_http_audio(self, webpage, video_id):
fields = self._form_hidden_inputs('mp3Form', webpage)
try:
fields = self._form_hidden_inputs('mp3Form', webpage)
except ExtractorError:
fields = {}
http_audio_url = fields.get('filename')
if not http_audio_url:
return []

View File

@ -3,123 +3,266 @@ from __future__ import unicode_literals
import json
import re
import sys
from .common import InfoExtractor
from .brightcove import BrightcoveNewIE
from ..compat import (
compat_HTTPError,
compat_integer_types,
compat_kwargs,
compat_urlparse,
)
from ..utils import (
clean_html,
determine_ext,
error_to_compat_str,
extract_attributes,
get_element_by_class,
JSON_LD_RE,
ExtractorError,
get_element_by_attribute,
int_or_none,
merge_dicts,
parse_duration,
parse_iso8601,
remove_start,
smuggle_url,
strip_or_none,
traverse_obj,
url_or_none,
urljoin,
)
class ITVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
_GEO_COUNTRIES = ['GB']
class ITVBaseIE(InfoExtractor):
def _search_nextjs_data(self, webpage, video_id, **kw):
transform_source = kw.pop('transform_source', None)
fatal = kw.pop('fatal', True)
return self._parse_json(
self._search_regex(
r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
webpage, 'next.js data', group='js', fatal=fatal, **kw),
video_id, transform_source=transform_source, fatal=fatal)
def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
if errnote is False:
return False
if errnote is None:
errnote = 'Unable to download webpage'
errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
if fatal:
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err, video_id=video_id)
else:
self._downloader.report_warning(errmsg)
return False
@staticmethod
def _vanilla_ua_header():
return {'User-agent': 'Mozilla/5.0'}
def _download_webpage_handle(self, url, video_id, *args, **kwargs):
# specialised to (a) use vanilla UA (b) detect geo-block
params = self._downloader.params
nkwargs = {}
if (
'user_agent' not in params
and not any(re.match(r'(?i)user-agent\s*:', h)
for h in (params.get('headers') or []))
and 'User-agent' not in (kwargs.get('headers') or {})):
kwargs.setdefault('headers', {})
kwargs['headers'] = self._vanilla_ua_header()
nkwargs = kwargs
if kwargs.get('expected_status') is not None:
exp = kwargs['expected_status']
if isinstance(exp, compat_integer_types):
exp = [exp]
if isinstance(exp, (list, tuple)) and 403 not in exp:
kwargs['expected_status'] = [403]
kwargs['expected_status'].extend(exp)
nkwargs = kwargs
else:
kwargs['expected_status'] = 403
nkwargs = kwargs
if nkwargs:
kwargs = compat_kwargs(kwargs)
ret = super(ITVBaseIE, self)._download_webpage_handle(url, video_id, *args, **kwargs)
if ret is False:
return ret
webpage, urlh = ret
if urlh.getcode() == 403:
# geo-block error is like this, with an unnecessary 'Of':
# '{\n "Message" : "Request Originated Outside Of Allowed Geographic Region",\
# \n "TransactionId" : "oas-magni-475082-xbYF0W"\n}'
if '"Request Originated Outside Of Allowed Geographic Region"' in webpage:
self.raise_geo_restricted(countries=['GB'])
ret = self.__handle_request_webpage_error(
compat_HTTPError(urlh.geturl(), 403, 'HTTP Error 403: Forbidden', urlh.headers, urlh),
fatal=kwargs.get('fatal'))
return ret
class ITVIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
_IE_DESC = 'ITVX'
_TESTS = [{
'note': 'Hub URLs redirect to ITVX',
'url': 'https://www.itv.com/hub/liar/2a4547a0012',
'info_dict': {
'id': '2a4547a0012',
'ext': 'mp4',
'title': 'Liar - Series 2 - Episode 6',
'description': 'md5:d0f91536569dec79ea184f0a44cca089',
'series': 'Liar',
'season_number': 2,
'episode_number': 6,
},
'params': {
# m3u8 download
'skip_download': True,
},
'only_matching': True,
}, {
# unavailable via data-playlist-url
'note': 'Hub page unavailable via data-playlist-url (404 now)',
'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
'only_matching': True,
}, {
# InvalidVodcrid
'note': 'Hub page with InvalidVodcrid (404 now)',
'url': 'https://www.itv.com/hub/james-martins-saturday-morning/2a5159a0034',
'only_matching': True,
}, {
# ContentUnavailable
'note': 'Hub page with ContentUnavailable (404 now)',
'url': 'https://www.itv.com/hub/whos-doing-the-dishes/2a2898a0024',
'only_matching': True,
}]
}, {
'note': 'ITVX, or itvX, show',
'url': 'https://www.itv.com/watch/vera/1a7314/1a7314a0014',
'md5': 'bd0ad666b2c058fffe7d036785880064',
'info_dict': {
'id': '1a7314a0014',
'ext': 'mp4',
'title': 'Vera - Series 3 - Episode 4 - Prodigal Son',
'description': 'Vera and her team investigate the fatal stabbing of an ex-Met police officer outside a busy Newcastle nightclub - but there aren\'t many clues.',
'timestamp': 1653591600,
'upload_date': '20220526',
'uploader': 'ITVX',
'thumbnail': r're:https://\w+\.itv\.com/images/(?:\w+/)+\d+x\d+\?',
'duration': 5340.8,
'age_limit': 16,
'series': 'Vera',
'series_number': 3,
'episode': 'Prodigal Son',
'episode_number': 4,
'channel': 'ITV3',
'categories': list,
},
'params': {
# m3u8 download
# 'skip_download': True,
},
'skip': 'only available in UK',
}, {
'note': 'Latest ITV news bulletin: details change daily',
'url': 'https://www.itv.com/watch/news/varies-but-is-not-checked/6js5d0f',
'info_dict': {
'id': '6js5d0f',
'ext': 'mp4',
'title': r're:The latest ITV News headlines - \S.+',
'description': r'''re:.* today's top stories from the ITV News team.$''',
'timestamp': int,
'upload_date': r're:2\d\d\d(?:0[1-9]|1[0-2])(?:[012][1-9]|3[01])',
'uploader': 'ITVX',
'thumbnail': r're:https://images\.ctfassets\.net/(?:\w+/)+[\w.]+\.(?:jpg|png)',
'duration': float,
'age_limit': None,
},
'params': {
# variable download
# 'skip_download': True,
},
'skip': 'only available in UK',
}
]
def _og_extract(self, webpage, require_title=False):
return {
'title': self._og_search_title(webpage, fatal=require_title),
'description': self._og_search_description(webpage, default=None),
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'uploader': self._og_search_property('site_name', webpage, default=None),
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
params = extract_attributes(self._search_regex(
r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
ios_playlist_url = params.get('data-video-playlist') or params['data-video-id']
hmac = params['data-video-hmac']
webpage = self._download_webpage(url, video_id)
# now quite different params!
params = extract_attributes(self._search_regex(
r'''(<[^>]+\b(?:class|data-testid)\s*=\s*("|')genie-container\2[^>]*>)''',
webpage, 'params'))
ios_playlist_url = traverse_obj(
params, 'data-video-id', 'data-video-playlist',
get_all=False, expected_type=url_or_none)
headers = self.geo_verification_headers()
headers.update({
'Accept': 'application/vnd.itv.vod.playlist.v2+json',
'Content-Type': 'application/json',
'hmac': hmac.upper(),
})
ios_playlist = self._download_json(
ios_playlist_url, video_id, data=json.dumps({
'user': {
'itvUserId': '',
'entitlements': [],
'token': ''
},
'device': {
'manufacturer': 'Safari',
'model': '5',
'manufacturer': 'Mobile Safari',
'model': '5.1',
'os': {
'name': 'Windows NT',
'version': '6.1',
'type': 'desktop'
'name': 'iOS',
'version': '5.0',
'type': ' mobile'
}
},
'client': {
'version': '4.1',
'id': 'browser'
'id': 'browser',
'supportsAdPods': True,
'service': 'itv.x',
'appversion': '2.43.28',
},
'variantAvailability': {
'player': 'hls',
'featureset': {
'min': ['hls', 'aes', 'outband-webvtt'],
'max': ['hls', 'aes', 'outband-webvtt']
},
'platformTag': 'dotcom'
'platformTag': 'mobile'
}
}).encode(), headers=headers)
video_data = ios_playlist['Playlist']['Video']
ios_base_url = video_data.get('Base')
ios_base_url = traverse_obj(video_data, 'Base', expected_type=url_or_none)
media_url = (
(lambda u: url_or_none(urljoin(ios_base_url, u)))
if ios_base_url else url_or_none)
formats = []
for media_file in (video_data.get('MediaFiles') or []):
href = media_file.get('Href')
for media_file in traverse_obj(video_data, 'MediaFiles', expected_type=list) or []:
href = traverse_obj(media_file, 'Href', expected_type=media_url)
if not href:
continue
if ios_base_url:
href = ios_base_url + href
ext = determine_ext(href)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
href, video_id, 'mp4', entry_protocol='m3u8_native',
href, video_id, 'mp4', entry_protocol='m3u8',
m3u8_id='hls', fatal=False))
else:
formats.append({
'url': href,
})
self._sort_formats(formats)
for f in formats:
f.setdefault('http_headers', {})
f['http_headers'].update(self._vanilla_ua_header())
subtitles = {}
subs = video_data.get('Subtitles') or []
for sub in subs:
if not isinstance(sub, dict):
continue
href = url_or_none(sub.get('Href'))
for sub in traverse_obj(video_data, 'Subtitles', expected_type=list) or []:
href = traverse_obj(sub, 'Href', expected_type=url_or_none)
if not href:
continue
subtitles.setdefault('en', []).append({
@ -127,59 +270,132 @@ class ITVIE(InfoExtractor):
'ext': determine_ext(href, 'vtt'),
})
info = self._search_json_ld(webpage, video_id, default={})
if not info:
json_ld = self._parse_json(self._search_regex(
JSON_LD_RE, webpage, 'JSON-LD', '{}',
group='json_ld'), video_id, fatal=False)
if json_ld and json_ld.get('@type') == 'BreadcrumbList':
for ile in (json_ld.get('itemListElement:') or []):
item = ile.get('item:') or {}
if item.get('@type') == 'TVEpisode':
item['@context'] = 'http://schema.org'
info = self._json_ld(item, video_id, fatal=False) or {}
break
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}')
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
info = self._og_extract(webpage, require_title=not title)
tn = info.pop('thumbnail', None)
if tn:
info['thumbnails'] = [{'url': tn}]
# num. episode title
num_ep_title = video_data.get('numberedEpisodeTitle')
if not num_ep_title:
num_ep_title = clean_html(get_element_by_attribute('data-testid', 'episode-hero-description-strong', webpage))
num_ep_title = num_ep_title and num_ep_title.rstrip(' -')
ep_title = strip_or_none(
video_data.get('episodeTitle')
or (num_ep_title.split('.', 1)[-1] if num_ep_title else None))
title = title or re.sub(r'\s+-\s+ITVX$', '', info['title'])
if ep_title and ep_title != title:
title = title + ' - ' + ep_title
def get_thumbnails():
tns = []
for w, x in (traverse_obj(video_data, ('imagePresets'), expected_type=dict) or {}).items():
if isinstance(x, dict):
for y, z in x.items():
tns.append({'id': w + '_' + y, 'url': z})
return tns or None
video_str = lambda *x: traverse_obj(
video_data, *x, get_all=False, expected_type=strip_or_none)
return merge_dicts({
'id': video_id,
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
'title': title,
'formats': formats,
'subtitles': subtitles,
'duration': parse_duration(video_data.get('Duration')),
'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)),
# parsing hh:mm:ss:nnn not yet patched
'duration': parse_duration(re.sub(r'(\d{2})(:)(\d{3}$)', r'\1.\3', video_data.get('Duration') or '')),
'description': video_str('synopsis'),
'timestamp': traverse_obj(video_data, 'broadcastDateTime', 'dateTime', expected_type=parse_iso8601),
'thumbnails': get_thumbnails(),
'series': video_str('showTitle', 'programmeTitle'),
'series_number': int_or_none(video_data.get('seriesNumber')),
'episode': ep_title,
'episode_number': int_or_none((num_ep_title or '').split('.')[0]),
'channel': video_str('channel'),
'categories': traverse_obj(video_data, ('categories', 'formatted'), expected_type=list),
'age_limit': {False: 16, True: 0}.get(video_data.get('isChildrenCategory')),
}, info)
class ITVBTCCIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
class ITVBTCCIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
_IE_DESC = 'ITV articles: News, British Touring Car Championship'
_TESTS = [{
'note': 'British Touring Car Championship',
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',
'info_dict': {
'id': 'btcc-2018-all-the-action-from-brands-hatch',
'title': 'BTCC 2018: All the action from Brands Hatch',
},
'playlist_mincount': 9,
}
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
}, {
'note': 'redirects to /btcc/articles/...',
'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
'only_matching': True,
}, {
'note': 'news article',
'url': 'https://www.itv.com/news/wales/2020-07-23/sean-fletcher-shows-off-wales-coastline-in-new-itv-series-as-british-tourists-opt-for-staycations',
'info_dict': {
'id': 'sean-fletcher-shows-off-wales-coastline-in-new-itv-series-as-british-tourists-opt-for-staycations',
'title': '''Sean Fletcher on why Wales' coastline should be your 'staycation' destination | ITV News''',
},
'playlist_mincount': 1,
}]
# should really be a class var of the BC IE
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
BRIGHTCOVE_ACCOUNT = '1582188683001'
BRIGHTCOVE_PLAYER = 'HkiHLnNRx'
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
webpage, urlh = self._download_webpage_handle(url, playlist_id)
link = compat_urlparse.urlparse(urlh.geturl()).path.strip('/')
entries = [
self.url_result(
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
# ITV does not like some GB IP ranges, so here are some
# IP blocks it accepts
'geo_ip_blocks': [
'193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
],
'referrer': url,
}),
ie=BrightcoveNewIE.ie_key(), video_id=video_id)
for video_id in re.findall(r'data-video-id=["\'](\d+)', webpage)]
next_data = self._search_nextjs_data(webpage, playlist_id, fatal=False, default='{}')
path_prefix = compat_urlparse.urlparse(next_data.get('assetPrefix') or '').path.strip('/')
link = remove_start(link, path_prefix).strip('/')
content = traverse_obj(
next_data, ('props', 'pageProps', Ellipsis),
expected_type=lambda x: x if x['link'] == link else None,
get_all=False, default={})
content = traverse_obj(
content, ('body', 'content', Ellipsis, 'data'),
expected_type=lambda x: x if x.get('name') == 'Brightcove' or x.get('type') == 'Brightcove' else None)
contraband = {
# ITV does not like some GB IP ranges, so here are some
# IP blocks it accepts
'geo_ip_blocks': [
'193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
],
'referrer': urlh.geturl(),
}
def entries():
for data in content or []:
video_id = data.get('id')
if not video_id:
continue
account = data.get('accountId') or self.BRIGHTCOVE_ACCOUNT
player = data.get('playerId') or self.BRIGHTCOVE_PLAYER
yield self.url_result(
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account, player, video_id), contraband),
ie=BrightcoveNewIE.ie_key(), video_id=video_id)
# obsolete ?
for video_id in re.findall(r'''data-video-id=["'](\d+)''', webpage):
yield self.url_result(
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (self.BRIGHTCOVE_ACCOUNT, self.BRIGHTCOVE_PLAYER, video_id), contraband),
ie=BrightcoveNewIE.ie_key(), video_id=video_id)
title = self._og_search_title(webpage, fatal=False)
return self.playlist_result(entries, playlist_id, title)
return self.playlist_result(entries(), playlist_id, title)

View File

@ -1,11 +1,16 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
extract_attributes,
int_or_none,
str_to_int,
url_or_none,
urlencode_postdata,
)
@ -20,17 +25,20 @@ class ManyVidsIE(InfoExtractor):
'id': '133957',
'ext': 'mp4',
'title': 'everthing about me (Preview)',
'uploader': 'ellyxxix',
'view_count': int,
'like_count': int,
},
}, {
# full video
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
'md5': 'bb47bab0e0802c2a60c24ef079dfe60f',
'info_dict': {
'id': '935718',
'ext': 'mp4',
'title': 'MY FACE REVEAL',
'description': 'md5:ec5901d41808b3746fed90face161612',
'uploader': 'Sarah Calanthe',
'view_count': int,
'like_count': int,
},
@ -39,17 +47,50 @@ class ManyVidsIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, )
try:
webpage = self._download_webpage(real_url, video_id)
except Exception:
# probably useless fallback
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'video URL', group='url')
info = self._search_regex(
r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
webpage, 'meta details', default='')
info = extract_attributes(info)
title = self._html_search_regex(
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
webpage, 'title', default=None) or self._html_search_meta(
'twitter:title', webpage, 'title', fatal=True)
player = self._search_regex(
r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
webpage, 'player details', default='')
player = extract_attributes(player)
video_urls_and_ids = (
(info.get('data-meta-video'), 'video'),
(player.get('data-video-transcoded'), 'transcoded'),
(player.get('data-video-filepath'), 'filepath'),
(self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
)
def txt_or_none(s, default=None):
return (s.strip() or default) if isinstance(s, compat_str) else default
uploader = txt_or_none(info.get('data-meta-author'))
def mung_title(s):
if uploader:
s = re.sub(r'^\s*%s\s+[|-]' % (re.escape(uploader), ), '', s)
return txt_or_none(s)
title = (
mung_title(info.get('data-meta-title'))
or self._html_search_regex(
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
webpage, 'title', default=None)
or self._html_search_meta(
'twitter:title', webpage, 'title', fatal=True))
title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title
if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
title += ' (Preview)'
@ -62,7 +103,8 @@ class ManyVidsIE(InfoExtractor):
# Sets some cookies
self._download_webpage(
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
video_id, fatal=False, data=urlencode_postdata({
video_id, note='Setting format cookies', fatal=False,
data=urlencode_postdata({
'mvtoken': mv_token,
'vid': video_id,
}), headers={
@ -70,23 +112,56 @@ class ManyVidsIE(InfoExtractor):
'X-Requested-With': 'XMLHttpRequest'
})
if determine_ext(video_url) == 'm3u8':
formats = self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
else:
formats = [{'url': video_url}]
formats = []
for v_url, fmt in video_urls_and_ids:
v_url = url_or_none(v_url)
if not v_url:
continue
if determine_ext(v_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
v_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls'))
else:
formats.append({
'url': v_url,
'format_id': fmt,
})
like_count = int_or_none(self._search_regex(
r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
view_count = str_to_int(self._html_search_regex(
r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
'view count', default=None))
self._remove_duplicate_formats(formats)
for f in formats:
if f.get('height') is None:
f['height'] = int_or_none(
self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
if '/preview/' in f['url']:
f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
f['preference'] = -10
if 'transcoded' in f['format_id']:
f['preference'] = f.get('preference', -1) - 1
self._sort_formats(formats)
def get_likes():
likes = self._search_regex(
r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ),
webpage, 'likes', default='')
likes = extract_attributes(likes)
return int_or_none(likes.get('data-likes'))
def get_views():
return str_to_int(self._html_search_regex(
r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
webpage, 'view count', default=None))
return {
'id': video_id,
'title': title,
'view_count': view_count,
'like_count': like_count,
'formats': formats,
'description': txt_or_none(info.get('data-meta-description')),
'uploader': txt_or_none(info.get('data-meta-author')),
'thumbnail': (
url_or_none(info.get('data-meta-image'))
or url_or_none(player.get('data-video-screenshot'))),
'view_count': get_views(),
'like_count': get_likes(),
}

View File

@ -24,7 +24,7 @@ class MediasetIE(ThePlatformBaseIE):
(?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
(?:
(?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
player/index\.html\?.*?\bprogramGuid=
player(?:/v\d+)?/index\.html\?.*?\bprogramGuid=
)
)(?P<id>[0-9A-Z]{16,})
'''
@ -73,6 +73,10 @@ class MediasetIE(ThePlatformBaseIE):
# iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',
'only_matching': True,
}, {
# embedUrl (from https://www.wittytv.it/amici/est-ce-que-tu-maimes-gabriele-5-dicembre-copia/)
'url': 'https://static3.mediasetplay.mediaset.it/player/v2/index.html?partnerId=wittytv&configId=&programGuid=FD00000000153323&autoplay=true&purl=http://www.wittytv.it/amici/est-ce-que-tu-maimes-gabriele-5-dicembre-copia/',
'only_matching': True,
}, {
'url': 'mediaset:FAFU000000665924',
'only_matching': True,

View File

@ -78,7 +78,7 @@ class MindsIE(MindsBaseIE):
else:
return self.url_result(entity['perma_url'])
else:
assert(entity['subtype'] == 'video')
assert (entity['subtype'] == 'video')
video_id = entity_id
# 1080p and webm formats available only on the sources array
video = self._call_api(

View File

@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals
import datetime
@ -71,7 +72,7 @@ class MotherlessIE(InfoExtractor):
'title': 'a/ Hot Teens',
'categories': list,
'upload_date': '20210104',
'uploader_id': 'yonbiw',
'uploader_id': 'anonymous',
'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
},
@ -125,9 +126,10 @@ class MotherlessIE(InfoExtractor):
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
comment_count = webpage.count('class="media-comment-contents"')
comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
uploader_id = self._html_search_regex(
r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
(r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
webpage, 'uploader_id')
categories = self._html_search_meta('keywords', webpage, default=None)
@ -169,7 +171,18 @@ class MotherlessGroupIE(InfoExtractor):
'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
'any kind!'
},
'playlist_mincount': 9,
'playlist_mincount': 0,
'expected_warnings': [
'This group has no videos.',
]
}, {
'url': 'https://motherless.com/g/beautiful_cock',
'info_dict': {
'id': 'beautiful_cock',
'title': 'Beautiful Cock',
'description': 'Group for lovely cocks yours, mine, a friends anything human',
},
'playlist_mincount': 2500,
}]
@classmethod
@ -208,16 +221,23 @@ class MotherlessGroupIE(InfoExtractor):
r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
description = self._html_search_meta(
'description', webpage, fatal=False)
page_count = self._int(self._search_regex(
r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
webpage, 'page_count'), 'page_count')
page_count = str_to_int(self._search_regex(
r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
webpage, 'page_count', default=0))
if not page_count:
message = self._search_regex(
r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''',
webpage, 'error_msg', default=None) or 'This group has no videos.'
self.report_warning(message, group_id)
page_count = 1
PAGE_SIZE = 80
def _get_page(idx):
webpage = self._download_webpage(
page_url, group_id, query={'page': idx + 1},
note='Downloading page %d/%d' % (idx + 1, page_count)
)
if idx > 0:
webpage = self._download_webpage(
page_url, group_id, query={'page': idx + 1},
note='Downloading page %d/%d' % (idx + 1, page_count)
)
for entry in self._extract_entries(webpage, url):
yield entry

View File

@ -0,0 +1,87 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
get_element_by_id,
get_element_by_class,
int_or_none,
js_to_json,
MONTH_NAMES,
qualities,
unified_strdate,
)
class MyVideoGeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?myvideo\.ge/v/(?P<id>[0-9]+)'
_TEST = {
'url': 'https://www.myvideo.ge/v/3941048',
'md5': '8c192a7d2b15454ba4f29dc9c9a52ea9',
'info_dict': {
'id': '3941048',
'ext': 'mp4',
'title': 'The best prikol',
'upload_date': '20200611',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'chixa33',
'description': 'md5:5b067801318e33c2e6eea4ab90b1fdd3',
},
# working from local dev system
'skip': 'site blocks CI servers',
}
_MONTH_NAMES_KA = ['იანვარი', 'თებერვალი', 'მარტი', 'აპრილი', 'მაისი', 'ივნისი', 'ივლისი', 'აგვისტო', 'სექტემბერი', 'ოქტომბერი', 'ნოემბერი', 'დეკემბერი']
_quality = staticmethod(qualities(('SD', 'HD')))
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = (
self._og_search_title(webpage, default=None)
or clean_html(get_element_by_class('my_video_title', webpage))
or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title\b', webpage, 'title'))
jwplayer_sources = self._parse_json(
self._search_regex(
r'''(?s)jwplayer\s*\(\s*['"]mvplayer['"]\s*\)\s*\.\s*setup\s*\(.*?\bsources\s*:\s*(\[.*?])\s*[,});]''', webpage, 'jwplayer sources', fatal=False)
or '',
video_id, transform_source=js_to_json, fatal=False)
formats = self._parse_jwplayer_formats(jwplayer_sources or [], video_id)
for f in formats or []:
f['preference'] = self._quality(f['format_id'])
self._sort_formats(formats)
description = (
self._og_search_description(webpage)
or get_element_by_id('long_desc_holder', webpage)
or self._html_search_meta('description', webpage))
uploader = self._search_regex(r'<a[^>]+class="mv_user_name"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)
upload_date = get_element_by_class('mv_vid_upl_date', webpage)
# as ka locale may not be present roll a local date conversion
upload_date = (unified_strdate(
# translate any ka month to an en one
re.sub('|'.join(self._MONTH_NAMES_KA),
lambda m: MONTH_NAMES['en'][self._MONTH_NAMES_KA.index(m.group(0))],
upload_date, re.I))
if upload_date else None)
return {
'id': video_id,
'title': title,
'description': description,
'uploader': uploader,
'formats': formats,
'thumbnail': self._og_search_thumbnail(webpage),
'upload_date': upload_date,
'view_count': int_or_none(get_element_by_class('mv_vid_views', webpage)),
'like_count': int_or_none(get_element_by_id('likes_count', webpage)),
'dislike_count': int_or_none(get_element_by_id('dislikes_count', webpage)),
}

View File

@ -1,20 +1,32 @@
# coding: utf-8
from __future__ import unicode_literals
from hashlib import md5
from base64 import b64encode
from binascii import hexlify
from datetime import datetime
from hashlib import md5
from random import randint
import json
import re
import time
from .common import InfoExtractor
from ..aes import aes_ecb_encrypt, pkcs7_padding
from ..compat import (
compat_urllib_parse_urlencode,
compat_str,
compat_itertools_count,
)
from ..utils import (
sanitized_Request,
ExtractorError,
bytes_to_intlist,
error_to_compat_str,
float_or_none,
int_or_none,
intlist_to_bytes,
sanitized_Request,
std_headers,
try_get,
)
@ -35,32 +47,106 @@ class NetEaseMusicBaseIE(InfoExtractor):
result = b64encode(m.digest()).decode('ascii')
return result.replace('/', '_').replace('+', '-')
@classmethod
def make_player_api_request_data_and_headers(cls, song_id, bitrate):
KEY = b'e82ckenh8dichen8'
URL = '/api/song/enhance/player/url'
now = int(time.time() * 1000)
rand = randint(0, 1000)
cookie = {
'osver': None,
'deviceId': None,
'appver': '8.0.0',
'versioncode': '140',
'mobilename': None,
'buildver': '1623435496',
'resolution': '1920x1080',
'__csrf': '',
'os': 'pc',
'channel': None,
'requestId': '{0}_{1:04}'.format(now, rand),
}
request_text = json.dumps(
{'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
separators=(',', ':'))
message = 'nobody{0}use{1}md5forencrypt'.format(
URL, request_text).encode('latin1')
msg_digest = md5(message).hexdigest()
data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
URL, request_text, msg_digest)
data = pkcs7_padding(bytes_to_intlist(data))
encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
encrypted_params = hexlify(encrypted).decode('ascii').upper()
cookie = '; '.join(
['{0}={1}'.format(k, v if v is not None else 'undefined')
for [k, v] in cookie.items()])
headers = {
'User-Agent': std_headers['User-Agent'],
'Content-Type': 'application/x-www-form-urlencoded',
'Referer': 'https://music.163.com',
'Cookie': cookie,
}
return ('params={0}'.format(encrypted_params), headers)
def _call_player_api(self, song_id, bitrate):
url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
try:
msg = 'empty result'
result = self._download_json(
url, song_id, data=data.encode('ascii'), headers=headers)
if result:
return result
except ExtractorError as e:
if type(e.cause) in (ValueError, TypeError):
# JSON load failure
raise
except Exception as e:
msg = error_to_compat_str(e)
self.report_warning('%s API call (%s) failed: %s' % (
song_id, bitrate, msg))
return {}
def extract_formats(self, info):
err = 0
formats = []
song_id = info['id']
for song_format in self._FORMATS:
details = info.get(song_format)
if not details:
continue
song_file_path = '/%s/%s.%s' % (
self._encrypt(details['dfsId']), details['dfsId'], details['extension'])
# 203.130.59.9, 124.40.233.182, 115.231.74.139, etc is a reverse proxy-like feature
# from NetEase's CDN provider that can be used if m5.music.126.net does not
# work, especially for users outside of Mainland China
# via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880
for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net',
'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'):
song_url = host + song_file_path
bitrate = int_or_none(details.get('bitrate')) or 999000
data = self._call_player_api(song_id, bitrate)
for song in try_get(data, lambda x: x['data'], list) or []:
song_url = try_get(song, lambda x: x['url'])
if not song_url:
continue
if self._is_valid_url(song_url, info['id'], 'song'):
formats.append({
'url': song_url,
'ext': details.get('extension'),
'abr': float_or_none(details.get('bitrate'), scale=1000),
'abr': float_or_none(song.get('br'), scale=1000),
'format_id': song_format,
'filesize': details.get('size'),
'asr': details.get('sr')
'filesize': int_or_none(song.get('size')),
'asr': int_or_none(details.get('sr')),
})
break
elif err == 0:
err = try_get(song, lambda x: x['code'], int)
if not formats:
msg = 'No media links found'
if err != 0 and (err < 200 or err >= 400):
raise ExtractorError(
'%s (site code %d)' % (msg, err, ), expected=True)
else:
self.raise_geo_restricted(
msg + ': probably this video is not available from your location due to geo restriction.',
countries=['CN'])
return formats
@classmethod
@ -76,33 +162,19 @@ class NetEaseMusicBaseIE(InfoExtractor):
class NetEaseMusicIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:song'
IE_DESC = '网易云音乐'
_VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
_VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://music.163.com/#/song?id=32102397',
'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
'md5': '3e909614ce09b1ccef4a3eb205441190',
'info_dict': {
'id': '32102397',
'ext': 'mp3',
'title': 'Bad Blood (feat. Kendrick Lamar)',
'title': 'Bad Blood',
'creator': 'Taylor Swift / Kendrick Lamar',
'upload_date': '20150517',
'timestamp': 1431878400,
'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
'upload_date': '20150516',
'timestamp': 1431792000,
'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
},
'skip': 'Blocked outside Mainland China',
}, {
'note': 'No lyrics translation.',
'url': 'http://music.163.com/#/song?id=29822014',
'info_dict': {
'id': '29822014',
'ext': 'mp3',
'title': '听见下雨的声音',
'creator': '周杰伦',
'upload_date': '20141225',
'timestamp': 1419523200,
'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
},
'skip': 'Blocked outside Mainland China',
}, {
'note': 'No lyrics.',
'url': 'http://music.163.com/song?id=17241424',
@ -112,9 +184,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'title': 'Opus 28',
'creator': 'Dustin O\'Halloran',
'upload_date': '20080211',
'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
'timestamp': 1202745600,
},
'skip': 'Blocked outside Mainland China',
}, {
'note': 'Has translated name.',
'url': 'http://music.163.com/#/song?id=22735043',
@ -128,7 +200,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'timestamp': 1264608000,
'alt_title': '说出愿望吧(Genie)',
},
'skip': 'Blocked outside Mainland China',
}, {
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
'md5': '95826c73ea50b1c288b22180ec9e754d',
'info_dict': {
'id': '95670',
'ext': 'mp3',
'title': '国际歌',
'creator': '马备',
'upload_date': '19911130',
'timestamp': 691516800,
'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
},
}]
def _process_lyrics(self, lyrics_info):

View File

@ -8,7 +8,7 @@ from ..utils import urljoin
class NhkBaseIE(InfoExtractor):
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json'
_API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
_BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
_TYPE_REGEX = r'/(?P<type>video|audio)/'

View File

@ -60,8 +60,7 @@ class NRKBaseIE(InfoExtractor):
return self._download_json(
urljoin('https://psapi.nrk.no/', path),
video_id, note or 'Downloading %s JSON' % item,
fatal=fatal, query=query,
headers={'Accept-Encoding': 'gzip, deflate, br'})
fatal=fatal, query=query)
class NRKIE(NRKBaseIE):

View File

@ -16,6 +16,7 @@ from ..utils import (
ExtractorError,
get_exe_version,
is_outdated_version,
process_communicate_or_kill,
std_headers,
)
@ -226,7 +227,7 @@ class PhantomJSwrapper(object):
self.exe, '--ssl-protocol=any',
self._TMP_FILES['script'].name
], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = p.communicate()
out, err = process_communicate_or_kill(p)
if p.returncode != 0:
raise ExtractorError(
'Executing JS failed\n:' + encodeArgument(err))

View File

@ -0,0 +1,193 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
get_element_by_class,
int_or_none,
merge_dicts,
url_or_none,
)
class PeekVidsIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://(?:www\.)?peekvids\.com/
(?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=)
(?P<id>[^/?&#]*)
'''
_TESTS = [{
'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd',
'md5': '2ff6a357a9717dc9dc9894b51307e9a2',
'info_dict': {
'id': '1262717',
'display_id': 'BSyLMbN0YCd',
'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
'ext': 'mp4',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
'timestamp': 1642579329,
'upload_date': '20220119',
'duration': 416,
'view_count': int,
'age_limit': 18,
'uploader': 'SEXYhub.com',
'categories': list,
'tags': list,
},
}]
_DOMAIN = 'www.peekvids.com'
def _get_detail(self, html):
return get_element_by_class('detail-video-block', html)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id, expected_status=429)
if '>Rate Limit Exceeded' in webpage:
raise ExtractorError(
'[%s] %s: %s' % (self.IE_NAME, video_id, 'You are suspected as a bot. Wait, or pass the captcha test on the site and provide --cookies.'),
expected=True)
title = self._html_search_regex(r'(?s)<h1\b[^>]*>(.+?)</h1>', webpage, 'title')
display_id = video_id
video_id = self._search_regex(r'(?s)<video\b[^>]+\bdata-id\s*=\s*["\']?([\w-]+)', webpage, 'short video ID')
srcs = self._download_json(
'https://%s/v-alt/%s' % (self._DOMAIN, video_id), video_id,
note='Downloading list of source files')
formats = [{
'url': f_url,
'format_id': f_id,
'height': int_or_none(f_id),
} for f_url, f_id in (
(url_or_none(f_v), f_match.group(1))
for f_v, f_match in (
(v, re.match(r'^data-src(\d{3,})$', k))
for k, v in srcs.items() if v) if f_match)
if f_url
]
if not formats:
formats = [{'url': url} for url in srcs.values()]
self._sort_formats(formats)
info = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={})
info.pop('url', None)
# may not have found the thumbnail if it was in a list in the ld+json
info.setdefault('thumbnail', self._og_search_thumbnail(webpage))
detail = self._get_detail(webpage) or ''
info['description'] = self._html_search_regex(
r'(?s)(.+?)(?:%s\s*<|<ul\b)' % (re.escape(info.get('description', '')), ),
detail, 'description', default=None) or None
info['title'] = re.sub(r'\s*[,-][^,-]+$', '', info.get('title') or title) or self._generic_title(url)
def cat_tags(name, html):
l = self._html_search_regex(
r'(?s)<span\b[^>]*>\s*%s\s*:\s*</span>(.+?)</li>' % (re.escape(name), ),
html, name, default='')
return [x for x in re.split(r'\s+', l) if x]
return merge_dicts({
'id': video_id,
'display_id': display_id,
'age_limit': 18,
'formats': formats,
'categories': cat_tags('Categories', detail),
'tags': cat_tags('Tags', detail),
'uploader': self._html_search_regex(r'[Uu]ploaded\s+by\s(.+?)"', webpage, 'uploader', default=None),
}, info)
class PlayVidsIE(PeekVidsIE):
_VALID_URL = r'https?://(?:www\.)?playvids\.com/(?:embed/|\w\w?/)?(?P<id>[^/?#]*)'
_TESTS = [{
'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
'md5': '2f12e50213dd65f142175da633c4564c',
'info_dict': {
'id': '1978030',
'display_id': 'U3pBrYhsjXM',
'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
'ext': 'mp4',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
'timestamp': 1640435839,
'upload_date': '20211225',
'duration': 416,
'view_count': int,
'age_limit': 18,
'uploader': 'SEXYhub.com',
'categories': list,
'tags': list,
},
}, {
'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
'only_matching': True,
}, {
'url': 'https://www.playvids.com/embed/U3pBrYhsjXM',
'only_matching': True,
}, {
'url': 'https://www.playvids.com/bKmGLe3IwjZ/sv/brazzers-800-phone-sex-madison-ivy-always-on-the-line',
'md5': 'e783986e596cafbf46411a174ab42ba6',
'info_dict': {
'id': '762385',
'display_id': 'bKmGLe3IwjZ',
'ext': 'mp4',
'title': 'Brazzers - 1 800 Phone Sex: Madison Ivy Always On The Line 6',
'description': 'md5:bdcd2db2b8ad85831a491d7c8605dcef',
'timestamp': 1516958544,
'upload_date': '20180126',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 480,
'uploader': 'Brazzers',
'age_limit': 18,
'view_count': int,
'age_limit': 18,
'categories': list,
'tags': list,
},
}, {
'url': 'https://www.playvids.com/v/47iUho33toY',
'md5': 'b056b5049d34b648c1e86497cf4febce',
'info_dict': {
'id': '700621',
'display_id': '47iUho33toY',
'ext': 'mp4',
'title': 'KATEE OWEN STRIPTIASE IN SEXY RED LINGERIE',
'description': None,
'timestamp': 1507052209,
'upload_date': '20171003',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 332,
'uploader': 'Cacerenele',
'age_limit': 18,
'view_count': int,
'categories': list,
'tags': list,
}
}, {
'url': 'https://www.playvids.com/z3_7iwWCmqt/sexy-teen-filipina-striptease-beautiful-pinay-bargirl-strips-and-dances',
'md5': 'efa09be9f031314b7b7e3bc6510cd0df',
'info_dict': {
'id': '1523518',
'display_id': 'z3_7iwWCmqt',
'ext': 'mp4',
'title': 'SEXY TEEN FILIPINA STRIPTEASE - Beautiful Pinay Bargirl Strips and Dances',
'description': None,
'timestamp': 1607470323,
'upload_date': '20201208',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 593,
'uploader': 'yorours',
'age_limit': 18,
'view_count': int,
'categories': list,
'tags': list,
},
}]
_DOMAIN = 'www.playvids.com'
def _get_detail(self, html):
return get_element_by_class('detail-block', html)

View File

@ -0,0 +1,105 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
import re
from ..utils import (
merge_dicts,
)
class Pr0grammStaticIE(InfoExtractor):
# Possible urls:
# https://pr0gramm.com/static/5466437
_VALID_URL = r'https?://pr0gramm\.com/static/(?P<id>[0-9]+)'
_TEST = {
'url': 'https://pr0gramm.com/static/5466437',
'md5': '52fa540d70d3edc286846f8ca85938aa',
'info_dict': {
'id': '5466437',
'ext': 'mp4',
'title': 'pr0gramm-5466437 by g11st',
'uploader': 'g11st',
'upload_date': '20221221',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
# Fetch media sources
entries = self._parse_html5_media_entries(url, webpage, video_id)
media_info = entries[0]
# this raises if there are no formats
self._sort_formats(media_info.get('formats') or [])
# Fetch author
uploader = self._html_search_regex(r'by\W+([\w-]+)\W+', webpage, 'uploader')
# Fetch approx upload timestamp from filename
# Have None-defaults in case the extraction fails
uploadDay = None
uploadMon = None
uploadYear = None
uploadTimestr = None
# (//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4)
m = re.search(r'//img\.pr0gramm\.com/(?P<year>[\d]+)/(?P<mon>[\d]+)/(?P<day>[\d]+)/\w+\.\w{,4}', webpage)
if (m):
# Up to a day of accuracy should suffice...
uploadDay = m.groupdict().get('day')
uploadMon = m.groupdict().get('mon')
uploadYear = m.groupdict().get('year')
uploadTimestr = uploadYear + uploadMon + uploadDay
return merge_dicts({
'id': video_id,
'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''),
'uploader': uploader,
'upload_date': uploadTimestr
}, media_info)
# This extractor is for the primary url (used for sharing, and appears in the
# location bar) Since this page loads the DOM via JS, yt-dl can't find any
# video information here. So let's redirect to a compatibility version of
# the site, which does contain the <video>-element by itself, without requiring
# js to be ran.
class Pr0grammIE(InfoExtractor):
# Possible urls:
# https://pr0gramm.com/new/546637
# https://pr0gramm.com/new/video/546637
# https://pr0gramm.com/top/546637
# https://pr0gramm.com/top/video/546637
# https://pr0gramm.com/user/g11st/uploads/5466437
# https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290
# https://pr0gramm.com/user/froschler/reinziehen-1elf/5232030
# https://pr0gramm.com/user/froschler/1elf/5232030
# https://pr0gramm.com/new/5495710:comment62621020 <- this is not the id!
# https://pr0gramm.com/top/fruher war alles damals/5498175
_VALID_URL = r'https?:\/\/pr0gramm\.com\/(?!static/\d+).+?\/(?P<id>[\d]+)(:|$)'
_TEST = {
'url': 'https://pr0gramm.com/new/video/5466437',
'info_dict': {
'id': '5466437',
'ext': 'mp4',
'title': 'pr0gramm-5466437 by g11st',
'uploader': 'g11st',
'upload_date': '20221221',
}
}
def _generic_title():
return "oof"
def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result(
'https://pr0gramm.com/static/' + video_id,
video_id=video_id,
ie=Pr0grammStaticIE.ie_key())

View File

@ -34,7 +34,9 @@ class TelegraafIE(InfoExtractor):
article_id = self._match_id(url)
video_id = self._download_json(
'https://www.telegraaf.nl/graphql', article_id, query={
'https://app.telegraaf.nl/graphql', article_id,
headers={'User-Agent': 'De Telegraaf/6.8.11 (Android 11; en_US)'},
query={
'query': '''{
article(uid: %s) {
videos {

View File

@ -0,0 +1,218 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import itertools
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
)
from ..utils import (
clean_html,
get_element_by_class,
int_or_none,
merge_dicts,
url_or_none,
urljoin,
)
class ThisVidIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+)'
_TESTS = [{
'url': 'https://thisvid.com/videos/sitting-on-ball-tight-jeans/',
'md5': '839becb572995687e11a69dc4358a386',
'info_dict': {
'id': '3533241',
'ext': 'mp4',
'title': 'Sitting on ball tight jeans',
'description': 'md5:372353bb995883d1b65fddf507489acd',
'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
'uploader_id': '150629',
'uploader': 'jeanslevisjeans',
'age_limit': 18,
}
}, {
'url': 'https://thisvid.com/embed/3533241/',
'md5': '839becb572995687e11a69dc4358a386',
'info_dict': {
'id': '3533241',
'ext': 'mp4',
'title': 'Sitting on ball tight jeans',
'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
'uploader_id': '150629',
'uploader': 'jeanslevisjeans',
'age_limit': 18,
}
}]
def _real_extract(self, url):
main_id, type_ = re.match(self._VALID_URL, url).group('id', 'type')
webpage = self._download_webpage(url, main_id)
title = self._html_search_regex(
r'<title\b[^>]*?>(?:Video:\s+)?(.+?)(?:\s+-\s+ThisVid(?:\.com| tube))?</title>',
webpage, 'title')
if type_ == 'embed':
# look for more metadata
video_alt_url = url_or_none(self._search_regex(
r'''video_alt_url\s*:\s+'(%s/)',''' % (self._VALID_URL, ),
webpage, 'video_alt_url', default=None))
if video_alt_url and video_alt_url != url:
webpage = self._download_webpage(
video_alt_url, main_id,
note='Redirecting embed to main page', fatal=False) or webpage
video_holder = get_element_by_class('video-holder', webpage) or ''
if '>This video is a private video' in video_holder:
self.raise_login_required(
(clean_html(video_holder) or 'Private video').split('\n', 1)[0])
uploader = self._html_search_regex(
r'''(?s)<span\b[^>]*>Added by:\s*</span><a\b[^>]+\bclass\s*=\s*["']author\b[^>]+\bhref\s*=\s*["']https://thisvid\.com/members/([0-9]+/.{3,}?)\s*</a>''',
webpage, 'uploader', default='')
uploader = re.split(r'''/["'][^>]*>\s*''', uploader)
if len(uploader) == 2:
# id must be non-empty, uploader could be ''
uploader_id, uploader = uploader
uploader = uploader or None
else:
uploader_id = uploader = None
return merge_dicts({
'_type': 'url_transparent',
'title': title,
'age_limit': 18,
'uploader': uploader,
'uploader_id': uploader_id,
}, self.url_result(url, ie='Generic'))
class ThisVidMemberIE(InfoExtractor):
_VALID_URL = r'https?://thisvid\.com/members/(?P<id>\d+)'
_TESTS = [{
'url': 'https://thisvid.com/members/2140501/',
'info_dict': {
'id': '2140501',
'title': 'Rafflesia\'s Profile',
},
'playlist_mincount': 16,
}, {
'url': 'https://thisvid.com/members/2140501/favourite_videos/',
'info_dict': {
'id': '2140501',
'title': 'Rafflesia\'s Favourite Videos',
},
'playlist_mincount': 15,
}, {
'url': 'https://thisvid.com/members/636468/public_videos/',
'info_dict': {
'id': '636468',
'title': 'Happymouth\'s Public Videos',
},
'playlist_mincount': 196,
},
]
def _urls(self, html):
for m in re.finditer(r'''<a\b[^>]+\bhref\s*=\s*["'](?P<url>%s\b)[^>]+>''' % (ThisVidIE._VALID_URL, ), html):
yield m.group('url')
def _real_extract(self, url):
pl_id = self._match_id(url)
webpage = self._download_webpage(url, pl_id)
title = re.split(
r'(?i)\s*\|\s*ThisVid\.com\s*$',
self._og_search_title(webpage, default=None) or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False) or '', 1)[0] or None
def entries(page_url, html=None):
for page in itertools.count(1):
if not html:
html = self._download_webpage(
page_url, pl_id, note='Downloading page %d' % (page, ),
fatal=False) or ''
for u in self._urls(html):
yield u
next_page = get_element_by_class('pagination-next', html) or ''
if next_page:
# member list page
next_page = urljoin(url, self._search_regex(
r'''<a\b[^>]+\bhref\s*=\s*("|')(?P<url>(?!#)(?:(?!\1).)+)''',
next_page, 'next page link', group='url', default=None))
# in case a member page should have pagination-next with empty link, not just `else:`
if next_page is None:
# playlist page
parsed_url = compat_urlparse.urlparse(page_url)
base_path, num = parsed_url.path.rsplit('/', 1)
num = int_or_none(num)
if num is None:
base_path, num = parsed_url.path.rstrip('/'), 1
parsed_url = parsed_url._replace(path=base_path + ('/%d' % (num + 1, )))
next_page = compat_urlparse.urlunparse(parsed_url)
if page_url == next_page:
next_page = None
if not next_page:
break
page_url, html = next_page, None
return self.playlist_from_matches(
entries(url, webpage), playlist_id=pl_id, playlist_title=title, ie='ThisVid')
class ThisVidPlaylistIE(ThisVidMemberIE):
_VALID_URL = r'https?://thisvid\.com/playlist/(?P<id>\d+)/video/(?P<video_id>[A-Za-z0-9-]+)'
_TESTS = [{
'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
'info_dict': {
'id': '6615',
'title': 'Underwear Stuff',
},
'playlist_mincount': 200,
}, {
'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
'info_dict': {
'id': '1072387',
'ext': 'mp4',
'title': 'Big Italian Booty 28',
'description': 'md5:1bccf7b13765e18fb27bf764dba7ede2',
'uploader_id': '367912',
'uploader': 'Jcmusclefun',
'age_limit': 18,
},
'params': {
'noplaylist': True,
},
}]
def _get_video_url(self, pl_url):
video_id = re.match(self._VALID_URL, pl_url).group('video_id')
return urljoin(pl_url, '/videos/%s/' % (video_id, ))
def _urls(self, html):
for m in re.finditer(r'''<a\b[^>]+\bhref\s*=\s*["'](?P<url>%s\b)[^>]+>''' % (self._VALID_URL, ), html):
yield self._get_video_url(m.group('url'))
def _real_extract(self, url):
pl_id = self._match_id(url)
if self._downloader.params.get('noplaylist'):
self.to_screen('Downloading just the featured video because of --no-playlist')
return self.url_result(self._get_video_url(url), 'ThisVid')
self.to_screen(
'Downloading playlist %s - add --no-playlist to download just the featured video' % (pl_id, ))
result = super(ThisVidPlaylistIE, self)._real_extract(url)
# rework title returned as `the title - the title`
title = result['title']
t_len = len(title)
if t_len > 5 and t_len % 2 != 0:
t_len = t_len // 2
if title[t_len] == '-':
title = [t.strip() for t in (title[:t_len], title[t_len + 1:])]
if title[0] and title[0] == title[1]:
result['title'] = title[0]
return result

View File

@ -5,7 +5,7 @@ from .common import InfoExtractor
class UKTVPlayIE(InfoExtractor):
_VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)'
_VALID_URL = r'https?://uktvplay\.(?:uktv\.)?co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)'
_TESTS = [{
'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
'info_dict': {

View File

@ -663,7 +663,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
if '//player.vimeo.com/video/' in url:
config = self._parse_json(self._search_regex(
r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
if config.get('view') == 4:
config = self._verify_player_video_password(
redirect_url, video_id, headers)

View File

@ -64,6 +64,18 @@ class VVVVIDIE(InfoExtractor):
'params': {
'skip_download': True,
},
}, {
# video_type == 'video/dash'
'url': 'https://www.vvvvid.it/show/683/made-in-abyss/1542/693786/nanachi',
'info_dict': {
'id': '693786',
'ext': 'mp4',
'title': 'Nanachi',
},
'params': {
'skip_download': True,
'format': 'mp4',
},
}, {
'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
'only_matching': True
@ -205,6 +217,9 @@ class VVVVIDIE(InfoExtractor):
})
is_youtube = True
break
elif video_type == 'video/dash':
formats.extend(self._extract_m3u8_formats(
embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
else:
formats.extend(self._extract_wowza_formats(
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))

View File

@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
@ -23,7 +24,7 @@ from ..utils import (
class XHamsterIE(InfoExtractor):
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com)'
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com|xhvid\.com)'
_VALID_URL = r'''(?x)
https?://
(?:.+?\.)?%s/
@ -34,7 +35,7 @@ class XHamsterIE(InfoExtractor):
''' % _DOMAINS
_TESTS = [{
'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
'md5': '98b4687efb1ffd331c4197854dc09e8f',
'md5': '34e1ab926db5dc2750fed9e1f34304bb',
'info_dict': {
'id': '1509445',
'display_id': 'femaleagent-shy-beauty-takes-the-bait',
@ -43,6 +44,7 @@ class XHamsterIE(InfoExtractor):
'timestamp': 1350194821,
'upload_date': '20121014',
'uploader': 'Ruseful2011',
'uploader_id': 'ruseful2011',
'duration': 893,
'age_limit': 18,
},
@ -72,6 +74,7 @@ class XHamsterIE(InfoExtractor):
'timestamp': 1454948101,
'upload_date': '20160208',
'uploader': 'parejafree',
'uploader_id': 'parejafree',
'duration': 72,
'age_limit': 18,
},
@ -117,6 +120,12 @@ class XHamsterIE(InfoExtractor):
}, {
'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx',
'only_matching': True,
}, {
'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
'only_matching': True,
}, {
'url': 'https://xhvid.com/videos/lk-mm-xhc6wn6',
'only_matching': True,
}]
def _real_extract(self, url):
@ -245,6 +254,7 @@ class XHamsterIE(InfoExtractor):
else:
categories = None
uploader_url = url_or_none(try_get(video, lambda x: x['author']['pageURL']))
return {
'id': video_id,
'display_id': display_id,
@ -253,6 +263,8 @@ class XHamsterIE(InfoExtractor):
'timestamp': int_or_none(video.get('created')),
'uploader': try_get(
video, lambda x: x['author']['name'], compat_str),
'uploader_url': uploader_url,
'uploader_id': uploader_url.split('/')[-1] if uploader_url else None,
'thumbnail': video.get('thumbURL'),
'duration': int_or_none(video.get('duration')),
'view_count': int_or_none(video.get('views')),
@ -261,7 +273,7 @@ class XHamsterIE(InfoExtractor):
'dislike_count': int_or_none(try_get(
video, lambda x: x['rating']['dislikes'], int)),
'comment_count': int_or_none(video.get('views')),
'age_limit': age_limit,
'age_limit': age_limit if age_limit is not None else 18,
'categories': categories,
'formats': formats,
}
@ -352,6 +364,7 @@ class XHamsterIE(InfoExtractor):
'description': description,
'upload_date': upload_date,
'uploader': uploader,
'uploader_id': uploader.lower() if uploader else None,
'thumbnail': thumbnail,
'duration': duration,
'view_count': view_count,
@ -420,6 +433,12 @@ class XHamsterUserIE(InfoExtractor):
'id': 'firatkaan',
},
'playlist_mincount': 1,
}, {
'url': 'https://xhday.com/users/mobhunter',
'only_matching': True,
}, {
'url': 'https://xhvid.com/users/pelushe21',
'only_matching': True,
}]
def _entries(self, user_id):

View File

@ -315,7 +315,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
title = try_get(
renderer,
(lambda x: x['title']['runs'][0]['text'],
lambda x: x['title']['simpleText']), compat_str)
lambda x: x['title']['simpleText'],
lambda x: x['headline']['simpleText']), compat_str)
description = try_get(
renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
compat_str)
@ -499,7 +500,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
_SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
_GEO_BYPASS = False
@ -1500,7 +1501,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return lambda s: jsi.extract_function_from_code(*func_code)([s])
def _n_descramble(self, n_param, player_url, video_id):
"""Compute the response to YT's "n" parameter challenge
"""Compute the response to YT's "n" parameter challenge,
or None
Args:
n_param -- challenge string that is the value of the
@ -1518,7 +1520,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if player_id not in self._player_cache:
self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
func = self._player_cache[player_id]
self._player_cache[sig_id] = func(n_param)
ret = func(n_param)
if ret.startswith('enhanced_except_'):
raise ExtractorError('Unhandled exception in decode')
self._player_cache[sig_id] = ret
if self._downloader.params.get('verbose', False):
self._downloader.to_screen('[debug] [%s] %s' % (self.IE_NAME, 'Decrypted nsig {0} => {1}'.format(n_param, self._player_cache[sig_id])))
return self._player_cache[sig_id]
@ -1539,10 +1544,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
continue
n_param = n_param[-1]
n_response = self._n_descramble(n_param, player_url, video_id)
if n_response:
qs['n'] = [n_response]
fmt['url'] = compat_urlparse.urlunparse(
parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
if n_response is None:
# give up if descrambling failed
break
qs['n'] = [n_response]
fmt['url'] = compat_urlparse.urlunparse(
parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
def _mark_watched(self, video_id, player_response):
playback_url = url_or_none(try_get(
@ -2201,6 +2208,24 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
IE_NAME = 'youtube:tab'
_TESTS = [{
# Shorts
'url': 'https://www.youtube.com/@SuperCooperShorts/shorts',
'playlist_mincount': 5,
'info_dict': {
'description': 'Short clips from Super Cooper Sundays!',
'id': 'UCKMA8kHZ8bPYpnMNaUSxfEQ',
'title': 'Super Cooper Shorts - Shorts',
}
}, {
# Channel that does not have a Shorts tab. Test should just download videos on Home tab instead
'url': 'https://www.youtube.com/@emergencyawesome/shorts',
'info_dict': {
'description': 'md5:592c080c06fef4de3c902c4a8eecd850',
'id': 'UCDiFRMQWpcp8_KD4vwIVicw',
'title': 'Emergency Awesome - Home',
},
'playlist_mincount': 5,
}, {
# playlists, multipage
'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
'playlist_mincount': 94,
@ -2674,7 +2699,11 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
def _rich_grid_entries(self, contents):
for content in contents:
video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
video_renderer = try_get(
content,
(lambda x: x['richItemRenderer']['content']['videoRenderer'],
lambda x: x['richItemRenderer']['content']['reelItemRenderer']),
dict)
if video_renderer:
entry = self._video_entry(video_renderer)
if entry:

View File

@ -8,13 +8,14 @@ from ..compat import compat_str
from ..utils import (
determine_ext,
ExtractorError,
extract_attributes,
float_or_none,
int_or_none,
merge_dicts,
NO_DEFAULT,
orderedSet,
parse_codecs,
qualities,
str_or_none,
try_get,
unified_timestamp,
update_url_query,
@ -57,28 +58,39 @@ class ZDFBaseIE(InfoExtractor):
format_urls.add(format_url)
mime_type = meta.get('mimeType')
ext = determine_ext(format_url)
join_nonempty = lambda s, l: s.join(filter(None, l))
meta_map = lambda t: map(lambda x: str_or_none(meta.get(x)), t)
if mime_type == 'application/x-mpegURL' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
new_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4', m3u8_id='hls',
entry_protocol='m3u8_native', fatal=False))
entry_protocol='m3u8_native', fatal=False)
elif mime_type == 'application/f4m+xml' or ext == 'f4m':
formats.extend(self._extract_f4m_formats(
update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False))
new_formats = self._extract_f4m_formats(
update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False)
else:
f = parse_codecs(meta.get('mimeCodec'))
if not f:
data = meta.get('type', '').split('_')
if try_get(data, lambda x: x[2]) == ext:
f = dict(zip(('vcodec', 'acodec'), data[1]))
format_id = ['http']
for p in (meta.get('type'), meta.get('quality')):
if p and isinstance(p, compat_str):
format_id.append(p)
format_id.extend(join_nonempty('-', meta_map(('type', 'quality'))))
f.update({
'url': format_url,
'format_id': '-'.join(format_id),
'format_note': meta.get('quality'),
'language': meta.get('language'),
'quality': qualities(self._QUALITIES)(meta.get('quality')),
'preference': -10,
'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None))
})
formats.append(f)
new_formats = [f]
formats.extend(merge_dicts(f, {
'format_note': join_nonempty(',', meta_map(('quality', 'class'))),
'language': meta.get('language'),
'language_preference': 10 if meta.get('class') == 'main' else -10 if meta.get('class') == 'ad' else -1,
'quality': qualities(self._QUALITIES)(meta.get('quality')),
}) for f in new_formats)
def _extract_ptmd(self, ptmd_url, video_id, api_token, referrer):
ptmd = self._call_api(
@ -107,6 +119,7 @@ class ZDFBaseIE(InfoExtractor):
'type': f.get('type'),
'mimeType': f.get('mimeType'),
'quality': quality.get('quality'),
'class': track.get('class'),
'language': track.get('language'),
})
self._sort_formats(formats)
@ -171,6 +184,20 @@ class ZDFIE(ZDFBaseIE):
'duration': 2615,
'timestamp': 1465021200,
'upload_date': '20160604',
'thumbnail': 'https://www.zdf.de/assets/mauve-im-labor-100~768x432?cb=1464909117806',
},
}, {
'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
'md5': '1b93bdec7d02fc0b703c5e7687461628',
'info_dict': {
'ext': 'mp4',
'id': 'video_funk_1770473',
'duration': 1278,
'description': 'Die Neue an der Schule verdreht Ismail den Kopf.',
'title': 'Alles ist verzaubert',
'timestamp': 1635520560,
'upload_date': '20211029',
'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-100~1920x1080?cb=1636466431799',
},
}, {
# Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche
@ -204,6 +231,19 @@ class ZDFIE(ZDFBaseIE):
'timestamp': 1641355200,
'upload_date': '20220105',
},
'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"'
}, {
'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html',
'info_dict': {
'id': '191205_1800_sendung_sok8',
'ext': 'mp4',
'title': 'Das Geld anderer Leute',
'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d',
'duration': 2581.0,
'timestamp': 1654790700,
'upload_date': '20220609',
'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=2400x1350',
},
}]
def _extract_entry(self, url, player, content, video_id):
@ -265,15 +305,16 @@ class ZDFIE(ZDFBaseIE):
'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
video_id)
document = video['document']
title = document['titel']
content_id = document['basename']
formats = []
format_urls = set()
for f in document['formitaeten']:
self._extract_format(content_id, formats, format_urls, f)
formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
document = formitaeten and video['document']
if formitaeten:
title = document['titel']
content_id = document['basename']
format_urls = set()
for f in formitaeten or []:
self._extract_format(content_id, formats, format_urls, f)
self._sort_formats(formats)
thumbnails = []
@ -320,9 +361,9 @@ class ZDFChannelIE(ZDFBaseIE):
'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
'info_dict': {
'id': 'das-aktuelle-sportstudio',
'title': 'das aktuelle sportstudio | ZDF',
'title': 'das aktuelle sportstudio',
},
'playlist_mincount': 23,
'playlist_mincount': 18,
}, {
'url': 'https://www.zdf.de/dokumentation/planet-e',
'info_dict': {
@ -330,6 +371,14 @@ class ZDFChannelIE(ZDFBaseIE):
'title': 'planet e.',
},
'playlist_mincount': 50,
}, {
'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
'info_dict': {
'id': 'aktenzeichen-xy-ungeloest',
'title': 'Aktenzeichen XY... ungelöst',
'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)",
},
'playlist_mincount': 2,
}, {
'url': 'https://www.zdf.de/filme/taunuskrimi/',
'only_matching': True,
@ -339,60 +388,36 @@ class ZDFChannelIE(ZDFBaseIE):
def suitable(cls, url):
return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url)
def _og_search_title(self, webpage, fatal=False):
title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal)
return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None
def _real_extract(self, url):
channel_id = self._match_id(url)
webpage = self._download_webpage(url, channel_id)
entries = [
self.url_result(item_url, ie=ZDFIE.ie_key())
for item_url in orderedSet(re.findall(
r'data-plusbar-url=["\'](http.+?\.html)', webpage))]
matches = re.finditer(
r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % ZDFIE._VALID_URL,
webpage)
return self.playlist_result(
entries, channel_id, self._og_search_title(webpage, fatal=False))
if self._downloader.params.get('noplaylist', False):
entry = next(
(self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches),
None)
self.to_screen('Downloading just the main video because of --no-playlist')
if entry:
return entry
else:
self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, ))
r"""
player = self._extract_player(webpage, channel_id)
def check_video(m):
v_ref = self._search_regex(
r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ),
webpage, 'check id', default='')
v_ref = extract_attributes(v_ref)
return v_ref.get('data-target-video-type') != 'novideo'
channel_id = self._search_regex(
r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage,
'channel id', group='id')
channel = self._call_api(
'https://api.zdf.de/content/documents/%s.json' % channel_id,
player, url, channel_id)
items = []
for module in channel['module']:
for teaser in try_get(module, lambda x: x['teaser'], list) or []:
t = try_get(
teaser, lambda x: x['http://zdf.de/rels/target'], dict)
if not t:
continue
items.extend(try_get(
t,
lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
list) or [])
items.extend(try_get(
module,
lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
list) or [])
entries = []
entry_urls = set()
for item in items:
t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
if not t:
continue
sharing_url = t.get('http://zdf.de/rels/sharing-url')
if not sharing_url or not isinstance(sharing_url, compat_str):
continue
if sharing_url in entry_urls:
continue
entry_urls.add(sharing_url)
entries.append(self.url_result(
sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
return self.playlist_result(entries, channel_id, channel.get('title'))
"""
return self.playlist_from_matches(
(m.group('url') for m in matches if check_video(m)),
channel_id, self._og_search_title(webpage, fatal=False))

File diff suppressed because it is too large Load Diff

View File

@ -270,11 +270,11 @@ def parseOpts(overrideArguments=None):
selection.add_option(
'--match-title',
dest='matchtitle', metavar='REGEX',
help='Download only matching titles (case-insensitive regex or sub-string)')
help='Download only matching titles (case-insensitive regex or alphanumeric sub-string)')
selection.add_option(
'--reject-title',
dest='rejecttitle', metavar='REGEX',
help='Skip download for matching titles (case-insensitive regex or sub-string)')
help='Skip download for matching titles (case-insensitive regex or alphanumeric sub-string)')
selection.add_option(
'--max-downloads',
dest='max_downloads', metavar='NUMBER', type=int, default=None,
@ -801,7 +801,7 @@ def parseOpts(overrideArguments=None):
postproc.add_option(
'--postprocessor-args',
dest='postprocessor_args', metavar='ARGS',
help='Give these arguments to the postprocessor')
help='Give these arguments to the postprocessor (if postprocessing is required)')
postproc.add_option(
'-k', '--keep-video',
action='store_true', dest='keepvideo', default=False,

View File

@ -13,8 +13,9 @@ from ..utils import (
encodeFilename,
PostProcessingError,
prepend_extension,
process_communicate_or_kill,
replace_extension,
shell_quote
shell_quote,
)
@ -109,7 +110,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
stdout, stderr = process_communicate_or_kill(p)
if p.returncode != 0:
msg = stderr.decode('utf-8', 'replace').strip()

View File

@ -16,6 +16,7 @@ from ..utils import (
is_outdated_version,
PostProcessingError,
prepend_extension,
process_communicate_or_kill,
shell_quote,
subtitles_filename,
dfxp2srt,
@ -180,7 +181,7 @@ class FFmpegPostProcessor(PostProcessor):
handle = subprocess.Popen(
cmd, stderr=subprocess.PIPE,
stdout=subprocess.PIPE, stdin=subprocess.PIPE)
stdout_data, stderr_data = handle.communicate()
stdout_data, stderr_data = process_communicate_or_kill(handle)
expected_ret = 0 if self.probe_available else 1
if handle.wait() != expected_ret:
return None
@ -228,7 +229,7 @@ class FFmpegPostProcessor(PostProcessor):
if self._downloader.params.get('verbose', False):
self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
stdout, stderr = p.communicate()
stdout, stderr = process_communicate_or_kill(p)
if p.returncode != 0:
stderr = stderr.decode('utf-8', 'replace')
msgs = stderr.strip().split('\n')

View File

@ -40,6 +40,8 @@ class MetadataFromTitlePP(PostProcessor):
% self._titleformat)
return [], info
for attribute, value in match.groupdict().items():
if value is None:
continue
info[attribute] = value
self._downloader.to_screen(
'[fromtitle] parsed %s: %s'

View File

@ -33,6 +33,7 @@ import sys
import tempfile
import time
import traceback
import unicodedata
import xml.etree.ElementTree
import zlib
@ -42,6 +43,7 @@ from .compat import (
compat_HTTPError,
compat_basestring,
compat_chr,
compat_collections_abc,
compat_cookiejar,
compat_ctypes_WINFUNCTYPE,
compat_etree_fromstring,
@ -1684,6 +1686,7 @@ USER_AGENTS = {
NO_DEFAULT = object()
IDENTITY = lambda x: x
ENGLISH_MONTH_NAMES = [
'January', 'February', 'March', 'April', 'May', 'June',
@ -1696,6 +1699,17 @@ MONTH_NAMES = {
'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
# Timezone names for RFC2822 obs-zone
# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
TIMEZONE_NAMES = {
'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
'AST': -4, 'ADT': -3, # Atlantic (used in Canada)
'EST': -5, 'EDT': -4, # Eastern
'CST': -6, 'CDT': -5, # Central
'MST': -7, 'MDT': -6, # Mountain
'PST': -8, 'PDT': -7 # Pacific
}
KNOWN_EXTENSIONS = (
'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
'flv', 'f4v', 'f4a', 'f4b',
@ -1735,12 +1749,17 @@ DATE_FORMATS = (
'%b %dth %Y %I:%M',
'%Y %m %d',
'%Y-%m-%d',
'%Y.%m.%d.',
'%Y/%m/%d',
'%Y/%m/%d %H:%M',
'%Y/%m/%d %H:%M:%S',
'%Y%m%d%H%M',
'%Y%m%d%H%M%S',
'%Y%m%d',
'%Y-%m-%d %H:%M',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S.%f',
'%Y-%m-%d %H:%M:%S:%f',
'%d.%m.%Y %H:%M',
'%d.%m.%Y %H.%M',
'%Y-%m-%dT%H:%M:%SZ',
@ -1753,6 +1772,7 @@ DATE_FORMATS = (
'%b %d %Y at %H:%M:%S',
'%B %d %Y at %H:%M',
'%B %d %Y at %H:%M:%S',
'%H:%M %d-%b-%Y',
)
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
@ -1763,6 +1783,7 @@ DATE_FORMATS_DAY_FIRST.extend([
'%d/%m/%Y',
'%d/%m/%y',
'%d/%m/%Y %H:%M:%S',
'%d-%m-%Y %H:%M',
])
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
@ -2100,6 +2121,9 @@ def sanitize_filename(s, restricted=False, is_id=False):
return '_'
return char
# Replace look-alike Unicode glyphs
if restricted and not is_id:
s = unicodedata.normalize('NFKC', s)
# Handle timestamps
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
result = ''.join(map(replace_insane, s))
@ -2212,6 +2236,15 @@ def unescapeHTML(s):
r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
def process_communicate_or_kill(p, *args, **kwargs):
try:
return p.communicate(*args, **kwargs)
except BaseException: # Including KeyboardInterrupt
p.kill()
p.wait()
raise
def get_subprocess_encoding():
if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
# For subprocess calls, encode with locale encoding
@ -2957,10 +2990,22 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
def extract_timezone(date_str):
m = re.search(
r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
date_str)
r'''(?x)
^.{8,}? # >=8 char non-TZ prefix, if present
(?P<tz>Z| # just the UTC Z, or
(?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
(?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
[ ]? # optional space
(?P<sign>\+|-) # +/-
(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
$)
''', date_str)
if not m:
timezone = datetime.timedelta()
m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
if timezone is not None:
date_str = date_str[:-len(m.group('tz'))]
timezone = datetime.timedelta(hours=timezone or 0)
else:
date_str = date_str[:-len(m.group('tz'))]
if not m.group('sign'):
@ -3028,7 +3073,8 @@ def unified_timestamp(date_str, day_first=True):
if date_str is None:
return None
date_str = re.sub(r'[,|]', '', date_str)
date_str = re.sub(r'\s+', ' ', re.sub(
r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
timezone, date_str = extract_timezone(date_str)
@ -3054,7 +3100,7 @@ def unified_timestamp(date_str, day_first=True):
pass
timetuple = email.utils.parsedate_tz(date_str)
if timetuple:
return calendar.timegm(timetuple) + pm_delta * 3600
return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
def determine_ext(url, default_ext='unknown_video'):
@ -3664,13 +3710,11 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
if get_attr:
if v is not None:
v = getattr(v, get_attr, None)
if v == '':
v = None
if v is None:
if v in (None, ''):
return default
try:
return int(v) * invscale // scale
except (ValueError, TypeError):
except (ValueError, TypeError, OverflowError):
return default
@ -3788,7 +3832,8 @@ def check_executable(exe, args=[]):
""" Checks if the given binary is installed somewhere in PATH, and returns its name.
args can be a list of arguments for a short output (like -version) """
try:
subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
process_communicate_or_kill(subprocess.Popen(
[exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
except OSError:
return False
return exe
@ -3802,10 +3847,10 @@ def get_exe_version(exe, args=['--version'],
# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
# SIGTTOU if youtube-dl is run in the background.
# See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
out, _ = subprocess.Popen(
out, _ = process_communicate_or_kill(subprocess.Popen(
[encodeArgument(exe)] + args,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
except OSError:
return False
if isinstance(out, bytes): # Python 2.x
@ -3824,6 +3869,105 @@ def detect_exe_version(output, version_re=None, unrecognized='present'):
return unrecognized
class LazyList(compat_collections_abc.Sequence):
"""Lazy immutable list from an iterable
Note that slices of a LazyList are lists and not LazyList"""
class IndexError(IndexError):
def __init__(self, cause=None):
if cause:
# reproduce `raise from`
self.__cause__ = cause
super(IndexError, self).__init__()
def __init__(self, iterable, **kwargs):
# kwarg-only
reverse = kwargs.get('reverse', False)
_cache = kwargs.get('_cache')
self._iterable = iter(iterable)
self._cache = [] if _cache is None else _cache
self._reversed = reverse
def __iter__(self):
if self._reversed:
# We need to consume the entire iterable to iterate in reverse
for item in self.exhaust():
yield item
return
for item in self._cache:
yield item
for item in self._iterable:
self._cache.append(item)
yield item
def _exhaust(self):
self._cache.extend(self._iterable)
self._iterable = [] # Discard the emptied iterable to make it pickle-able
return self._cache
def exhaust(self):
"""Evaluate the entire iterable"""
return self._exhaust()[::-1 if self._reversed else 1]
@staticmethod
def _reverse_index(x):
return None if x is None else ~x
def __getitem__(self, idx):
if isinstance(idx, slice):
if self._reversed:
idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
start, stop, step = idx.start, idx.stop, idx.step or 1
elif isinstance(idx, int):
if self._reversed:
idx = self._reverse_index(idx)
start, stop, step = idx, idx, 0
else:
raise TypeError('indices must be integers or slices')
if ((start or 0) < 0 or (stop or 0) < 0
or (start is None and step < 0)
or (stop is None and step > 0)):
# We need to consume the entire iterable to be able to slice from the end
# Obviously, never use this with infinite iterables
self._exhaust()
try:
return self._cache[idx]
except IndexError as e:
raise self.IndexError(e)
n = max(start or 0, stop or 0) - len(self._cache) + 1
if n > 0:
self._cache.extend(itertools.islice(self._iterable, n))
try:
return self._cache[idx]
except IndexError as e:
raise self.IndexError(e)
def __bool__(self):
try:
self[-1] if self._reversed else self[0]
except self.IndexError:
return False
return True
def __len__(self):
self._exhaust()
return len(self._cache)
def __reversed__(self):
return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
def __copy__(self):
return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
def __repr__(self):
# repr and str should mimic a list. So we exhaust the iterable
return repr(self.exhaust())
def __str__(self):
return repr(self.exhaust())
class PagedList(object):
def __len__(self):
# This is only useful for tests
@ -3931,7 +4075,8 @@ def escape_rfc3986(s):
"""Escape non-ASCII characters as suggested by RFC 3986"""
if sys.version_info < (3, 0) and isinstance(s, compat_str):
s = s.encode('utf-8')
return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
# ensure unicode: after quoting, it can always be converted
return compat_str(compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]"))
def escape_url(url):
@ -4048,6 +4193,10 @@ def multipart_encode(data, boundary=None):
return out, content_type
def variadic(x, allowed_types=(compat_str, bytes, dict)):
return x if isinstance(x, compat_collections_abc.Iterable) and not isinstance(x, allowed_types) else (x,)
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
if isinstance(key_or_keys, (list, tuple)):
for key in key_or_keys:
@ -4058,6 +4207,23 @@ def dict_get(d, key_or_keys, default=None, skip_false_values=True):
return d.get(key_or_keys, default)
def try_call(*funcs, **kwargs):
# parameter defaults
expected_type = kwargs.get('expected_type')
fargs = kwargs.get('args', [])
fkwargs = kwargs.get('kwargs', {})
for f in funcs:
try:
val = f(*fargs, **fkwargs)
except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
pass
else:
if expected_type is None or isinstance(val, expected_type):
return val
def try_get(src, getter, expected_type=None):
if not isinstance(getter, (list, tuple)):
getter = [getter]
@ -5744,7 +5910,7 @@ def write_xattr(path, key, value):
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
except EnvironmentError as e:
raise XAttrMetadataError(e.errno, e.strerror)
stdout, stderr = p.communicate()
stdout, stderr = process_communicate_or_kill(p)
stderr = stderr.decode('utf-8', 'replace')
if p.returncode != 0:
raise XAttrMetadataError(p.returncode, stderr)
@ -5791,3 +5957,220 @@ def clean_podcast_url(url):
st\.fm # https://podsights.com/docs/
)/e
)/''', '', url)
def traverse_obj(obj, *paths, **kwargs):
"""
Safely traverse nested `dict`s and `Sequence`s
>>> obj = [{}, {"key": "value"}]
>>> traverse_obj(obj, (1, "key"))
"value"
Each of the provided `paths` is tested and the first producing a valid result will be returned.
The next path will also be tested if the path branched but no results could be found.
Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
A value of None is treated as the absence of a value.
The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
The keys in the path can be one of:
- `None`: Return the current object.
- `str`/`int`: Return `obj[key]`. For `re.Match, return `obj.group(key)`.
- `slice`: Branch out and return all values in `obj[key]`.
- `Ellipsis`: Branch out and return a list of all values.
- `tuple`/`list`: Branch out and return a list of all matching values.
Read as: `[traverse_obj(obj, branch) for branch in branches]`.
- `function`: Branch out and return values filtered by the function.
Read as: `[value for key, value in obj if function(key, value)]`.
For `Sequence`s, `key` is the index of the value.
- `dict` Transform the current object and return a matching dict.
Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
`tuple`, `list`, and `dict` all support nested paths and branches.
@params paths Paths which to traverse by.
Keyword arguments:
@param default Value to return if the paths do not match.
@param expected_type If a `type`, only accept final values of this type.
If any other callable, try to call the function on each result.
@param get_all If `False`, return the first matching result, otherwise all matching ones.
@param casesense If `False`, consider string dictionary keys as case insensitive.
The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API
@param _is_user_input Whether the keys are generated from user input.
If `True` strings get converted to `int`/`slice` if needed.
@param _traverse_string Whether to traverse into objects as strings.
If `True`, any non-compatible object will first be
converted into a string and then traversed into.
@returns The result of the object traversal.
If successful, `get_all=True`, and the path branches at least once,
then a list of results is returned instead.
A list is always returned if the last path branches and no `default` is given.
"""
# parameter defaults
default = kwargs.get('default', NO_DEFAULT)
expected_type = kwargs.get('expected_type')
get_all = kwargs.get('get_all', True)
casesense = kwargs.get('casesense', True)
_is_user_input = kwargs.get('_is_user_input', False)
_traverse_string = kwargs.get('_traverse_string', False)
# instant compat
str = compat_str
is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes))
# stand-in until compat_re_Match is added
compat_re_Match = type(re.match('a', 'a'))
# stand-in until casefold.py is added
try:
''.casefold()
compat_casefold = lambda s: s.casefold()
except AttributeError:
compat_casefold = lambda s: s.lower()
casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k
if isinstance(expected_type, type):
type_test = lambda val: val if isinstance(val, expected_type) else None
else:
type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))
def from_iterable(iterables):
# chain.from_iterable(['ABC', 'DEF']) --> A B C D E F
for it in iterables:
for item in it:
yield item
def apply_key(key, obj):
if obj is None:
return
elif key is None:
yield obj
elif isinstance(key, (list, tuple)):
for branch in key:
_, result = apply_path(obj, branch)
for item in result:
yield item
elif key is Ellipsis:
result = []
if isinstance(obj, compat_collections_abc.Mapping):
result = obj.values()
elif is_sequence(obj):
result = obj
elif isinstance(obj, compat_re_Match):
result = obj.groups()
elif _traverse_string:
result = str(obj)
for item in result:
yield item
elif callable(key):
if is_sequence(obj):
iter_obj = enumerate(obj)
elif isinstance(obj, compat_collections_abc.Mapping):
iter_obj = obj.items()
elif isinstance(obj, compat_re_Match):
iter_obj = enumerate(itertools.chain([obj.group()], obj.groups()))
elif _traverse_string:
iter_obj = enumerate(str(obj))
else:
return
for item in (v for k, v in iter_obj if try_call(key, args=(k, v))):
yield item
elif isinstance(key, dict):
iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items())
yield dict((k, v if v is not None else default) for k, v in iter_obj
if v is not None or default is not NO_DEFAULT)
elif isinstance(obj, compat_collections_abc.Mapping):
yield (obj.get(key) if casesense or (key in obj)
else next((v for k, v in obj.items() if casefold(k) == key), None))
elif isinstance(obj, compat_re_Match):
if isinstance(key, int) or casesense:
try:
yield obj.group(key)
return
except IndexError:
pass
if not isinstance(key, str):
return
yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
else:
if _is_user_input:
key = (int_or_none(key) if ':' not in key
else slice(*map(int_or_none, key.split(':'))))
if not isinstance(key, (int, slice)):
return
if not is_sequence(obj):
if not _traverse_string:
return
obj = str(obj)
try:
yield obj[key]
except IndexError:
pass
def apply_path(start_obj, path):
objs = (start_obj,)
has_branched = False
for key in variadic(path):
if _is_user_input and key == ':':
key = Ellipsis
if not casesense and isinstance(key, str):
key = compat_casefold(key)
if key is Ellipsis or isinstance(key, (list, tuple)) or callable(key):
has_branched = True
key_func = functools.partial(apply_key, key)
objs = from_iterable(map(key_func, objs))
return has_branched, objs
def _traverse_obj(obj, path, use_list=True):
has_branched, results = apply_path(obj, path)
results = LazyList(x for x in map(type_test, results) if x is not None)
if get_all and has_branched:
return results.exhaust() if results or use_list else None
return results[0] if results else None
for index, path in enumerate(paths, 1):
use_list = default is NO_DEFAULT and index == len(paths)
result = _traverse_obj(obj, path, use_list)
if result is not None:
return result
return None if default is NO_DEFAULT else default
def get_first(obj, keys, **kwargs):
return traverse_obj(obj, (Ellipsis,) + tuple(variadic(keys)), get_all=False, **kwargs)
def join_nonempty(*values, **kwargs):
# parameter defaults
delim = kwargs.get('delim', '-')
from_dict = kwargs.get('from_dict')
if from_dict is not None:
values = (traverse_obj(from_dict, variadic(v)) for v in values)
return delim.join(map(compat_str, filter(None, values)))