[unige] Add new extractor

This adds support for extracting videos hosted on the University of Geneva
mediaserver, located at https://mediaserver.unige.ch. It supports:
- Single courses/videos
- Whole course "playlists" (accessible by clicking on the course's
  title)

Signed-off-by: Frank Villaro-Dixon <frank@villaro-dixon.eu>
This commit is contained in:
Frank Villaro-Dixon 2023-10-12 23:17:29 +02:00
parent 00ef748cc0
commit f677ef143c
2 changed files with 103 additions and 0 deletions

View File

@ -1427,6 +1427,10 @@ from .ufctv import (
UFCTVIE,
UFCArabiaIE,
)
from .unige import (
UnigeIE,
UnigePlaylistIE,
)
from .uktvplay import UKTVPlayIE
from .digiteka import DigitekaIE
from .dlive import (

View File

@ -0,0 +1,99 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
urlencode_postdata,
)
from youtube_dl.compat import (
compat_HTTPError,
)
class UnigeIE(InfoExtractor):
    """Extract a single video from the University of Geneva mediaserver.

    Accepts both the player page (``/play/<id>``) and direct media links
    (``/proxy/<id>/...``); in both cases the numeric id is used to fetch the
    player page, from which the title, course and media URL are scraped.
    """
    _BASE_URL = 'https://mediaserver.unige.ch'
    _VALID_URL = r'https?://mediaserver\.unige\.ch/(?:play|proxy)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://mediaserver.unige.ch/play/196613',
        # NOTE(review): 'xxxx' is a placeholder, not a real checksum;
        # replace it with the actual value before relying on this test.
        'md5': 'xxxx',
        'info_dict': {
            'id': '196613',
            'display_id': '196613',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://mediaserver.unige.ch/proxy/196613/VN3-2569-2023-2024-09-19.mp4',
        'only_matching': True,
    }]

    def _secure_url(self, video_id):
        """Return the URL of the access-controlled endpoint for ``video_id``."""
        return '%s/proxy/%s/secure.php?view=play&id=%s' % (
            self._BASE_URL, video_id, video_id)

    def _login(self, video_id):
        """Post the user's credentials to the protected endpoint of ``video_id``.

        Credentials are looked up per video, using the netrc machine name
        ``unige-mediaserver-<video_id>``.

        Raises ExtractorError (expected) when no credentials are available or
        when the server answers the login request with HTTP 400; any other
        download error is re-raised unchanged.
        """
        # Login credentials are per video group
        username, password = self._get_login_info(
            netrc_machine='unige-mediaserver-%s' % video_id)
        if not username or not password:
            self.raise_login_required('You need a username/pwd to access this video')

        secure_wp = self._secure_url(video_id)
        try:
            self._download_webpage(
                secure_wp, video_id, 'Logging in',
                data=urlencode_postdata({
                    'httpd_username': username,
                    'httpd_password': password,
                }), headers={
                    'Referer': secure_wp,
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                })
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                raise ExtractorError(
                    'Unable to login: incorrect username and/or password',
                    expected=True)
            raise

    def _real_extract(self, url):
        """Return the info dict (id, title, url, channel, channel_id) for ``url``."""
        video_id = self._match_id(url)

        # Always scrape the player page: ``url`` may be a direct /proxy/ media
        # link, which is not an HTML document.
        webpage = self._download_webpage(
            '%s/play/%s' % (self._BASE_URL, video_id), video_id)

        try:
            # This dumb download only checks if we need to login, as authentication
            # is unique (and sometimes optional) for each video
            self._download_webpage(
                self._secure_url(video_id), video_id,
                'Checking whether login is required')
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                self._login(video_id)
            # Any other failure of the probe is deliberately ignored: the
            # video is then treated as not requiring a login.

        # Newlines are stripped so that a heading spanning several lines matches
        video_title = self._html_search_regex(
            r'<h1>(.+?)</h1>', webpage.replace('\n', ''), 'title')
        # The course (collection) is optional metadata: do not abort the
        # extraction when it cannot be found.
        course_title = self._html_search_regex(
            r'<a href="/collection/[-\w+]+">(?P<course>.*?)</a></div>',
            webpage, 'course title', fatal=False, group='course')
        course_id = self._html_search_regex(
            r'<a href="/collection/(?P<courseid>[-\w+]+)">',
            webpage, 'course id', fatal=False, group='courseid')
        video_url = self._search_regex(
            r'<source src="([^"]+)"', webpage, 'video URL')

        return {
            'id': video_id,
            'title': video_title,
            'url': video_url,
            'channel': course_title,
            'channel_id': course_id,
        }
class UnigePlaylistIE(InfoExtractor):
    """Extract every video of a course ("collection") on the University of
    Geneva mediaserver, using the collection's RSS feed.
    """
    _VALID_URL = r'https?://mediaserver\.unige\.ch/collection/(?P<id>[-\w+]+)'

    def _real_extract(self, url):
        """Return a playlist result with one URL entry per RSS ``<item>`` link."""
        collection_id = self._match_id(url)

        # Build the feed URL from the id rather than appending to ``url``, so
        # that a trailing slash, query string or fragment cannot corrupt it.
        rss = self._download_xml(
            'https://mediaserver.unige.ch/collection/%s.rss' % collection_id,
            collection_id)

        # Skip <link> elements without text: they carry no URL to delegate.
        entries = [
            self.url_result(link.text, UnigeIE.ie_key())
            for link in rss.findall('./channel/item/link')
            if link.text]

        # The channel title is optional; a feed without one yields no title
        # instead of an AttributeError.
        title_el = rss.find('./channel/title')
        title_text = title_el.text if title_el is not None else None

        return self.playlist_result(entries, collection_id, title_text)