2013-11-28 04:47:20 +00:00
# coding: utf-8
2014-02-24 06:47:47 +00:00
from __future__ import unicode_literals
2013-11-28 04:47:20 +00:00
2013-06-23 18:24:07 +00:00
import re
from . common import InfoExtractor
from . . utils import (
2013-12-25 14:33:09 +00:00
int_or_none ,
2013-11-25 02:12:26 +00:00
unified_strdate ,
2013-06-23 18:24:07 +00:00
)
2013-09-28 13:43:34 +00:00
2013-06-23 18:24:07 +00:00
class ZDFIE(InfoExtractor):
    """Information extractor for ZDF Mediathek video pages on www.zdf.de.

    The video id is taken from the page URL and used to query the
    ``beitragsDetails`` XML service; metadata and the list of available
    formats are read from that XML document.
    """

    _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'

    _TEST = {
        'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
        'info_dict': {
            'id': '2037704',
            'ext': 'webm',
            'title': 'ZDFspezial - Ende des Machtpokers',
            'description': 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".',
            'duration': 1022,
            'uploader': 'spezial',
            'uploader_id': '225948',
            'upload_date': '20131127',
        },
        'skip': 'Videos on ZDF.de are depublicised in short order',
    }

    @staticmethod
    def _node_text(parent, path):
        """Return the text of the first node matching *path*, or None.

        None is returned both when no node matches and when the matching
        node has no text, so callers can treat optional XML fields uniformly
        instead of dereferencing a missing node.
        """
        node = parent.find(path)
        return None if node is None else node.text

    @staticmethod
    def _per_thousand(value):
        """Convert *value* to int and floor-divide it by 1000; None stays None."""
        number = int_or_none(value)
        return None if number is None else number // 1000

    def _parse_format(self, fnode):
        """Build a format dict from one ``formitaet`` XML node.

        Returns None when the node cannot be turned into a usable format:
        it has no URL, its URL contains the ``metafilegenerator`` marker
        (such entries were filtered out as unavailable before as well), or
        its ``basetype`` attribute is absent or does not have the expected
        six underscore-separated fields.
        """
        video_url = self._node_text(fnode, 'url')
        if not video_url or 'http://www.metafilegenerator' in video_url:
            return None

        format_id = fnode.attrib.get('basetype')
        if format_id is None:
            return None
        format_m = re.match(r'''(?x)
            (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
            (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
        ''', format_id)
        if format_m is None:
            return None

        # The quality label disambiguates formats sharing a basetype; when
        # the node lacks it, fall back to the bare basetype.
        quality = self._node_text(fnode, './quality')
        full_format_id = format_id if quality is None else format_id + '-' + quality

        return {
            'format_id': full_format_id,
            'url': video_url,
            'ext': format_m.group('container'),
            'acodec': format_m.group('acodec'),
            'vcodec': format_m.group('vcodec'),
            # Bitrates are divided by 1000 before being reported.
            'abr': self._per_thousand(self._node_text(fnode, './audioBitrate')),
            'vbr': self._per_thousand(self._node_text(fnode, './videoBitrate')),
            'width': int_or_none(self._node_text(fnode, './width')),
            'height': int_or_none(self._node_text(fnode, './height')),
            'filesize': int_or_none(self._node_text(fnode, './filesize')),
            # No note is derived for these formats; the key is kept so the
            # emitted dicts have the same set of keys as before.
            'format_note': None,
            'protocol': format_m.group('proto').lower(),
            # Only available formats are returned, so this is always True;
            # retained for the same reason as 'format_note'.
            '_available': True,
        }

    def _real_extract(self, url):
        """Extract metadata and formats for the video addressed by *url*.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``duration``, ``uploader``, ``uploader_id``, ``upload_date`` and
        ``formats``. Every field except ``id``, ``title`` and ``formats``
        is None when the corresponding XML node is absent.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        doc = self._download_xml(
            xml_url, video_id,
            note='Downloading video info',
            errnote='Failed to download video info')

        # The title is mandatory: a document without it is not usable, so
        # this lookup is intentionally left unguarded.
        title = doc.find('.//information/title').text
        description = self._node_text(doc, './/information/detail')
        duration = int_or_none(self._node_text(doc, './/details/lengthSec'))
        uploader = self._node_text(doc, './/details/originChannelTitle')
        uploader_id = self._node_text(doc, './/details/originChannelId')
        airtime = self._node_text(doc, './/details/airtime')
        upload_date = None if airtime is None else unified_strdate(airtime)

        formats = []
        for fnode in doc.findall('.//formitaeten/formitaet'):
            fmt = self._parse_format(fnode)
            if fmt is not None:
                formats.append(fmt)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'upload_date': upload_date,
            'formats': formats,
        }