Compare commits

..

18 Commits

Author SHA1 Message Date
Francisco Pombal
9ae7df9a22 [ie/tviplayer] Fix extractor (#16527)
Closes #13132
Authored by: FranciscoPombal
2026-06-18 22:05:08 +00:00
doe1080
2b27a203f7 [ie/periscope] Improve metadata extraction (#16084)
Authored by: doe1080
2026-06-18 04:58:54 +00:00
doe1080
4af5541bb5 [ie/niconico] Support shorts (#16992)
Authored by: doe1080
2026-06-18 02:32:03 +00:00
doe1080
c4f94545c9 [ie/niconico] Fix error detection (#16991)
Closes #16844
Authored by: doe1080
2026-06-17 22:53:29 +00:00
InvalidUsernameException
cfee151fcd [ie/ZDF] Detect livestreams (#16954)
Authored by: InvalidUsernameException
2026-06-17 22:34:00 +00:00
BOplaid
24aecad5df [ie/telewebion] Fix extractor (#16986)
Closes #16826
Authored by: BOplaid
2026-06-17 22:32:41 +00:00
0xvd
01f4f06fdd [ie/mxplayer] Fix extractors (#16988)
Closes #16987
Authored by: 0xvd
2026-06-17 21:14:06 +00:00
bashonly
ad6b5f4b35 [ie/youtube:tab] Fix flat playlist metadata extraction (#16965)
Closes #13879, Closes #16665, Closes #16952
Authored by: bashonly
2026-06-16 02:00:53 +00:00
Masterjun
8902f6ba8c [fd/mhtml] Fix storyboard content-length calculation (#13998)
Authored by: Masterjun3
2026-06-15 22:35:04 +02:00
bashonly
b23046bbc8 [ie/youtube:tab] Fix pagination (#16948)
* Support `continuationViewModel` continuation tokens

Closes #16692, Closes #16943
Authored by: bashonly
2026-06-13 23:06:23 +00:00
bashonly
9055188250 [ie/bandcamp:weekly] Fix metadata extraction (#16941)
Fix bug in a541df1ea5

Authored by: bashonly
2026-06-12 22:55:22 +00:00
doe1080
b05b408d10 [utils] Deprecate make_dir in favor of make_parent_dirs (#16931)
Authored by: doe1080
2026-06-12 22:53:58 +00:00
selfhoster1312
a2483524fb [ie/peertube] Support password-protected videos (#16873)
Authored by: selfhoster1312
2026-06-12 17:22:08 +00:00
0xvd
707537a039 [ie/patreon] Support new URL format (#16926)
Closes #16923
Authored by: 0xvd
2026-06-12 09:14:10 +00:00
Deltadroid
7937a139cf [fd/external] curl: Support development versions (#16922)
Authored by: syphyr
2026-06-12 09:00:06 +00:00
doe1080
cb309b3293 [utils] HTTPHeaderDict: Fix __ior__ (#16930)
Authored by: doe1080
2026-06-11 16:43:24 +02:00
bashonly
e47691215f Fix allow-unsafe-ext compat option (#16920)
Fix bug in e578e265f7

Closes #16919
Authored by: bashonly
2026-06-10 23:00:05 +00:00
bashonly
a541df1ea5 [ie/bandcamp:weekly] Fix extractor (#16925)
Closes #16924
Authored by: bashonly
2026-06-10 22:34:16 +00:00
22 changed files with 455 additions and 268 deletions

View File

@@ -395,6 +395,7 @@ banned-from = [
"yt_dlp.utils.bytes_to_intlist".msg = "Use `list` instead."
"yt_dlp.utils.intlist_to_bytes".msg = "Use `bytes` instead."
"yt_dlp.utils.jwt_encode_hs256".msg = "Use `yt_dlp.utils.jwt_encode` instead."
"yt_dlp.utils.make_dir".msg = "Use `yt_dlp.utils.make_parent_dirs` instead."
"yt_dlp.utils.decodeArgument".msg = "Do not use"
"yt_dlp.utils.decodeFilename".msg = "Do not use"
"yt_dlp.utils.encodeFilename".msg = "Do not use"

View File

@@ -327,6 +327,12 @@ class TestUtil(unittest.TestCase):
with self.assertRaises(_UnsafeExtensionError):
prepend_extension('abc.unexpected_ext', ext, 'ext')
# Test allow-unsafe-ext compat option
_UnsafeExtensionError._enabled = False
self.assertEqual(prepend_extension('abc.ext', 'un/safe'), 'abc.un/safe.ext')
# Re-enable sanitization for other tests
_UnsafeExtensionError._enabled = True
def test_replace_extension(self):
self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
@@ -345,6 +351,12 @@ class TestUtil(unittest.TestCase):
with self.assertRaises(_UnsafeExtensionError):
replace_extension('abc.unexpected_ext', ext, 'ext')
# Test allow-unsafe-ext compat option
_UnsafeExtensionError._enabled = False
self.assertEqual(replace_extension('abc.ext', 'bin'), 'abc.bin')
# Re-enable sanitization for other tests
_UnsafeExtensionError._enabled = True
def test_subtitles_filename(self):
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
@@ -2160,6 +2172,10 @@ Line 1
headers6 = HTTPHeaderDict(a=1, b=2)
self.assertEqual(pickle.loads(pickle.dumps(headers6)), headers6)
headers7 = HTTPHeaderDict()
headers7 |= {'X-dlp': 'data'}
self.assertEqual(headers7.sensitive(), {'X-dlp': 'data'})
def test_extract_basic_auth(self):
assert extract_basic_auth('http://:foo.bar') == ('http://:foo.bar', None)
assert extract_basic_auth('http://foo.bar') == ('http://foo.bar', None)

View File

@@ -139,7 +139,7 @@ from .utils import (
join_nonempty,
locked_file,
make_archive_id,
make_dir,
make_parent_dirs,
number_of_digits,
orderedSet,
orderedSet_from_options,
@@ -2036,7 +2036,12 @@ class YoutubeDL:
raise Exception(f'Invalid result type: {result_type}')
def _ensure_dir_exists(self, path):
return make_dir(path, self.report_error)
try:
make_parent_dirs(path)
return True
except OSError as e:
self.report_error(f'Unable to create directory: {e}')
return False
@staticmethod
def _playlist_infodict(ie_result, strict=False, **kwargs):

View File

@@ -619,7 +619,7 @@ def validate_options(opts):
warnings.append(
'Using allow-unsafe-ext opens you up to potential attacks. '
'Use with great care!')
_UnsafeExtensionError.sanitize_extension = lambda x, prepend=False: x
_UnsafeExtensionError._enabled = False
return warnings, deprecation_warnings

View File

@@ -209,7 +209,7 @@ class CurlFD(ExternalFD):
return False
cls.exe = path
cls._curl_version = version_tuple(parts[1])
cls._curl_version = version_tuple(parts[1], lenient=True)
return path
def _make_cmd(self, tmpfilename, info_dict):

View File

@@ -119,7 +119,7 @@ body > figure > img {
fragments=fragments,
frag_boundary=frag_boundary,
title=title,
)
).encode()
ctx['dest_stream'].write((
'MIME-Version: 1.0\r\n'
@@ -135,7 +135,7 @@ body > figure > img {
'Content-Type: text/html; charset=utf-8\r\n'
f'Content-Length: {len(stub)}\r\n'
'\r\n'
f'{stub}\r\n').encode())
).encode() + stub + b'\r\n')
extra_state['header_written'] = True
for i, fragment in enumerate(fragments):

View File

@@ -421,6 +421,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
'id': '224',
'ext': 'mp3',
'title': 'Bandcamp Weekly, 2017-04-04',
'episode': 'Magic Moments',
'description': 'md5:5d48150916e8e02d030623a48512c874',
'thumbnail': 'https://f4.bcbits.com/img/9982549_0.jpg',
'series': 'Bandcamp Weekly',
@@ -440,22 +441,23 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
def _real_extract(self, url):
show_id = self._match_id(url)
show_data = self._download_json(
'https://bandcamp.com/api/bcradio_api/1/get_show',
'https://bandcamp.com/api/player/2/player_data_web',
show_id, 'Downloading radio show JSON',
data=json.dumps({'id': show_id}).encode(),
headers={'Content-Type': 'application/json'})
data=json.dumps({'item_id': int(show_id), 'item_type': 'radio'}).encode(),
headers={'Content-Type': 'application/json'})['tracklist']
audio_data = show_data['compiledTrack']
stream_url = audio_data['streamUrl']
format_id = traverse_obj(stream_url, ({parse_qs}, 'enc', -1))
encoding, _, bitrate_str = (format_id or '').partition('-')
series_title = show_data.get('title')
series_title = show_data.get('subtitle')
release_timestamp = unified_timestamp(show_data.get('date'))
return {
'id': show_id,
'episode_id': show_id,
'episode': show_data.get('title'),
'title': join_nonempty(series_title, strftime_or_none(release_timestamp, '%Y-%m-%d'), delim=', '),
'series': series_title,
'thumbnail': format_field(show_data, 'imageId', 'https://f4.bcbits.com/img/%s_0.jpg', default=None),

View File

@@ -16,12 +16,12 @@ class MxplayerIE(InfoExtractor):
'display_id': 'episode-1-online',
'ext': 'mp4',
'title': 'Episode 1',
'description': 'md5:62ed43eb9fec5efde5cf3bd1040b7670',
'description': 'md5:e90dc55a393f557049284eb36efdb773',
'season_number': 1,
'episode_number': 1,
'duration': 2451,
'season': 'Season 1',
'series': 'My Girlfriend Is An Alien (Hindi Dubbed)',
'series': 'My Girlfriend Is An Alien',
'episode': 'Episode 1',
},
'params': {
@@ -74,7 +74,7 @@ class MxplayerIE(InfoExtractor):
'episode_number': 3,
'duration': 2568,
'season': 'Season 1',
'series': 'Aashram',
'series': 'Ek Badnaam Aashram',
'episode': 'Episode 3',
},
'params': {
@@ -128,16 +128,17 @@ class MxplayerIE(InfoExtractor):
'format': 'bv',
'skip_download': True,
},
'skip': 'video removed',
}, {
'url': 'https://www.mxplayer.in/show/watch-ek-thi-begum-hindi/season-2/game-of-power-online-5e5305c28f1409847cdc4520b6ad77cf',
'info_dict': {
'id': '5e5305c28f1409847cdc4520b6ad77cf',
'display_id': 'game-of-power-online',
'title': 'Game Of Power',
'duration': 1845,
'duration': 1851,
'ext': 'mp4',
'description': 'md5:1d0948d2a5312d7013792d53542407f9',
'series': 'Ek Thi Begum (Hindi)',
'series': 'Ek Thi Begum',
'season': 'Season 2',
'season_number': 2,
'episode': 'Episode 2',
@@ -158,6 +159,7 @@ class MxplayerIE(InfoExtractor):
'description': 'md5:d17bd5c651016c4ed2e6f8a4ace15534',
},
'params': {'skip_download': 'm3u8'},
'skip': 'video removed',
}]
def _real_extract(self, url):
@@ -166,7 +168,7 @@ class MxplayerIE(InfoExtractor):
video_type = 'episode'
data_json = self._download_json(
f'https://api.mxplay.com/v1/web/detail/video?type={video_type}&id={video_id}', display_id)
f'https://api.mxplayer.in/v1/web/detail/video?type={video_type}&id={video_id}', display_id)
formats, subtitles = [], {}
m3u8_url = urljoin('https://llvod.mxplay.com/', traverse_obj(
@@ -208,8 +210,8 @@ class MxplayerShowIE(InfoExtractor):
},
}]
_API_SHOW_URL = 'https://api.mxplay.com/v1/web/detail/tab/tvshowseasons?type=tv_show&id={}&device-density=2&platform=com.mxplay.desktop&content-languages=hi,en'
_API_EPISODES_URL = 'https://api.mxplay.com/v1/web/detail/tab/tvshowepisodes?type=season&id={}&device-density=1&platform=com.mxplay.desktop&content-languages=hi,en&{}'
_API_SHOW_URL = 'https://api.mxplayer.in/v1/web/detail/tab/tvshowseasons?type=tv_show&id={}&device-density=2&platform=com.mxplay.desktop&content-languages=hi,en'
_API_EPISODES_URL = 'https://api.mxplayer.in/v1/web/detail/tab/tvshowepisodes?type=season&id={}&device-density=1&platform=com.mxplay.desktop&content-languages=hi,en&{}'
def _entries(self, show_id):
show_json = self._download_json(

View File

@@ -104,7 +104,7 @@ class NiconicoIE(NiconicoBaseIE):
IE_NAME = 'niconico'
IE_DESC = 'ニコニコ動画'
_VALID_URL = r'https?://(?:(?:embed|sp|www)\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?\d+)'
_VALID_URL = r'https?://(?:(?:embed|sp|www)\.)?nicovideo\.jp/(?:shorts|watch)/(?P<id>(?:[a-z]{2})?\d+)'
_ERROR_MAP = {
'FORBIDDEN': {
'ADMINISTRATOR_DELETE_VIDEO': 'Video unavailable, possibly removed by admins',
@@ -361,6 +361,29 @@ class NiconicoIE(NiconicoBaseIE):
},
'params': {'skip_download': 'm3u8'},
'skip': 'Channel members only; specified continuous membership period required',
}, {
'url': 'https://www.nicovideo.jp/shorts/ss46441082',
'info_dict': {
'id': 'ss46441082',
'ext': 'mp4',
'title': '『超かぐや姫』WEB予告 アクション編 ',
'availability': 'public',
'channel': '『超かぐや姫!』公式',
'channel_id': '141907929',
'comment_count': int,
'description': 'md5:86cd619f675377c7d77ddc13b4dda8bf',
'duration': 15,
'genres': ['アニメ'],
'like_count': int,
'tags': 'mincount:5',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1781600400,
'upload_date': '20260616',
'uploader': '『超かぐや姫!』公式',
'uploader_id': '141907929',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
}]
def _extract_formats(self, api_data, video_id):
@@ -428,7 +451,7 @@ class NiconicoIE(NiconicoBaseIE):
'actionTrackId': f'AAAAAAAAAA_{round(time_seconds() * 1000)}',
}, expected_status=[400, 404])
api_data = api_resp['data']
api_data = traverse_obj(api_resp, ('data', {dict}))
scheduled_time = traverse_obj(api_data, ('publishScheduledAt', {str}))
status = traverse_obj(api_resp, ('meta', 'status', {int}))
@@ -465,7 +488,7 @@ class NiconicoIE(NiconicoBaseIE):
if not formats and err_msg:
self.raise_login_required(err_msg, metadata_available=True)
thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp'])
thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp', 'short'])
return {
'availability': availability,
@@ -482,7 +505,8 @@ class NiconicoIE(NiconicoBaseIE):
'url': url,
**parse_resolution(url, lenient=True),
} for key, url in traverse_obj(api_data, (
'video', 'thumbnail', {dict}), default={}).items()],
'video', 'thumbnail', {dict.items}, lambda _, v: url_or_none(v[1])),
)],
**traverse_obj(api_data, (('channel', 'owner'), any, {
'channel': (('name', 'nickname'), {str}, any),
'channel_id': ('id', {str_or_none}),

View File

@@ -65,8 +65,9 @@ class PatreonBaseIE(InfoExtractor):
class PatreonIE(PatreonBaseIE):
IE_NAME = 'patreon'
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|(?:[^/?#]+/)?posts/(?:[\w-]+-)?)(?P<id>\d+)'
_TESTS = [{
# FIXME: Fails due to no description extracted
'url': 'http://www.patreon.com/creation?hid=743933',
'md5': 'e25505eec1053a6e6813b8ed369875cc',
'info_dict': {
@@ -107,17 +108,17 @@ class PatreonIE(PatreonBaseIE):
'id': 'SU4fj_aEMVw',
'ext': 'mp4',
'title': 'I\'m on Patreon!',
'uploader': 'TraciJHines',
'uploader': 'Traci Oden',
'thumbnail': 're:^https?://.*$',
'upload_date': '20150211',
'description': 'md5:8af6425f50bd46fbf29f3db0fc3a8364',
'uploader_id': '@TraciHinesMusic',
'uploader_id': '@TraciOden',
'categories': ['Entertainment'],
'duration': 282,
'view_count': int,
'tags': 'count:39',
'age_limit': 0,
'channel': 'TraciJHines',
'channel': 'Traci Oden',
'channel_url': 'https://www.youtube.com/channel/UCGLim4T2loE5rwCMdpCIPVg',
'live_status': 'not_live',
'like_count': int,
@@ -125,7 +126,7 @@ class PatreonIE(PatreonBaseIE):
'availability': 'public',
'channel_follower_count': int,
'playable_in_embed': True,
'uploader_url': 'https://www.youtube.com/@TraciHinesMusic',
'uploader_url': 'https://www.youtube.com/@TraciOden',
'comment_count': int,
'channel_is_verified': True,
'chapters': 'count:4',
@@ -157,6 +158,7 @@ class PatreonIE(PatreonBaseIE):
},
'skip': 'Patron-only content',
}, {
# FIXME: Fails due to no description extracted
# m3u8 video (https://github.com/yt-dlp/yt-dlp/issues/2277)
'url': 'https://www.patreon.com/posts/video-sketchbook-32452882',
'info_dict': {
@@ -220,6 +222,7 @@ class PatreonIE(PatreonBaseIE):
'channel_id': '2147162',
'uploader_url': 'https://www.patreon.com/yaboyroshi',
},
'skip': 'HTTP Error 401 for m3u8 request; site now requires login to play the video',
}, {
# NSFW vimeo embed URL
'url': 'https://www.patreon.com/posts/4k-spiderman-4k-96414599',
@@ -242,6 +245,7 @@ class PatreonIE(PatreonBaseIE):
},
'params': {'skip_download': 'm3u8'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
'skip': 'Video removed',
}, {
# multiple attachments/embeds
'url': 'https://www.patreon.com/posts/holy-wars-solos-100601977',
@@ -285,6 +289,7 @@ class PatreonIE(PatreonBaseIE):
},
'params': {'getcomments': True},
}, {
# FIXME: Error: No supported media found in this post
# Inlined media in post; uses _extract_from_media_api
'url': 'https://www.patreon.com/posts/scottfalco-146966245',
'info_dict': {
@@ -304,6 +309,26 @@ class PatreonIE(PatreonBaseIE):
'timestamp': 1767061800,
'upload_date': '20251230',
},
}, {
# FIXME: need to extract description
'url': 'https://www.patreon.com/Insanimate/posts/meatcanyon-in-142663524',
'md5': '132332e3bb345f75d8b471242346dee6',
'info_dict': {
'id': '142663524',
'ext': 'mp4',
'title': 'Meatcanyon in Playground',
'uploader': 'Insanimate',
'uploader_id': '2828146',
'uploader_url': 'https://www.patreon.com/Insanimate',
'channel_id': '6260877',
'channel_url': 'https://www.patreon.com/Insanimate',
'channel_follower_count': int,
'comment_count': int,
'like_count': int,
'thumbnail': 're:^https?://.*$',
'timestamp': 1762101034,
'upload_date': '20251102',
},
}]
_RETURN_TYPE = 'video'
_HTTP_HEADERS = {
@@ -357,7 +382,7 @@ class PatreonIE(PatreonBaseIE):
post = self._call_api(
f'posts/{video_id}', video_id, query={
'fields[media]': 'download_url,mimetype,size_bytes,file_name',
'fields[post]': 'comment_count,content,embed,image,like_count,post_file,published_at,title,current_user_can_view',
'fields[post]': 'comment_count,content,content_teaser_text,cleaned_teaser_text,embed,image,like_count,post_file,published_at,title,current_user_can_view',
'fields[user]': 'full_name,url',
'fields[post_tag]': 'value',
'fields[campaign]': 'url,name,patron_count',
@@ -367,7 +392,7 @@ class PatreonIE(PatreonBaseIE):
attributes = post['data']['attributes']
info = traverse_obj(attributes, {
'title': ('title', {str.strip}),
'description': ('content', {clean_html}),
'description': (('content', 'content_teaser_text', 'cleaned_teaser_text'), {clean_html}, any),
'thumbnail': ('image', ('large_url', 'url'), {url_or_none}, any),
'timestamp': ('published_at', {parse_iso8601}),
'like_count': ('like_count', {int_or_none}),

View File

@@ -4,6 +4,7 @@ import re
from .common import InfoExtractor
from ..utils import (
OnDemandPagedList,
filter_dict,
format_field,
int_or_none,
parse_resolution,
@@ -1358,7 +1359,7 @@ class PeerTubeIE(InfoExtractor):
'ext': 'mp4',
'title': 'E2E tests',
'categories': ['Unknown'],
'channel': 'Main chocobozzz channel',
'channel': 'Chocobozzz test channel',
'channel_id': '5187',
'channel_url': 'https://peertube2.cpy.re/video-channels/chocobozzz_channel',
'description': 'md5:67daf92c833c41c95db874e18fcb2786',
@@ -1382,7 +1383,7 @@ class PeerTubeIE(InfoExtractor):
'ext': 'mp4',
'title': 'E2E tests',
'categories': ['Unknown'],
'channel': 'Main chocobozzz channel',
'channel': 'Chocobozzz test channel',
'channel_id': '5187',
'channel_url': 'https://peertube2.cpy.re/video-channels/chocobozzz_channel',
'description': 'md5:67daf92c833c41c95db874e18fcb2786',
@@ -1406,7 +1407,7 @@ class PeerTubeIE(InfoExtractor):
'ext': 'mp4',
'title': 'E2E tests',
'categories': ['Unknown'],
'channel': 'Main chocobozzz channel',
'channel': 'Chocobozzz test channel',
'channel_id': '5187',
'channel_url': 'https://peertube2.cpy.re/video-channels/chocobozzz_channel',
'description': 'md5:67daf92c833c41c95db874e18fcb2786',
@@ -1452,6 +1453,36 @@ class PeerTubeIE(InfoExtractor):
}, {
'url': 'peertube:framatube.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
'only_matching': True,
}, {
'url': 'https://videos.john-livingston.fr/w/mna1A6SxZ94cra4hMtjRQm',
'md5': '6a5faad22916e41ba4078ef59c33bc9f',
'info_dict': {
'id': 'mna1A6SxZ94cra4hMtjRQm',
'ext': 'mp4',
'title': 'test yt-dlp',
'description': 'md5:d8556ee790ad9b3fac6f0bb3eb5b67bd',
'thumbnail': r're:https?://videos.john-livingston\.fr/lazy-static/thumbnails/.+\.jpg',
'timestamp': 1780645286,
'upload_date': '20260605',
'uploader': 'John Livingston',
'uploader_id': '5',
'uploader_url': 'https://videos.john-livingston.fr/accounts/john',
'channel': 'john_livingston',
'channel_id': '4',
'channel_url': 'https://videos.john-livingston.fr/video-channels/john_livingston',
'license': 'Unknown',
'duration': 16,
'view_count': int,
'like_count': int,
'dislike_count': int,
'tags': 'count:0',
'categories': ['Unknown'],
},
'params': {
'videopassword': 'thepassword',
'format': '600p',
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
}]
_WEBPAGE_TESTS = [{
'url': 'https://video.macver.org/w/6gvhZpUGQVd4SQ6oYDc9pC',
@@ -1492,6 +1523,9 @@ class PeerTubeIE(InfoExtractor):
'>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
return 'peertube:{}:{}'.format(*mobj.group('host', 'id'))
def _get_headers(self):
return filter_dict({'x-peertube-video-password': self.get_param('videopassword')})
@classmethod
def _extract_embed_urls(cls, url, webpage):
embeds = tuple(super()._extract_embed_urls(url, webpage))
@@ -1505,7 +1539,7 @@ class PeerTubeIE(InfoExtractor):
def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
return self._download_json(
self._API_BASE % (host, video_id, path), video_id,
note=note, errnote=errnote, fatal=fatal)
note=note, errnote=errnote, fatal=fatal, headers=self._get_headers())
def _get_subtitles(self, host, video_id):
captions = self._call_api(
@@ -1545,7 +1579,7 @@ class PeerTubeIE(InfoExtractor):
if playlist_url := url_or_none(playlist.get('playlistUrl')):
is_live = True
formats.extend(self._extract_m3u8_formats(
playlist_url, video_id, fatal=False, live=True))
playlist_url, video_id, fatal=False, live=True, headers=self._get_headers()))
playlist_files = playlist.get('files')
if not (playlist_files and isinstance(playlist_files, list)):
continue
@@ -1629,6 +1663,8 @@ class PeerTubeIE(InfoExtractor):
'subtitles': subtitles,
'is_live': is_live,
'webpage_url': webpage_url,
# Headers are needed for ALL format requests, but not thumbnails
'http_headers': self._get_headers(),
}

View File

@@ -1,8 +1,11 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
parse_iso8601,
str_or_none,
unescapeHTML,
url_or_none,
)
from ..utils.traversal import traverse_obj
@@ -18,29 +21,24 @@ class PeriscopeBaseIE(InfoExtractor):
item_id, query=query)
def _parse_broadcast_data(self, broadcast, video_id):
title = broadcast.get('status') or 'Periscope Broadcast'
uploader = broadcast.get('user_display_name') or broadcast.get('username')
title = f'{uploader} - {title}' if uploader else title
thumbnails = [{
'url': broadcast[image],
} for image in ('image_url', 'image_url_medium', 'image_url_small') if broadcast.get(image)]
return {
'id': broadcast.get('id') or video_id,
'title': title,
'timestamp': parse_iso8601(broadcast.get('created_at')) or int_or_none(
broadcast.get('created_at_ms'), scale=1000),
'release_timestamp': int_or_none(broadcast.get('scheduled_start_ms'), scale=1000),
'uploader': uploader,
'uploader_id': broadcast.get('user_id') or broadcast.get('username'),
'thumbnails': thumbnails,
'view_count': int_or_none(broadcast.get('total_watched')),
'concurrent_view_count': int_or_none(broadcast.get('total_watching')),
'tags': broadcast.get('tags'),
'display_id': video_id,
'live_status': {
'running': 'is_live',
'not_started': 'is_upcoming',
}.get(traverse_obj(broadcast, ('state', {str.lower}))) or 'was_live',
**traverse_obj(broadcast, {
'id': ('id', {str_or_none}),
'title': ('status', {clean_html}, filter),
'concurrent_view_count': ('total_watching', {int_or_none}),
'release_timestamp': (('scheduled_start_ms', 'start_ms'), {int_or_none(scale=1000)}, any),
'tags': ('tags', ..., {clean_html}, filter, all, filter),
'thumbnails': (('image_url_small', 'image_url_medium', 'image_url'), {'url': {url_or_none}}),
'timestamp': ((('created_at', {parse_iso8601}), ('created_at_ms', {int_or_none(scale=1000)})), any),
'uploader': ('user_display_name', {clean_html}, filter),
'uploader_id': ('username', {clean_html}, filter),
'view_count': ('total_watched', {int_or_none}),
}),
}
@staticmethod
@@ -69,22 +67,22 @@ class PeriscopeBaseIE(InfoExtractor):
class PeriscopeIE(PeriscopeBaseIE):
IE_DESC = 'Periscope'
IE_NAME = 'periscope'
_VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/?#]+/(?P<id>[^/?#]+)'
_EMBED_REGEX = [r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1']
# Alive example URLs can be found here https://www.periscope.tv/
_TESTS = [{
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
'md5': '65b57957972e503fcbbaeed8f4fa04ca',
'url': 'https://www.periscope.tv/LularoeHusbandMike/1mrGmgaXAVqxy',
'info_dict': {
'id': '56102209',
'id': '1mrGmgaXAVqxy',
'ext': 'mp4',
'title': 'Bec Boop - 🚠✈️🇬🇧 Fly above #London in Emirates Air Line cable car at night 🇬🇧✈️🚠 #BoopScope 🎀💗',
'timestamp': 1438978559,
'upload_date': '20150807',
'uploader': 'Bec Boop',
'uploader_id': '1465763',
'title': '🎉👍🏼 BROWSE OUR ENTIRE 1,900 +PIECE INVENTORY! 👍🏼🎉 #lularoe',
'live_status': 'was_live',
'tags': 'count:1',
'thumbnail': r're:https?://prod-fastly-us-east-1\.video\.pscp\.tv/.+',
'timestamp': 1498621952,
'upload_date': '20170628',
'uploader': 'LuLaRoe Husband Mike',
'uploader_id': 'LularoeHusbandMike',
},
'skip': 'Expires in 24 hours',
}, {
'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
'only_matching': True,

View File

@@ -14,10 +14,9 @@ def _fmt_url(url):
class TelewebionIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?telewebion\.com/episode/(?P<id>(?:0x[a-fA-F\d]+|\d+))'
_VALID_URL = r'https?://(?:www\.)?telewebion\.ir/episode/(?P<id>(?:0x[a-fA-F\d]+|\d+))'
_TESTS = [{
'url': 'http://www.telewebion.com/episode/0x1b3139c/',
'url': 'http://www.telewebion.ir/episode/0x1b3139c/',
'info_dict': {
'id': '0x1b3139c',
'ext': 'mp4',
@@ -26,7 +25,7 @@ class TelewebionIE(InfoExtractor):
'series_id': '0x1b2505c',
'channel': 'شبکه 3',
'channel_id': '0x1b1a761',
'channel_url': 'https://telewebion.com/live/tv3',
'channel_url': 'https://telewebion.ir/live/tv3',
'timestamp': 1425522414,
'upload_date': '20150305',
'release_timestamp': 1425517020,
@@ -34,11 +33,11 @@ class TelewebionIE(InfoExtractor):
'duration': 420,
'view_count': int,
'tags': ['ورزشی', 'لیگ اروپا', 'اروپا'],
'thumbnail': 'https://static.telewebion.com/episodeImages/YjFhM2MxMDBkMDNiZTU0MjE5YjQ3ZDY0Mjk1ZDE0ZmUwZWU3OTE3OWRmMDAyODNhNzNkNjdmMWMzMWIyM2NmMA/default',
'thumbnail': 'https://static.telewebion.ir/episodeImages/YjFhM2MxMDBkMDNiZTU0MjE5YjQ3ZDY0Mjk1ZDE0ZmUwZWU3OTE3OWRmMDAyODNhNzNkNjdmMWMzMWIyM2NmMA/default',
},
'skip_download': 'm3u8',
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://telewebion.com/episode/162175536',
'url': 'https://telewebion.ir/episode/162175536',
'info_dict': {
'id': '0x9aa9a30',
'ext': 'mp4',
@@ -47,7 +46,7 @@ class TelewebionIE(InfoExtractor):
'series_id': '0x29a7426',
'channel': 'شبکه 2',
'channel_id': '0x1b1a719',
'channel_url': 'https://telewebion.com/live/tv2',
'channel_url': 'https://telewebion.ir/live/tv2',
'timestamp': 1699979968,
'upload_date': '20231114',
'release_timestamp': 1699991638,
@@ -55,9 +54,9 @@ class TelewebionIE(InfoExtractor):
'duration': 78,
'view_count': int,
'tags': ['کلیپ های منتخب', ' کلیپ طنز ', ' کلیپ سیاست ', 'پاورقی', 'ویژه فلسطین'],
'thumbnail': 'https://static.telewebion.com/episodeImages/871e9455-7567-49a5-9648-34c22c197f5f/default',
'thumbnail': 'https://static.telewebion.ir/episodeImages/871e9455-7567-49a5-9648-34c22c197f5f/default',
},
'skip_download': 'm3u8',
'skip': 'Dead link',
}]
def _call_graphql_api(
@@ -70,7 +69,7 @@ class TelewebionIE(InfoExtractor):
parameters = ', '.join(f'${name}: {type_}' for name, (type_, _) in variables.items())
parameters = f'({parameters})'
result = self._download_json('https://graph.telewebion.com/graphql', video_id, note, data=json.dumps({
result = self._download_json('https://graph.telewebion.ir/graphql', video_id, note, data=json.dumps({
'operationName': operation,
'query': f'query {operation}{parameters} @cacheControl(maxAge: 60) {{{query}\n}}\n',
'variables': {name: value for name, (_, value) in (variables or {}).items()},
@@ -123,11 +122,11 @@ class TelewebionIE(InfoExtractor):
'series_id': ('program', 'ProgramID', {str}),
'channel': ('channel', 'name', {str}),
'channel_id': ('channel', 'ChannelID', {str}),
'channel_url': ('channel', 'descriptor', {_fmt_url('https://telewebion.com/live/%s')}),
'thumbnail': ('image', {_fmt_url('https://static.telewebion.com/episodeImages/%s/default')}),
'channel_url': ('channel', 'descriptor', {_fmt_url('https://telewebion.ir/live/%s')}),
'thumbnail': ('image', {_fmt_url('https://static.telewebion.ir/episodeImages/%s/default')}),
'formats': (
'channel', 'descriptor', {str},
{_fmt_url(f'https://cdna.telewebion.com/%s/episode/{video_id}/playlist.m3u8')},
{_fmt_url(f'https://cdna.telewebion.ir/%s/episode/{video_id}/playlist.m3u8')},
{functools.partial(self._extract_m3u8_formats, video_id=video_id, ext='mp4', m3u8_id='hls')}),
}))
info_dict['id'] = video_id

View File

@@ -1,5 +1,6 @@
from .common import InfoExtractor
from ..utils import traverse_obj
from ..utils import int_or_none, js_to_json
from ..utils.traversal import traverse_obj
class TVIPlayerIE(InfoExtractor):
@@ -11,21 +12,10 @@ class TVIPlayerIE(InfoExtractor):
'ext': 'mp4',
'duration': 4167,
'title': 'Jornal das 8 - 26 de dezembro de 2021',
'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/61c8ee630cf2cc58e7d98d9f/',
'thumbnail': 'https://img.iol.pt/image/id/61c8ee630cf2cc58e7d98d9f/',
'season_number': 8,
'season': 'Season 8',
},
}, {
'url': 'https://tviplayer.iol.pt/programa/isabel/62b471090cf26256cd2a8594/video/62be445f0cf2ea4f0a5218e5',
'info_dict': {
'id': '62be445f0cf2ea4f0a5218e5',
'ext': 'mp4',
'duration': 3255,
'season': 'Season 1',
'title': 'Isabel - Episódio 1',
'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62beac200cf2f9a86eab856b/',
'season_number': 1,
},
}, {
# no /programa/
'url': 'https://tviplayer.iol.pt/video/62c4131c0cf2f9a86eac06bb',
@@ -33,10 +23,8 @@ class TVIPlayerIE(InfoExtractor):
'id': '62c4131c0cf2f9a86eac06bb',
'ext': 'mp4',
'title': 'David e Mickael Carreira respondem: «Qual é o próximo a ser pai?»',
'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62c416490cf2ea367d4433fd/',
'season': 'Season 2',
'thumbnail': 'https://img.iol.pt/image/id/62c416490cf2ea367d4433fd/',
'duration': 148,
'season_number': 2,
},
}, {
# episodio url
@@ -46,9 +34,11 @@ class TVIPlayerIE(InfoExtractor):
'ext': 'mp4',
'season': 'Season 1',
'title': 'Quem denunciou Pedro?',
'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62eda30b0cf2ea367d48973b/',
'thumbnail': 'https://img.iol.pt/image/id/62eda30b0cf2ea367d48973b/',
'duration': 1250,
'season_number': 1,
'episode_number': 187,
'episode': 'Episode 187',
},
}]
@@ -62,7 +52,8 @@ class TVIPlayerIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
json_data = self._search_json(
r'<script>\s*jsonData\s*=', webpage, 'json_data', video_id)
r'\sopts\s*=\s*\{\s*video\s*:\s*\[', webpage, 'JSON data', video_id,
transform_source=js_to_json)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
f'{json_data["videoUrl"]}?wmsAuthSign={self.wms_auth_sign_token}',
@@ -71,8 +62,11 @@ class TVIPlayerIE(InfoExtractor):
'id': video_id,
'title': json_data.get('title') or self._og_search_title(webpage),
'thumbnail': json_data.get('cover') or self._og_search_thumbnail(webpage),
'duration': json_data.get('duration'),
'duration': int_or_none(json_data.get('duration')),
'formats': formats,
'subtitles': subtitles,
'season_number': traverse_obj(json_data, ('program', 'seasonNum')),
'season_number': traverse_obj(
self._yield_json_ld(webpage, video_id, default=None),
(lambda _, v: v['@type'] == 'TVEpisode', 'seasonNumber', {int_or_none}, any)),
'episode_number': int_or_none(json_data.get('episodeNum')),
}

View File

@@ -10,6 +10,7 @@ from ..jsinterp import js_number_to_string
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
clean_html,
dict_get,
filter_dict,
float_or_none,
@@ -1451,16 +1452,18 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?P<type>broadcasts|events)/(?P<id>\w+)'
_TESTS = [{
# untitled Periscope video
# Untitled broadcast
'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
'info_dict': {
'id': '1yNGaQLWpejGj',
'ext': 'mp4',
'title': 'Andrea May Sahouri - Periscope Broadcast',
'title': 'Andrea May Sahouri - Twitter Broadcast',
'display_id': '1yNGaQLWpejGj',
'uploader': 'Andrea May Sahouri',
'uploader_id': 'andreamsahouri',
'uploader_url': 'https://twitter.com/andreamsahouri',
'release_date': '20200601',
'release_timestamp': 1590973647,
'timestamp': 1590973638,
'upload_date': '20200601',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
@@ -1478,6 +1481,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'uploader': 'SpaceX',
'uploader_id': 'SpaceX',
'uploader_url': 'https://twitter.com/SpaceX',
'release_date': '20210303',
'release_timestamp': 1614812964,
'timestamp': 1614812942,
'upload_date': '20210303',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
@@ -1495,6 +1500,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'uploader': 'SpaceX',
'uploader_id': 'SpaceX',
'uploader_url': 'https://twitter.com/SpaceX',
'release_date': '20230420',
'release_timestamp': 1681994486,
'timestamp': 1681993964,
'upload_date': '20230420',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
@@ -1544,7 +1551,9 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
info = self._parse_broadcast_data(broadcast, broadcast_id)
info.update({
'display_id': display_id,
'title': broadcast.get('status') or info.get('title'),
'title': traverse_obj(broadcast, (
'status', {clean_html}, filter,
)) or join_nonempty(info.get('uploader'), 'Twitter Broadcast', delim=' - '),
'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
'uploader_url': format_field(
broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),

View File

@@ -1037,8 +1037,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return next_continuation
return traverse_obj(renderer, (
('contents', 'items', 'rows', 'subThreads'), ..., 'continuationItemRenderer',
('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
('contents', 'items', 'rows', 'subThreads'), ..., (
('continuationItemRenderer', ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))),
('continuationItemViewModel', 'continuationCommand', 'innertubeCommand'),
),
), get_all=False, expected_type=cls._extract_continuation_ep_data)
@classmethod

View File

@@ -333,20 +333,58 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
only_once=True)
return
lockup_mdvm = traverse_obj(view_model, ('metadata', 'lockupMetadataViewModel', {dict}))
content_mdvm = traverse_obj(lockup_mdvm, ('metadata', 'contentMetadataViewModel', {dict}))
thumbnail_badge_view_models = traverse_obj(view_model, (
'contentImage', 'thumbnailViewModel', 'overlays', ..., (
('thumbnailBottomOverlayViewModel', 'badges'),
('thumbnailOverlayBadgeViewModel', 'thumbnailBadges'),
), ..., 'thumbnailBadgeViewModel', {dict}))
duration_text = traverse_obj(thumbnail_badge_view_models, (..., 'text', {str.lower}, any))
thumbnail_badge_styles = traverse_obj(thumbnail_badge_view_models, (..., 'badgeStyle', {str}))
channel_info = traverse_obj(content_mdvm, (
'metadataRows', ..., 'metadataParts',
lambda _, v: v['text']['commandRuns'][0]['onTap']['innertubeCommand']['browseEndpoint']['browseId'],
'text', any, {
'channel': ('content', {str}),
'channel_id': ('commandRuns', 0, 'onTap', 'innertubeCommand', 'browseEndpoint', 'browseId', {self.ucid_or_none}),
'uploader': ('content', {str}),
'uploader_id': ('commandRuns', 0, 'onTap', 'innertubeCommand', 'browseEndpoint', 'canonicalBaseUrl', {self.handle_from_url}),
}))
views_and_time = traverse_obj(content_mdvm, (
'metadataRows', lambda _, v: 'accessibilityLabel' in v['metadataParts'][-1],
'metadataParts', ...))
relative_time_text = traverse_obj(views_and_time, (-1, 'text', 'content', {str.lower}))
badge_styles = traverse_obj(content_mdvm, (
'metadataRows', ..., 'badges', ..., 'badgeViewModel', 'badgeStyle', {str}))
return self.url_result(
url, ie, content_id,
title=traverse_obj(view_model, (
'metadata', 'lockupMetadataViewModel', 'title', 'content', {str})),
title=traverse_obj(lockup_mdvm, ('title', 'content', {str})),
thumbnails=self._extract_thumbnails(view_model, (
'contentImage', *thumb_keys, 'thumbnailViewModel', 'image'), final_key='sources'),
duration=traverse_obj(view_model, (
'contentImage', 'thumbnailViewModel', 'overlays', ...,
(('thumbnailBottomOverlayViewModel', 'badges'), ('thumbnailOverlayBadgeViewModel', 'thumbnailBadges')),
..., 'thumbnailBadgeViewModel', 'text', {parse_duration}, any)),
timestamp=(traverse_obj(view_model, (
'metadata', 'lockupMetadataViewModel', 'metadata', 'contentMetadataViewModel', 'metadataRows',
..., 'metadataParts', ..., 'text', 'content', {lambda t: self._parse_time_text(t, report_failure=False)}, any))
if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE) else None))
duration=parse_duration(duration_text),
view_count=(
traverse_obj(views_and_time, (0, 'text', 'content', {parse_count}))
# view_count isn't always available; only extract if this metadataRow is 2 metadataParts
if len(views_and_time) == 2 else None),
timestamp=(
self._parse_time_text(relative_time_text, report_failure=False)
if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE) else None),
live_status=(
'is_upcoming' if duration_text == 'upcoming'
else 'is_live' if 'THUMBNAIL_OVERLAY_BADGE_STYLE_LIVE' in thumbnail_badge_styles
else 'was_live' if relative_time_text and 'streamed' in relative_time_text
else None),
# XXX: We cannot assume 'public' since we have no way to differentiate from 'unlisted'
availability=self._availability(needs_subscription='BADGE_MEMBERS_ONLY' in badge_styles),
channel_url=format_field(channel_info, 'channel_id', 'https://www.youtube.com/channel/%s', default=None),
uploader_url=format_field(channel_info, 'uploader_id', 'https://www.youtube.com/%s', default=None),
**channel_info)
def _rich_entries(self, rich_grid_renderer):
if lockup_view_model := traverse_obj(rich_grid_renderer, ('content', 'lockupViewModel', {dict})):
@@ -1032,14 +1070,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 94,
'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Igor Kleiner - Playlists',
'description': r're:(?s)Добро пожаловать на мой канал! Здесь вы найдете видео .{504}/a1/50b/10a$',
'uploader': 'Igor Kleiner ',
'title': 'Igor DS: ИИ, Наука и Творчество - Playlists',
'description': r're:(?s)Добро пожаловать! Здесь сложные технологии встречаются.+\n$',
'uploader': 'Igor DS: ИИ, Наука и Творчество ',
'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
'channel': 'Igor Kleiner ',
'channel': 'Igor DS: ИИ, Наука и Творчество ',
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
'tags': 'count:23',
'tags': 'count:19',
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
'channel_follower_count': int,
},
@@ -1049,14 +1087,13 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 94,
'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Igor Kleiner - Playlists',
'description': r're:(?s)Добро пожаловать на мой канал! Здесь вы найдете видео .{504}/a1/50b/10a$',
'uploader': 'Igor Kleiner ',
'title': 'Igor DS: ИИ, Наука и Творчество - Playlists',
'description': r're:(?s)Добро пожаловать! Здесь сложные технологии встречаются.+\n$',
'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
'tags': 'count:23',
'tags': 'count:19',
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
'channel': 'Igor Kleiner ',
'channel': 'Igor DS: ИИ, Наука и Творчество ',
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
'channel_follower_count': int,
},
@@ -1139,90 +1176,89 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 0,
}, {
'note': 'Home tab',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
'url': 'https://www.youtube.com/channel/UCTwECeGqMZee77BjdoYtI2Q/featured',
'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Home',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'uploader': 'lex will',
'uploader_id': '@lexwill718',
'channel': 'lex will',
'tags': ['bible', 'history', 'prophesy'],
'uploader_url': 'https://www.youtube.com/@lexwill718',
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'id': 'UCTwECeGqMZee77BjdoYtI2Q',
'title': 'Creative Commons - Home',
'description': 'md5:7cfc22824277588d26a66054f22d93c8',
'uploader': 'Creative Commons',
'uploader_id': '@creativecommons',
'uploader_url': 'https://www.youtube.com/@creativecommons',
'channel': 'Creative Commons',
'channel_id': 'UCTwECeGqMZee77BjdoYtI2Q',
'channel_url': 'https://www.youtube.com/channel/UCTwECeGqMZee77BjdoYtI2Q',
'channel_follower_count': int,
'tags': ['creative commons', 'remix', 'culture', 'nonprofit'],
},
'playlist_mincount': 2,
'playlist_mincount': 6,
}, {
'note': 'Videos tab',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
'url': 'https://www.youtube.com/channel/UCTwECeGqMZee77BjdoYtI2Q/videos',
'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Videos',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'uploader': 'lex will',
'uploader_id': '@lexwill718',
'tags': ['bible', 'history', 'prophesy'],
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'uploader_url': 'https://www.youtube.com/@lexwill718',
'channel': 'lex will',
'id': 'UCTwECeGqMZee77BjdoYtI2Q',
'title': 'Creative Commons - Videos',
'description': 'md5:7cfc22824277588d26a66054f22d93c8',
'uploader': 'Creative Commons',
'uploader_id': '@creativecommons',
'uploader_url': 'https://www.youtube.com/@creativecommons',
'channel': 'Creative Commons',
'channel_id': 'UCTwECeGqMZee77BjdoYtI2Q',
'channel_url': 'https://www.youtube.com/channel/UCTwECeGqMZee77BjdoYtI2Q',
'channel_follower_count': int,
'tags': ['creative commons', 'remix', 'culture', 'nonprofit'],
},
'playlist_mincount': 975,
'playlist_mincount': 239,
}, {
'note': 'Videos tab, sorted by popular',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
'url': 'https://www.youtube.com/channel/UCTwECeGqMZee77BjdoYtI2Q/videos?view=0&sort=p&flow=grid',
'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Videos',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'uploader': 'lex will',
'uploader_id': '@lexwill718',
'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'uploader_url': 'https://www.youtube.com/@lexwill718',
'channel': 'lex will',
'tags': ['bible', 'history', 'prophesy'],
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
'id': 'UCTwECeGqMZee77BjdoYtI2Q',
'title': 'Creative Commons - Videos',
'description': 'md5:7cfc22824277588d26a66054f22d93c8',
'uploader': 'Creative Commons',
'uploader_id': '@creativecommons',
'uploader_url': 'https://www.youtube.com/@creativecommons',
'channel': 'Creative Commons',
'channel_id': 'UCTwECeGqMZee77BjdoYtI2Q',
'channel_url': 'https://www.youtube.com/channel/UCTwECeGqMZee77BjdoYtI2Q',
'channel_follower_count': int,
'tags': ['creative commons', 'remix', 'culture', 'nonprofit'],
},
'playlist_mincount': 199,
'playlist_mincount': 239,
}, {
'note': 'Playlists tab',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
'url': 'https://www.youtube.com/channel/UCTwECeGqMZee77BjdoYtI2Q/playlists',
'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Playlists',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'uploader': 'lex will',
'uploader_id': '@lexwill718',
'uploader_url': 'https://www.youtube.com/@lexwill718',
'channel': 'lex will',
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'tags': ['bible', 'history', 'prophesy'],
'id': 'UCTwECeGqMZee77BjdoYtI2Q',
'title': 'Creative Commons - Playlists',
'description': 'md5:7cfc22824277588d26a66054f22d93c8',
'uploader': 'Creative Commons',
'uploader_id': '@creativecommons',
'uploader_url': 'https://www.youtube.com/@creativecommons',
'channel': 'Creative Commons',
'channel_id': 'UCTwECeGqMZee77BjdoYtI2Q',
'channel_url': 'https://www.youtube.com/channel/UCTwECeGqMZee77BjdoYtI2Q',
'channel_follower_count': int,
'tags': ['creative commons', 'remix', 'culture', 'nonprofit'],
},
'playlist_mincount': 17,
'playlist_mincount': 20,
}, {
'note': 'Posts tab',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
'url': 'https://www.youtube.com/channel/UCtS3BcCw-tITPFYSvkbP0Bg/posts',
'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Posts',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'channel': 'lex will',
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'tags': ['bible', 'history', 'prophesy'],
'id': 'UCtS3BcCw-tITPFYSvkbP0Bg',
'title': 'Office Hours Live with Tim Heidecker - Posts',
'description': 'md5:01ec1460ea6c6e2aa47d3be9c756559c',
'uploader': 'Office Hours Live with Tim Heidecker',
'uploader_id': '@OfficeHoursLive',
'uploader_url': 'https://www.youtube.com/@OfficeHoursLive',
'channel': 'Office Hours Live with Tim Heidecker',
'channel_id': 'UCtS3BcCw-tITPFYSvkbP0Bg',
'channel_url': 'https://www.youtube.com/channel/UCtS3BcCw-tITPFYSvkbP0Bg',
'channel_follower_count': int,
'uploader_url': 'https://www.youtube.com/@lexwill718',
'uploader_id': '@lexwill718',
'uploader': 'lex will',
'tags': 'count:17',
},
'playlist_mincount': 18,
'skip': 'This Community isn\'t available',
'playlist_mincount': 145,
}, {
# TODO: fix channel_is_verified extraction
'note': 'Search tab',
@@ -1272,6 +1308,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
},
'playlist_count': 96,
}, {
# TODO: fix availability extraction
'note': 'Large playlist',
'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
'info_dict': {
@@ -1296,6 +1333,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
'only_matching': True,
}, {
# TODO: fix availability extraction
# The 'note' below is outdated: there is no longer a "Load more" button
'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
'info_dict': {
@@ -1313,7 +1352,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'Interstellar Movie',
'uploader_url': 'https://www.youtube.com/@InterstellarMovie',
},
'playlist_mincount': 21,
'playlist_mincount': 10,
}, {
# TODO: fix availability extraction
'note': 'Playlist with "show unavailable videos" button',
@@ -1336,6 +1375,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 150,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, {
# TODO: fix availability extraction
'note': 'Playlist with unavailable videos in page 7',
'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
'info_dict': {
@@ -1407,7 +1447,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
'info_dict': {
'id': 'VFGoUmo74wE', # This will keep changing
'id': 'ubIX-TwVqZI', # This will keep changing
'ext': 'mp4',
'title': str,
'upload_date': r're:\d{8}',
@@ -1586,6 +1626,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 50,
'expected_warnings': ['YouTube Music is not directly supported'],
}, {
# YoutubeTab_25: use to test _extract_lockup_view_model
'note': 'unlisted single video playlist',
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
'info_dict': {
@@ -1809,7 +1850,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'title': 'Not Just Bikes - Shorts',
'tags': 'count:10',
'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
'description': 'md5:295758591d0d43d8594277be54584da7',
'description': 'md5:2cb3ccdafa58608fa016f1de4930ec54',
'channel_follower_count': int,
'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
'channel': 'Not Just Bikes',
@@ -1831,8 +1872,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel': '中村悠一',
'channel_follower_count': int,
'description': 'md5:76b312b48a26c3b0e4d90e2dfc1b417d',
'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
'uploader_id': '@Yuichi-Nakamura',
'uploader_url': 'https://www.youtube.com/@中村悠一のあそびば',
'uploader_id': '@中村悠一のあそびば',
'uploader': '中村悠一',
},
'playlist_mincount': 60,
@@ -2010,7 +2051,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel': '99% Invisible',
'uploader_id': '@99percentinvisiblepodcast',
},
'playlist_count': 5,
'playlist_mincount': 5,
}, {
# Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
# TODO: fix channel_is_verified extraction
@@ -2034,6 +2075,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
# Playlist with only shorts, shown as reel renderers
# FIXME: future: YouTube currently doesn't give continuation for this,
# may do in future.
# TODO: fix availability extraction
'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
'info_dict': {
'id': 'UUxqPAgubo4coVn9Lx1FuKcg',

View File

@@ -202,52 +202,53 @@ class ZDFIE(ZDFBaseIE):
IE_NAME = 'zdf'
_TESTS = [{
# Standalone video (i.e. not part of a playlist), video URL
'url': 'https://www.zdf.de/video/dokus/sylt---deutschlands-edles-nordlicht-movie-100/sylt-deutschlands-edles-nordlicht-100',
'url': 'https://www.zdf.de/video/dokus/ein-tag-im-juli---ahrtalflut-2021-movie-100/terra-x-history-ein-tag-im-juli-ahrtalflut-2021-100',
'md5': '19dedfc5bca3f4f015deeb96c153ecec',
'info_dict': {
'id': 'sylt-deutschlands-edles-nordlicht-100',
'id': 'terra-x-history-ein-tag-im-juli-ahrtalflut-2021-100',
'ext': 'mp4',
'title': 'Sylt - Deutschlands edles Nordlicht',
'description': 'md5:35407b810c2e1e33efbe15ef6e4c06c3',
'duration': 810.0,
'thumbnail': r're:https://www\.zdf\.de/assets/sylt-118~original\?cb=\d+',
'series': 'Sylt - Deutschlands edles Nordlicht',
'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100',
'timestamp': 1612462500,
'upload_date': '20210204',
'_old_archive_ids': ['zdf 210402_1915_sendung_dok'],
'title': 'Ein Tag im Juli - Ahrtalflut 2021',
'description': 'md5:7a426ce6f44d988eccbd4a485453e336',
'duration': 5304.0,
'thumbnail': r're:https://www\.zdf\.de/assets/terra-x-history-ein-tag-im-juli-ahrtal-flut-2021-102~original\?cb=\d+',
'series': 'Ein Tag im Juli - Ahrtalflut 2021',
'series_id': 'ein-tag-im-juli---ahrtalflut-2021-movie-100',
'timestamp': 1779214500,
'upload_date': '20260519',
'_old_archive_ids': ['zdf 260519_2015_sendung_his'],
},
}, {
# Standalone video (i.e. not part of a playlist), play URL
'url': 'https://www.zdf.de/play/dokus/sylt---deutschlands-edles-nordlicht-movie-100/sylt-deutschlands-edles-nordlicht-100',
'url': 'https://www.zdf.de/play/dokus/ein-tag-im-juli---ahrtalflut-2021-movie-100/terra-x-history-ein-tag-im-juli-ahrtalflut-2021-100',
'info_dict': {
'id': 'sylt-deutschlands-edles-nordlicht-100',
'id': 'terra-x-history-ein-tag-im-juli-ahrtalflut-2021-100',
'ext': 'mp4',
'title': 'Sylt - Deutschlands edles Nordlicht',
'description': 'md5:35407b810c2e1e33efbe15ef6e4c06c3',
'duration': 810.0,
'thumbnail': r're:https://www\.zdf\.de/assets/sylt-118~original\?cb=\d+',
'series': 'Sylt - Deutschlands edles Nordlicht',
'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100',
'timestamp': 1612462500,
'upload_date': '20210204',
'_old_archive_ids': ['zdf 210402_1915_sendung_dok'],
'title': 'Ein Tag im Juli - Ahrtalflut 2021',
'description': 'md5:7a426ce6f44d988eccbd4a485453e336',
'duration': 5304.0,
'thumbnail': r're:https://www\.zdf\.de/assets/terra-x-history-ein-tag-im-juli-ahrtal-flut-2021-102~original\?cb=\d+',
'series': 'Ein Tag im Juli - Ahrtalflut 2021',
'series_id': 'ein-tag-im-juli---ahrtalflut-2021-movie-100',
'timestamp': 1779214500,
'upload_date': '20260519',
'_old_archive_ids': ['zdf 260519_2015_sendung_his'],
},
'params': {'skip_download': True},
}, {
# Standalone video (i.e. not part of a playlist), legacy URL before website redesign in 2025-03
'url': 'https://www.zdf.de/dokumentation/dokumentation-sonstige/sylt-deutschlands-edles-nordlicht-100.html',
'url': 'https://www.zdf.de/dokumentation/dokumentation-sonstige/terra-x-history-ein-tag-im-juli-ahrtalflut-2021-100.html',
'info_dict': {
'id': 'sylt-deutschlands-edles-nordlicht-100',
'id': 'terra-x-history-ein-tag-im-juli-ahrtalflut-2021-100',
'ext': 'mp4',
'title': 'Sylt - Deutschlands edles Nordlicht',
'description': 'md5:35407b810c2e1e33efbe15ef6e4c06c3',
'duration': 810.0,
'thumbnail': r're:https://www\.zdf\.de/assets/sylt-118~original\?cb=\d+',
'series': 'Sylt - Deutschlands edles Nordlicht',
'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100',
'timestamp': 1612462500,
'upload_date': '20210204',
'_old_archive_ids': ['zdf 210402_1915_sendung_dok'],
'title': 'Ein Tag im Juli - Ahrtalflut 2021',
'description': 'md5:7a426ce6f44d988eccbd4a485453e336',
'duration': 5304.0,
'thumbnail': r're:https://www\.zdf\.de/assets/terra-x-history-ein-tag-im-juli-ahrtal-flut-2021-102~original\?cb=\d+',
'series': 'Ein Tag im Juli - Ahrtalflut 2021',
'series_id': 'ein-tag-im-juli---ahrtalflut-2021-movie-100',
'timestamp': 1779214500,
'upload_date': '20260519',
'_old_archive_ids': ['zdf 260519_2015_sendung_his'],
},
'params': {'skip_download': True},
}, {
@@ -258,12 +259,12 @@ class ZDFIE(ZDFBaseIE):
'info_dict': {
'id': 'page-video-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102',
'ext': 'mp4',
'title': 'Gelb: Vom hellen Glanz zu finsteren Abgründen',
'title': 'Gelb · Vom hellen Glanz zu finsteren Abgründen (6/6)',
'description': 'md5:9aad4806b4c8ea152ab21e70c9d516be',
'duration': 895.0,
'thumbnail': r're:https://www\.zdf\.de/assets/image-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102~original\?cb=\d+',
'thumbnail': r're:https://www\.zdf\.de/assets/image-ard-gelb--vom-hellen-glanz-zu-finsteren-abgruenden-66-100~original\?cb=\d+',
'series': 'Die Magie der Farben',
'series_id': 'collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-132',
'series_id': 'collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-140',
'season': 'Season 2023',
'season_number': 2023,
'episode': 'Episode 5',
@@ -278,12 +279,12 @@ class ZDFIE(ZDFBaseIE):
'info_dict': {
'id': 'page-video-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102',
'ext': 'mp4',
'title': 'Gelb: Vom hellen Glanz zu finsteren Abgründen',
'title': 'Gelb · Vom hellen Glanz zu finsteren Abgründen (6/6)',
'description': 'md5:9aad4806b4c8ea152ab21e70c9d516be',
'duration': 895.0,
'thumbnail': r're:https://www\.zdf\.de/assets/image-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102~original\?cb=\d+',
'thumbnail': r're:https://www\.zdf\.de/assets/image-ard-gelb--vom-hellen-glanz-zu-finsteren-abgruenden-66-100~original\?cb=\d+',
'series': 'Die Magie der Farben',
'series_id': 'collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-132',
'series_id': 'collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-140',
'season': 'Season 2023',
'season_number': 2023,
'episode': 'Episode 5',
@@ -299,12 +300,12 @@ class ZDFIE(ZDFBaseIE):
'info_dict': {
'id': 'page-video-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102',
'ext': 'mp4',
'title': 'Gelb: Vom hellen Glanz zu finsteren Abgründen',
'title': 'Gelb · Vom hellen Glanz zu finsteren Abgründen (6/6)',
'description': 'md5:9aad4806b4c8ea152ab21e70c9d516be',
'duration': 895.0,
'thumbnail': r're:https://www\.zdf\.de/assets/image-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102~original\?cb=\d+',
'thumbnail': r're:https://www\.zdf\.de/assets/image-ard-gelb--vom-hellen-glanz-zu-finsteren-abgruenden-66-100~original\?cb=\d+',
'series': 'Die Magie der Farben',
'series_id': 'collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-132',
'series_id': 'collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-140',
'season': 'Season 2023',
'season_number': 2023,
'episode': 'Episode 5',
@@ -325,7 +326,7 @@ class ZDFIE(ZDFBaseIE):
'title': 'heute journal vom 19.12.2021',
'description': 'md5:02504cf3b03777ff32fcc927d260c5dd',
'duration': 1770.0,
'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/273e5545-16e7-4ca3-898e-52fe9e06d964?layout=1920x1080',
'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/273e5545-16e7-4ca3-898e-52fe9e06d964?layout=2400x1350',
'chapters': 'count:11',
'series': 'heute journal',
'series_id': 'heute-journal-104',
@@ -383,18 +384,18 @@ class ZDFIE(ZDFBaseIE):
},
}, {
# zdfheute video, not available on zdf.de (uses the fallback extraction path)
'url': 'https://www.zdf.de/nachrichten/politik/deutschland/koalitionsverhandlungen-spd-cdu-csu-dobrindt-100.html',
'md5': 'c3a78514dd993a5781aa3afe50db51e2',
'url': 'https://www.zdfheute.de/politik/deutschland/wildberger-ki-einsatz-rede-texte-100.html',
'md5': '7e3f7bb4fcaf0ce2c7a56f6cfb33e054',
'info_dict': {
'id': 'koalitionsverhandlungen-spd-cdu-csu-dobrindt-100',
'id': 'wildberger-ki-einsatz-rede-texte-100',
'ext': 'mp4',
'title': 'Dobrindt schließt Steuererhöhungen aus',
'description': 'md5:9a117646d7b8df6bc902eb543a9c9023',
'duration': 325,
'thumbnail': r're:https://www\.zdfheute\.de/assets/dobrindt-csu-berlin-direkt-100~1920x1080\?cb=\d+',
'timestamp': 1743374520,
'upload_date': '20250330',
'_old_archive_ids': ['zdf 250330_clip_2_bdi'],
'title': 'Minister Wildberger nutzte KI für Texte und Reden',
'description': 'md5:6093c58c5aa98a9a1549ff66c93f3209',
'duration': 30.0,
'thumbnail': r're:https://www\.zdfheute\.de/assets/karsten-wildberger-120~2400x1350\?cb=\d+',
'timestamp': 1781388300,
'upload_date': '20260613',
'_old_archive_ids': ['zdf 260611_mario_voigt_viu'],
},
}, {
# logo! video, also available on zdf.de
@@ -426,7 +427,7 @@ class ZDFIE(ZDFBaseIE):
'id': 'kinderreporter-vivaan-trifft-alina-grijseels-100',
'ext': 'mp4',
'title': 'Vivaan trifft Handballerin Alina Grijseels',
'description': 'md5:9572e7f4340dda823ea4091a76624da6',
'description': 'md5:76bfa6581dd70ee2ef7b2679155e75dc',
'duration': 166.0,
'thumbnail': r're:https://www\.zdf\.de/assets/vivaan-alina-grijseels-100~original\?cb=\d+',
'series': 'logo!',
@@ -435,6 +436,21 @@ class ZDFIE(ZDFBaseIE):
'upload_date': '20251220',
'_old_archive_ids': ['zdf 251219_kr_alina_grijseels_neu_log'],
},
}, {
# Live stream
'url': 'https://www.zdf.de/play/live-tv/sender/zdf-live-beitrag-100',
'info_dict': {
'id': 'zdf-live-beitrag-100',
'ext': 'mp4',
'title': r're:ZDF Livestream',
'description': str,
'thumbnail': r're:https://www\.zdf\.de/assets/2400-zdf-100~original\?cb=\d+',
'timestamp': int,
'upload_date': str,
'live_status': 'is_live',
'_old_archive_ids': ['zdf zdf-live-beitrag-100'],
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
}, {
# Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html
'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html',
@@ -529,7 +545,7 @@ query VideoByCanonical($canonical: String!) {
ptmd_data['_old_archive_ids'] = [make_archive_id(self, old_archive_id)]
return ptmd_data
# This fallback should generally only happen for pages under `zdf.de/nachrichten`.
# This fallback should generally only happen for pages on zdfheute.de and logo.de
# They are on a separate website for which GraphQL often doesn't return results.
# The API used here is no longer in use by official clients and likely deprecated.
# Long-term, news documents probably should use the API used by the mobile apps:
@@ -583,12 +599,14 @@ query VideoByCanonical($canonical: String!) {
return self._extract_fallback(video_id)
aspect_ratio = None
is_live = False
ptmd_urls = []
for node in traverse_obj(video_data, ('currentMedia', 'nodes', lambda _, v: v['ptmdTemplate'])):
ptmd_url = self._expand_ptmd_template('https://api.zdf.de', node['ptmdTemplate'])
# Smuggle vod_media_type so that _extract_ptmd is aware of 'DGS' variants
if vod_media_type := node.get('vodMediaType'):
ptmd_url = smuggle_url(ptmd_url, {'vod_media_type': vod_media_type})
is_live = 'liveMediaType' in node
ptmd_urls.append(ptmd_url)
if not aspect_ratio:
aspect_ratio = self._parse_aspect_ratio(node.get('aspectRatio'))
@@ -606,6 +624,7 @@ query VideoByCanonical($canonical: String!) {
'chapters': ('currentMedia', 'nodes', 0, 'streamAnchorTags', 'nodes', {self._extract_chapters}),
}),
**self._extract_ptmd(ptmd_urls, video_id, self._get_api_token(), aspect_ratio),
'is_live': is_live,
'id': video_id,
}
@@ -634,19 +653,19 @@ class ZDFChannelIE(ZDFBaseIE):
}, {
# Standalone video (i.e. not part of a playlist), collection URL
'add_ie': [ZDFIE.ie_key()],
'url': 'https://www.zdf.de/dokus/sylt---deutschlands-edles-nordlicht-movie-100',
'url': 'https://www.zdf.de/dokus/ein-tag-im-juli---ahrtalflut-2021-movie-100',
'info_dict': {
'id': 'sylt-deutschlands-edles-nordlicht-100',
'id': 'terra-x-history-ein-tag-im-juli-ahrtalflut-2021-100',
'ext': 'mp4',
'title': 'Sylt - Deutschlands edles Nordlicht',
'description': 'md5:35407b810c2e1e33efbe15ef6e4c06c3',
'duration': 810.0,
'thumbnail': r're:https://www\.zdf\.de/assets/sylt-118~original\?cb=\d+',
'series': 'Sylt - Deutschlands edles Nordlicht',
'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100',
'timestamp': 1612462500,
'upload_date': '20210204',
'_old_archive_ids': ['zdf 210402_1915_sendung_dok'],
'title': 'Ein Tag im Juli - Ahrtalflut 2021',
'description': 'md5:7a426ce6f44d988eccbd4a485453e336',
'duration': 5304.0,
'thumbnail': r're:https://www\.zdf\.de/assets/terra-x-history-ein-tag-im-juli-ahrtal-flut-2021-102~original\?cb=\d+',
'series': 'Ein Tag im Juli - Ahrtalflut 2021',
'series_id': 'ein-tag-im-juli---ahrtalflut-2021-movie-100',
'timestamp': 1779214500,
'upload_date': '20260519',
'_old_archive_ids': ['zdf 260519_2015_sendung_his'],
},
'params': {'skip_download': True},
}, {

View File

@@ -4,7 +4,7 @@ from .common import PostProcessor
from ..compat import shutil
from ..utils import (
PostProcessingError,
make_dir,
make_parent_dirs,
)
@@ -42,7 +42,10 @@ class MoveFilesAfterDownloadPP(PostProcessor):
self.report_warning(
f'Cannot move file "{oldfile}" out of temporary directory since "{newfile}" already exists. ')
continue
make_dir(newfile, PostProcessingError)
try:
make_parent_dirs(newfile)
except OSError as e:
raise PostProcessingError(f'Unable to create directory: {e}') from e
self.to_screen(f'Moving file "{oldfile}" to "{newfile}"')
shutil.move(oldfile, newfile) # os.rename cannot move between volumes

View File

@@ -46,4 +46,16 @@ def jwt_encode_hs256(payload_data, key, headers={}):
return header_b64 + b'.' + payload_b64 + b'.' + signature_b64
def make_dir(path, to_screen=None):
    """Compatibility wrapper around ``make_parent_dirs`` with a boolean result.

    Delegates directory creation for *path* to ``make_parent_dirs`` and turns
    an ``OSError`` into a ``False`` return value instead of propagating it.

    path:       passed through unchanged to ``make_parent_dirs``
    to_screen:  optional callable; when not None it is called with a message
                describing the failure
    Returns True on success, False if an ``OSError`` was raised.
    """
    # Imported at call time, inside the function, as in the original
    from . import make_parent_dirs

    try:
        make_parent_dirs(path)
    except OSError as e:
        if to_screen is not None:
            to_screen(f'Unable to create directory: {e}')
        return False
    else:
        return True
compiled_regex_type = type(re.compile(''))

View File

@@ -4713,16 +4713,9 @@ def random_uuidv4():
return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
def make_dir(path, to_screen=None):
    """Create the directory component of *path* if it does not exist yet.

    path:       file path; only ``os.path.dirname(path)`` is created
    to_screen:  optional callable invoked with an error message on failure
    Returns True if the directory exists afterwards (or *path* has no
    directory component), False if creating it raised ``OSError``.
    """
    try:
        dn = os.path.dirname(path)
        if dn:
            os.makedirs(dn, exist_ok=True)
        return True
    except OSError as err:
        # The previous guard, `callable(to_screen) is not None`, was always
        # true (callable() returns a bool), so the default to_screen=None was
        # called and raised TypeError instead of returning False.
        if callable(to_screen):
            to_screen(f'unable to create directory {err}')
        return False
def make_parent_dirs(path):
    """Ensure the directory that would contain *path* exists.

    Does nothing when *path* has no directory component. Any ``OSError``
    raised by ``os.makedirs`` propagates to the caller.
    """
    parent = os.path.dirname(path)
    if not parent:
        # Bare file name: there is no parent directory to create
        return
    os.makedirs(parent, exist_ok=True)
def get_executable_path():
@@ -5218,12 +5211,17 @@ class _UnsafeExtensionError(Exception):
'sbv',
])
_enabled = True
def __init__(self, extension, /):
super().__init__(f'unsafe file extension: {extension!r}')
self.extension = extension
@classmethod
def sanitize_extension(cls, extension, /, *, prepend=False, _allowed_exts=()):
if not cls._enabled:
return extension
if extension is None:
return None

View File

@@ -64,7 +64,7 @@ class HTTPHeaderDict(dict):
other = other.sensitive()
if isinstance(other, dict):
self.update(other)
return
return self
return NotImplemented
def __or__(self, other, /) -> typing.Self: