feat(toDash): Add option to include WebVTT or TTML captions (#673)

This commit is contained in:
absidue
2024-06-25 06:22:11 +02:00
committed by GitHub
parent e5aab9a9b3
commit bd9f6ac64c
5 changed files with 108 additions and 11 deletions

View File

@@ -54,11 +54,16 @@ export default class MediaInfo {
}
let storyboards;
let captions;
if (options.include_thumbnails && player_response.storyboards) {
storyboards = player_response.storyboards;
}
if (typeof options.captions_format === 'string' && player_response.captions?.caption_tracks) {
captions = player_response.captions.caption_tracks;
}
return FormatUtils.toDash(
this.streaming_data,
this.page[0].video_details?.is_post_live_dvr,
@@ -68,6 +73,7 @@ export default class MediaInfo {
this.#actions.session.player,
this.#actions,
storyboards,
captions,
options
);
}

View File

@@ -2,17 +2,19 @@ import Text from './misc/Text.js';
import { YTNode } from '../helpers.js';
import type { RawNode } from '../index.js';
/**
 * Shape of a single caption track entry, as stored in
 * `PlayerCaptionsTracklist.caption_tracks`.
 */
export interface CaptionTrackData {
// URL of the caption track. Must be parseable as an absolute URL by consumers that build caption links from it.
base_url: string;
// Display name of the track.
name: Text;
// Track identifier string (NOTE(review): presumably unique per track within a video — confirm).
vss_id: string;
// Language code of the track.
language_code: string;
// Optional track kind (NOTE(review): 'asr' presumably marks automatically generated captions; meaning of 'frc' not visible here — confirm).
kind?: 'asr' | 'frc';
// Whether the track is flagged as translatable.
is_translatable: boolean;
}
export default class PlayerCaptionsTracklist extends YTNode {
static type = 'PlayerCaptionsTracklist';
caption_tracks?: {
base_url: string;
name: Text;
vss_id: string;
language_code: string;
kind?: 'asr' | 'frc';
is_translatable: boolean;
}[];
caption_tracks?: CaptionTrackData[];
audio_tracks?: {
audio_track_id: string;

View File

@@ -1,4 +1,13 @@
export interface StreamingInfoOptions {
/**
* The format to use for the captions, when the video has captions.
* If this option is not set, the DASH manifest will not include the captions.
*
* Possible values:
* * `vtt`: Tells YouTube to return the captions in the WebVTT format
* * `ttml`: Tells YouTube to return the captions in the TTML format
*/
captions_format?: 'vtt' | 'ttml';
/**
* The label to use for the non-DRC streams when a video has both DRC and non-DRC streams.
*

View File

@@ -13,6 +13,7 @@ import type { SegmentInfo as FSegmentInfo } from './StreamingInfo.js';
import type { FormatFilter, URLTransformer } from '../types/FormatUtils.js';
import type PlayerLiveStoryboardSpec from '../parser/classes/PlayerLiveStoryboardSpec.js';
import type { StreamingInfoOptions } from '../types/StreamingInfoOptions.js';
import type { CaptionTrackData } from '../parser/classes/PlayerCaptionsTracklist.js';
interface DashManifestProps {
streamingData: IStreamingData;
@@ -24,6 +25,7 @@ interface DashManifestProps {
player?: Player;
actions?: Actions;
storyboards?: PlayerStoryboardSpec | PlayerLiveStoryboardSpec;
captionTracks?: CaptionTrackData[];
}
async function OTFPostLiveDvrSegmentInfo({ info }: { info: FSegmentInfo }) {
@@ -73,14 +75,16 @@ async function DashManifest({
player,
actions,
storyboards,
captionTracks,
options
}: DashManifestProps) {
const {
getDuration,
audio_sets,
video_sets,
image_sets
} = getStreamingInfo(streamingData, isPostLiveDvr, transformURL, rejectFormat, cpn, player, actions, storyboards, options);
image_sets,
text_sets
} = getStreamingInfo(streamingData, isPostLiveDvr, transformURL, rejectFormat, cpn, player, actions, storyboards, captionTracks, options);
// XXX: DASH spec: https://standards.iso.org/ittf/PubliclyAvailableStandards/c083314_ISO_IEC%2023009-1_2022(en).zip
@@ -229,6 +233,32 @@ async function DashManifest({
</adaptation-set>;
})
}
{
text_sets.map((set, index) => {
return <adaptation-set
id={index + audio_sets.length + video_sets.length + image_sets.length}
mimeType={set.mime_type}
lang={set.language}
contentType="text"
>
<role
schemeIdUri="urn:mpeg:dash:role:2011"
value="caption"
/>
<label id={index + audio_sets.length}>
{set.track_name}
</label>
<representation
id={set.representation.uid}
bandwidth="0"
>
<base-url>
{set.representation.base_url}
</base-url>
</representation>
</adaptation-set>;
})
}
</period>
</mpd>;
}
@@ -242,6 +272,7 @@ export function toDash(
player?: Player,
actions?: Actions,
storyboards?: PlayerStoryboardSpec | PlayerLiveStoryboardSpec,
caption_tracks?: CaptionTrackData[],
options?: StreamingInfoOptions
) {
if (!streaming_data)
@@ -258,6 +289,7 @@ export function toDash(
player={player}
actions={actions}
storyboards={storyboards}
captionTracks={caption_tracks}
/>
);
}

View File

@@ -12,6 +12,7 @@ import type { Format } from '../parser/misc.js';
import type { PlayerLiveStoryboardSpec } from '../parser/nodes.js';
import type { FormatFilter, URLTransformer } from '../types/FormatUtils.js';
import type { StreamingInfoOptions } from '../types/StreamingInfoOptions.js';
import type { CaptionTrackData } from '../parser/classes/PlayerCaptionsTracklist.js';
const TAG_ = 'StreamingInfo';
@@ -20,6 +21,7 @@ export interface StreamingInfo {
audio_sets: AudioSet[];
video_sets: VideoSet[];
image_sets: ImageSet[];
text_sets: TextSet[];
}
export interface AudioSet {
@@ -122,6 +124,18 @@ export interface ImageRepresentation {
getURL(n: number): string;
}
/**
 * A caption/subtitle track prepared for inclusion in the streaming info.
 * One `TextSet` is produced per caption track.
 */
export interface TextSet {
// MIME type of the caption payload (e.g. `text/vtt` or `application/ttml+xml`).
mime_type: string;
// Language code taken from the caption track.
language: string;
// Human-readable track name (the caption track's name converted to a string).
track_name: string;
// The single representation that carries the caption URL for this set.
representation: TextRepresentation;
}
/**
 * The downloadable representation of a {@link TextSet}.
 */
export interface TextRepresentation {
// Identifier of the representation.
uid: string;
// Fully serialized URL from which the captions are fetched.
base_url: string;
}
interface PostLiveDvrInfo {
duration: number,
segment_count: number
@@ -735,6 +749,29 @@ function getImageSets(
}));
}
/**
 * Converts caption tracks into text sets for the requested caption format.
 *
 * For every track, the track's `base_url` is parsed as a URL, its `fmt`
 * query parameter is set to `format` (replacing any existing value), and the
 * resulting URL is passed through `transform_url` before being serialized.
 *
 * @param caption_tracks - Caption tracks to convert.
 * @param format - Caption format to request; also selects the MIME type
 * (`text/vtt` for `vtt`, `application/ttml+xml` otherwise).
 * @param transform_url - Applied to each caption URL before serialization.
 * @returns One text set per caption track, in the same order as the input.
 */
function getTextSets(
  caption_tracks: CaptionTrackData[],
  format: 'vtt' | 'ttml',
  transform_url: URLTransformer
): TextSet[] {
  // The MIME type only depends on the format, so resolve it once up front.
  let mime_type = 'application/ttml+xml';
  if (format === 'vtt') {
    mime_type = 'text/vtt';
  }

  // Builds the text set for a single caption track.
  const toTextSet = (track: CaptionTrackData): TextSet => {
    const caption_url = new URL(track.base_url);
    caption_url.searchParams.set('fmt', format);

    const language = track.language_code;
    const track_name = track.name.toString();
    const uid = `text-${track.vss_id}`;
    const base_url = transform_url(caption_url).toString();

    return {
      mime_type,
      language,
      track_name,
      representation: { uid, base_url }
    };
  };

  return caption_tracks.map((track) => toTextSet(track));
}
export function getStreamingInfo(
streaming_data?: IStreamingData,
is_post_live_dvr = false,
@@ -744,6 +781,7 @@ export function getStreamingInfo(
player?: Player,
actions?: Actions,
storyboards?: PlayerStoryboardSpec | PlayerLiveStoryboardSpec,
caption_tracks?: CaptionTrackData[],
options?: StreamingInfoOptions
) {
if (!streaming_data)
@@ -839,11 +877,21 @@ export function getStreamingInfo(
image_sets = getImageSets(duration, actions, storyboards, url_transformer);
}
let text_sets: TextSet[] = [];
if (caption_tracks && options?.captions_format) {
if ((options.captions_format as string) !== 'vtt' && (options.captions_format as string) !== 'ttml') {
throw new InnertubeError('Invalid captions format', options.captions_format);
}
text_sets = getTextSets(caption_tracks, options.captions_format, url_transformer);
}
const info : StreamingInfo = {
getDuration,
audio_sets,
video_sets,
image_sets
image_sets,
text_sets
};
return info;