feat: add qoi and qoa formats & refactor ffmpeg a bit

sets this up a bit to add special handling for formats
This commit is contained in:
Maya 2026-05-30 15:34:10 +03:00
parent efdc96e1c9
commit b6497de556
No known key found for this signature in database
8 changed files with 487 additions and 192 deletions

View File

@ -27,6 +27,7 @@
"overlayscrollbars": "^2.14.0",
"overlayscrollbars-svelte": "^0.5.5",
"p-queue": "^9.1.0",
"qoa-format": "^1.0.1",
"riff-file": "^1.0.3",
"sanitize-html": "^2.17.2",
"svelte-stripe": "^1.4.0",
@ -319,6 +320,10 @@
"@swc/wasm": ["@swc/wasm@1.13.5", "", {}, "sha512-ZBZcxieydxNwgEU9eFAXGMaDb1Xoh+ZkZcUQ27LNJzc2lPSByoL6CSVqnYiaVo+n9JgqbYyHlMq+i7z0wRNTfA=="],
"@thi.ng/bitstream": ["@thi.ng/bitstream@2.4.52", "", { "dependencies": { "@thi.ng/errors": "^2.6.14" } }, "sha512-uOOJ4QxJ8lK7juR2gGZHbISPsPGLp4w7PLFG7pTPG+t+8Hkanl3Syu3LMPq0DGJbAX7U5l1TrKvLoNeNta3OsA=="],
"@thi.ng/errors": ["@thi.ng/errors@2.6.14", "", {}, "sha512-dSqLPZh5wOe329Ks2pJqoDmtjSv2g4KpXEP5/IQ5J9qvrEyNrRBCuaKHUKYUltQ1OUHGd9L5hBqGJl65Hlnu+g=="],
"@tokenizer/inflate": ["@tokenizer/inflate@0.4.1", "", { "dependencies": { "debug": "^4.4.3", "token-types": "^6.1.1" } }, "sha512-2mAv+8pkG6GIZiF1kNg1jAjh27IDxEPKwdGul3snfztFerfPGI1LjDezZp3i7BElXompqEtPmoPx6c2wgtWsOA=="],
"@tokenizer/token": ["@tokenizer/token@0.3.0", "", {}, "sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A=="],
@ -759,6 +764,8 @@
"punycode": ["punycode@2.3.1", "", {}, "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg=="],
"qoa-format": ["qoa-format@1.0.1", "", { "dependencies": { "@thi.ng/bitstream": "^2.2.12" } }, "sha512-dMB0Z6XQjdpz/Cw4Rf6RiBpQvUSPCfYlQMWvmuWlWkAT7nDQD29cVZ1SwDUB6DYJSitHENwbt90lqfI+7bvMcw=="],
"queue-microtask": ["queue-microtask@1.2.3", "", {}, "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A=="],
"read-cache": ["read-cache@1.0.0", "", { "dependencies": { "pify": "^2.3.0" } }, "sha512-Owdv/Ft7IjOgm/i0xvNDZ1LrRANRfew4b2prF3OWMQLxLfu3bS8FVhCsrSCMK4lR56Y9ya+AThoTpDCTxCmpRA=="],

View File

@ -61,6 +61,7 @@
"overlayscrollbars": "^2.14.0",
"overlayscrollbars-svelte": "^0.5.5",
"p-queue": "^9.1.0",
"qoa-format": "^1.0.1",
"riff-file": "^1.0.3",
"sanitize-html": "^2.17.2",
"svelte-stripe": "^1.4.0",

View File

@ -14,6 +14,12 @@ import {
SAMPLE_RATES,
} from "./ffmpeg.codecs";
import { buildImageSequenceCommand } from "./ffmpeg.animated";
import {
ffprobeValue,
detectAudioBitrate,
detectAudioSampleRate,
} from "./utils/ffprobe";
import { extractAlbumArt, avWithArt, avWithBg } from "./utils/ffmpeg";
import type {
SettingDefinition,
ConversionSettings,
@ -40,6 +46,7 @@ export class FFmpegConverter extends Converter {
new FormatInfo("alac", true, true), // outputted as m4a
new FormatInfo("m4a", true, true), // can be alac
new FormatInfo("caf", true, false), // can be alac
new FormatInfo("qoa", true, true),
new FormatInfo("wma", true, true),
new FormatInfo("amr", true, true),
new FormatInfo("ac3", true, true),
@ -239,36 +246,62 @@ export class FFmpegConverter extends Converter {
ffmpeg.on("log", errorListener);
try {
const buf = new Uint8Array(await input.file.arrayBuffer());
let buf = new Uint8Array(await input.file.arrayBuffer());
if (input.from === ".qoa") {
const { decodeQoa, encodeWav } =
await import("$lib/util/parse/qoa");
const decoded = decodeQoa(buf);
buf = new Uint8Array(
encodeWav(
decoded.pcm,
decoded.sampleRate,
decoded.channels,
true,
),
);
}
await ffmpeg.writeFile("input", buf);
this.log(`wrote ${input.name} to ffmpeg virtual fs`);
const command = await this.buildConversionCommand(
const specialHandled = await handleSpecialOutput(
ffmpeg,
input,
to,
conversionSettings,
isAlac,
conversionError,
);
this.log(`FFmpeg command: ${command.join(" ")}`);
await ffmpeg.exec(command);
this.log("executed ffmpeg command");
if (specialHandled) {
return specialHandled;
} else {
const command = await this.buildConversionCommand(
ffmpeg,
input,
to,
conversionSettings,
isAlac,
);
this.log(`FFmpeg command: ${command.join(" ")}`);
await ffmpeg.exec(command);
this.log("executed ffmpeg command");
if (conversionError) throw new Error(conversionError);
if (conversionError) throw new Error(conversionError);
const output = (await ffmpeg.readFile(
"output" + to,
)) as unknown as Uint8Array;
const output = (await ffmpeg.readFile(
"output" + to,
)) as unknown as Uint8Array;
if (!output || output.length === 0)
throw new Error("empty file returned");
if (!output || output.length === 0)
throw new Error("empty file returned");
const outputFileName =
input.name.split(".").slice(0, -1).join(".") + to;
this.log(`read ${outputFileName} from ffmpeg virtual fs`);
const outputFileName =
input.name.split(".").slice(0, -1).join(".") + to;
this.log(`read ${outputFileName} from ffmpeg virtual fs`);
const outBuf = new Uint8Array(output).buffer.slice(0);
return new VertFile(new File([outBuf], outputFileName), to);
const outBuf = new Uint8Array(output).buffer.slice(0);
return new VertFile(new File([outBuf], outputFileName), to);
}
} finally {
ffmpeg.off("log", errorListener);
this.activeConversions.delete(input.id);
@ -315,82 +348,6 @@ export class FFmpegConverter extends Converter {
return ffmpeg;
}
/**
 * Probes the first audio stream of the virtual file `input` for its bitrate.
 *
 * ffprobe prints the value through the "log" event, so a temporary listener
 * keeps the first message that parses to a non-zero integer.
 *
 * @param ffmpeg - instance whose virtual fs already holds `input`.
 * @returns the bitrate in kbps, or `null` when nothing was captured or the probe failed.
 */
private async detectAudioBitrate(ffmpeg: FFmpeg): Promise<number | null> {
	const probeArgs = [
		"-v",
		"quiet",
		"-select_streams",
		"a:0",
		"-show_entries",
		"stream=bit_rate",
		"-of",
		"default=noprint_wrappers=1:nokey=1",
		"input",
	];

	try {
		let kbps: number | null = null;

		const onLog = ({ message }: { message: string }) => {
			// only the first usable value counts
			if (kbps === null) {
				const bitsPerSecond = parseInt(message.trim(), 10);
				if (bitsPerSecond) {
					kbps = Math.round(bitsPerSecond / 1000);
					this.log(`Detected stream audio bitrate: ${kbps} kbps`);
				}
			}
		};

		ffmpeg.on("log", onLog);
		try {
			await ffmpeg.ffprobe.call(ffmpeg, probeArgs);
			return kbps;
		} finally {
			// always detach, even when ffprobe rejects
			ffmpeg.off("log", onLog);
		}
	} catch {
		return null;
	}
}
/**
 * Probes the first audio stream of the virtual file `input` for its sample rate.
 *
 * ffprobe prints the value through the "log" event, so a temporary listener
 * keeps the first message that parses to a non-zero integer.
 *
 * @param ffmpeg - instance whose virtual fs already holds `input`.
 * @returns the sample rate in Hz, or `null` when nothing was captured or the probe failed.
 */
private async detectAudioSampleRate(
	ffmpeg: FFmpeg,
): Promise<number | null> {
	const probeArgs = [
		"-v",
		"quiet",
		"-select_streams",
		"a:0",
		"-show_entries",
		"stream=sample_rate",
		"-of",
		"default=noprint_wrappers=1:nokey=1",
		"input",
	];

	try {
		let hertz: number | null = null;

		const onLog = ({ message }: { message: string }) => {
			// only the first usable value counts
			if (hertz === null) {
				const parsed = parseInt(message.trim(), 10);
				if (parsed) {
					hertz = parsed;
					this.log(`Detected stream audio sample rate: ${hertz} Hz`);
				}
			}
		};

		ffmpeg.on("log", onLog);
		try {
			await ffmpeg.ffprobe.call(ffmpeg, probeArgs);
			return hertz;
		} finally {
			// always detach, even when ffprobe rejects
			ffmpeg.off("log", onLog);
		}
	} catch {
		return null;
	}
}
private async buildConversionCommand(
ffmpeg: FFmpeg,
input: VertFile,
@ -494,7 +451,7 @@ export class FFmpegConverter extends Converter {
`converting from lossless to lossy, using default audio bitrate: 128k`,
);
} else {
const inputBitrate = await this.detectAudioBitrate(ffmpeg);
const inputBitrate = await detectAudioBitrate(ffmpeg);
audioBitrateArgs = inputBitrate
? ["-b:a", `${inputBitrate}k`]
: [];
@ -519,7 +476,7 @@ export class FFmpegConverter extends Converter {
);
sampleRateArgs = ["-ar", defaultRate];
} else {
let inputSampleRate = await this.detectAudioSampleRate(ffmpeg);
let inputSampleRate = await detectAudioSampleRate(ffmpeg);
if (to === ".opus" && inputSampleRate === 44100) {
// special case: opus does not support 44100Hz which is more common - adjust to 48000Hz
this.log(
@ -582,56 +539,32 @@ export class FFmpegConverter extends Converter {
this.log(`Converting audio ${input.from} to video ${to}`);
const hasAlbumArt = keepMetadata
? await this.extractAlbumArt(ffmpeg)
? await extractAlbumArt(ffmpeg)
: false;
const codecArgs = toArgs(to, isAlac);
if (hasAlbumArt) {
this.log("Using album art as video background");
return [
"-loop",
"1",
"-i",
"cover.jpg",
"-i",
"input",
"-vf",
"scale=trunc(iw/2)*2:trunc(ih/2)*2",
"-shortest",
"-pix_fmt",
"yuv420p",
"-r",
"1",
...codecArgs,
...metadataArgs,
...audioBitrateArgs,
...sampleRateArgs,
...channelsArgs,
...tracksArgs,
"output" + to,
];
return avWithArt(
to,
codecArgs,
metadataArgs,
audioBitrateArgs,
sampleRateArgs,
channelsArgs,
tracksArgs,
);
} else {
this.log("Using solid color background");
return [
"-f",
"lavfi",
"-i",
"color=c=black:s=512x512:rate=1",
"-i",
"input",
"-shortest",
"-pix_fmt",
"yuv420p",
"-r",
"1",
...toArgs(to, isAlac),
...metadataArgs,
...audioBitrateArgs,
...sampleRateArgs,
...channelsArgs,
...tracksArgs,
"output" + to,
];
return avWithBg(
to,
toArgs(to, isAlac),
metadataArgs,
audioBitrateArgs,
sampleRateArgs,
channelsArgs,
tracksArgs,
);
}
}
@ -654,57 +587,97 @@ export class FFmpegConverter extends Converter {
"output" + to,
];
}
/**
 * Tries to pull embedded cover art out of `input` into `cover.jpg`.
 *
 * Two ffmpeg invocations are attempted in order: an explicit `-map 0:1`
 * stream copy, then a copy that merely drops audio (`-an`).
 *
 * @param ffmpeg - instance whose virtual fs already holds `input`.
 * @returns `true` as soon as one attempt yields a non-empty `cover.jpg`, else `false`.
 */
private async extractAlbumArt(ffmpeg: FFmpeg): Promise<boolean> {
	const copyToCover = ["-c:v", "copy", "-update", "1", "cover.jpg"];
	const attempts = [
		{
			// extract using stream mapping (should work for most)
			command: ["-i", "input", "-map", "0:1", ...copyToCover],
			successMessage: "Successfully extracted album art from stream 0:1",
		},
		{
			// fallback: extract without stream mapping (this probably won't happen)
			command: ["-i", "input", "-an", ...copyToCover],
			successMessage: "Successfully extracted album art (fallback method)",
		},
	];

	for (const { command, successMessage } of attempts) {
		if (!(await this.tryExtractAlbumArt(ffmpeg, command))) continue;
		this.log(successMessage);
		return true;
	}

	this.log("No album art found, will create solid color background");
	return false;
}
/**
 * Runs one extraction command and reports whether it produced cover art.
 *
 * @param ffmpeg - instance to run the command on.
 * @param command - full ffmpeg argument list; expected to write `cover.jpg`.
 * @returns `true` when `cover.jpg` can be read back and is non-empty; any
 *          failure while executing or reading counts as `false`.
 */
private async tryExtractAlbumArt(
	ffmpeg: FFmpeg,
	command: string[],
): Promise<boolean> {
	try {
		await ffmpeg.exec(command);
		const cover = (await ffmpeg.readFile("cover.jpg")) as Uint8Array;
		if (!cover) return false;
		return cover.length > 0;
	} catch {
		return false;
	}
}
}
// const handleSpecialInput = async (
// ffmpeg: FFmpeg,
// input: VertFile,
// ): Promise<VertFile | null> => {
//
// }
/**
 * Produces output formats that are handled outside the regular ffmpeg command.
 *
 * Currently only `.qoa`: ffmpeg decodes the virtual file `input` to raw
 * interleaved 32-bit float PCM (`output.raw`), which is then passed to
 * `encodeQoa`.
 *
 * @param ffmpeg - ffmpeg instance; the source is read from its virtual fs as `input`.
 * @param input - source file; only its name is used, to derive the output file name.
 * @param to - target extension including the leading dot (e.g. `.qoa`).
 * @param conversionSettings - user settings; `sampleRate`, `customSampleRate` and `channels` are read.
 * @param conversionError - error message collected by the caller; a truthy value aborts the conversion.
 * @returns the converted file, or `null` when `to` needs no special handling.
 * @throws Error when a sample-rate/channel setting is not a positive number,
 *         when `conversionError` is set, or when ffmpeg produced no PCM data.
 */
const handleSpecialOutput = async (
	ffmpeg: FFmpeg,
	input: VertFile,
	to: string,
	conversionSettings: ConversionSettings,
	conversionError: string | null,
): Promise<VertFile | null> => {
	if (to === ".qoa") {
		const DEFAULT_SAMPLE_RATE = 48000;
		const DEFAULT_CHANNELS = 2;

		// The settings were previously only *cast* to number. Coerce and validate
		// explicitly so a numeric string (or garbage) can never reach the encoder.
		const toPositiveNumber = (value: unknown, label: string): number => {
			const n = Number(value);
			if (!Number.isFinite(n) || n <= 0)
				throw new Error(`invalid ${label}: ${String(value)}`);
			return n;
		};

		let sampleRate: number;
		if (
			conversionSettings.sampleRate &&
			conversionSettings.sampleRate !== "auto"
		) {
			sampleRate = toPositiveNumber(
				conversionSettings.sampleRate === "custom"
					? conversionSettings.customSampleRate
					: conversionSettings.sampleRate,
				"sample rate",
			);
		} else {
			const args = [
				"-v",
				"quiet",
				"-select_streams",
				"a:0",
				"-show_entries",
				"stream=sample_rate",
				"-of",
				"default=noprint_wrappers=1:nokey=1",
				"input",
			];
			const probed = await ffprobeValue(ffmpeg, args, (s) => {
				const n = parseInt(s, 10);
				return Number.isFinite(n) ? n : null;
			});
			// a missing or non-positive probe result falls back to the default
			sampleRate =
				probed !== null && probed > 0 ? probed : DEFAULT_SAMPLE_RATE;
		}

		const channels =
			conversionSettings.channels && conversionSettings.channels !== "auto"
				? toPositiveNumber(conversionSettings.channels, "channel count")
				: DEFAULT_CHANNELS;

		// decode to raw interleaved float PCM, which is what encodeQoa consumes
		const pcmArgs = [
			"-i",
			"input",
			"-f",
			"f32le",
			"-ar",
			String(sampleRate),
			"-ac",
			String(channels),
			"-c:a",
			"pcm_f32le",
			"output.raw",
		];
		await ffmpeg.exec(pcmArgs);

		// NOTE(review): `conversionError` is a string passed by value, so this only
		// sees what the caller had collected *before* calling us; errors logged
		// during the exec above are not visible here — confirm with the caller.
		if (conversionError) throw new Error(conversionError);

		const pcmRaw = (await ffmpeg.readFile(
			"output.raw",
		)) as unknown as Uint8Array;
		// same guard and message as the regular conversion path
		if (!pcmRaw || pcmRaw.length === 0)
			throw new Error("empty file returned");

		const { encodeQoa } = await import("$lib/util/parse/qoa");
		const qoaBytes = encodeQoa(new Uint8Array(pcmRaw), sampleRate, channels);

		const outputFileName =
			input.name.split(".").slice(0, -1).join(".") + ".qoa";
		return new VertFile(
			new File([new Uint8Array(qoaBytes)], outputFileName),
			".qoa",
		);
	}

	// if (whatever other formats need special parsing)

	return null;
};
/* the ffprobe helpers (ffprobeValue, detectAudioBitrate, detectAudioSampleRate) live in ./utils/ffprobe.ts */

View File

@ -0,0 +1,115 @@
import type { FFmpeg } from "@ffmpeg/ffmpeg";
/**
 * Runs one extraction command and reports whether it produced cover art.
 *
 * @param ffmpeg - instance to run the command on.
 * @param command - full ffmpeg argument list; expected to write `cover.jpg`.
 * @returns `true` when `cover.jpg` can be read back and is non-empty; any
 *          failure while executing or reading counts as `false`.
 */
const tryExtractAlbumArt = async (
	ffmpeg: FFmpeg,
	command: string[],
): Promise<boolean> => {
	try {
		await ffmpeg.exec(command);
		const cover = (await ffmpeg.readFile("cover.jpg")) as Uint8Array;
		if (!cover) return false;
		return cover.length > 0;
	} catch {
		return false;
	}
};
/**
 * Tries to pull embedded cover art out of `input` into `cover.jpg`.
 *
 * Two ffmpeg invocations are attempted in order: an explicit `-map 0:1`
 * stream copy, then a copy that merely drops audio (`-an`).
 *
 * @param ffmpeg - instance whose virtual fs already holds `input`.
 * @returns `true` as soon as one attempt yields a non-empty `cover.jpg`, else `false`.
 */
export const extractAlbumArt = async (ffmpeg: FFmpeg): Promise<boolean> => {
	const copyToCover = ["-c:v", "copy", "-update", "1", "cover.jpg"];
	const attempts: string[][] = [
		["-i", "input", "-map", "0:1", ...copyToCover],
		["-i", "input", "-an", ...copyToCover],
	];

	for (const command of attempts) {
		if (await tryExtractAlbumArt(ffmpeg, command)) return true;
	}
	return false;
};
/**
 * Builds the ffmpeg arguments for audio -> video using `cover.jpg` as a
 * looped still image behind the audio from `input`.
 *
 * @param to - output extension including the dot; the result is written to `output<to>`.
 * @param codecArgs - codec selection arguments.
 * @param metadataArgs - metadata arguments.
 * @param audioBitrateArgs - audio bitrate arguments.
 * @param sampleRateArgs - sample rate arguments.
 * @param channelsArgs - channel count arguments.
 * @param tracksArgs - track selection arguments.
 * @returns a new argument array; none of the inputs are modified.
 */
export const avWithArt = (
	to: string,
	codecArgs: string[],
	metadataArgs: string[],
	audioBitrateArgs: string[],
	sampleRateArgs: string[],
	channelsArgs: string[],
	tracksArgs: string[],
): string[] => {
	const inputArgs = ["-loop", "1", "-i", "cover.jpg", "-i", "input"];
	const videoArgs = [
		"-vf",
		"scale=trunc(iw/2)*2:trunc(ih/2)*2",
		"-shortest",
		"-pix_fmt",
		"yuv420p",
		"-r",
		"1",
	];

	return inputArgs.concat(
		videoArgs,
		codecArgs,
		metadataArgs,
		audioBitrateArgs,
		sampleRateArgs,
		channelsArgs,
		tracksArgs,
		[`output${to}`],
	);
};
/**
 * Builds the ffmpeg arguments for audio -> video using a generated solid
 * black 512x512 background behind the audio from `input`.
 *
 * @param to - output extension including the dot; the result is written to `output<to>`.
 * @param codecArgs - codec selection arguments.
 * @param metadataArgs - metadata arguments.
 * @param audioBitrateArgs - audio bitrate arguments.
 * @param sampleRateArgs - sample rate arguments.
 * @param channelsArgs - channel count arguments.
 * @param tracksArgs - track selection arguments.
 * @returns a new argument array; none of the inputs are modified.
 */
export const avWithBg = (
	to: string,
	codecArgs: string[],
	metadataArgs: string[],
	audioBitrateArgs: string[],
	sampleRateArgs: string[],
	channelsArgs: string[],
	tracksArgs: string[],
): string[] => {
	const inputArgs = [
		"-f",
		"lavfi",
		"-i",
		"color=c=black:s=512x512:rate=1",
		"-i",
		"input",
	];
	const videoArgs = ["-shortest", "-pix_fmt", "yuv420p", "-r", "1"];

	return inputArgs.concat(
		videoArgs,
		codecArgs,
		metadataArgs,
		audioBitrateArgs,
		sampleRateArgs,
		channelsArgs,
		tracksArgs,
		[`output${to}`],
	);
};

View File

@ -0,0 +1,70 @@
import type { FFmpeg } from "@ffmpeg/ffmpeg";
/**
 * Runs ffprobe and captures the first numeric value it emits on the "log" event.
 *
 * @param ffmpeg - instance to probe with.
 * @param targs - ffprobe argument list.
 * @param parse - optional converter from a trimmed log message to a number;
 *                returning `null` (or NaN) skips that message. Defaults to a
 *                base-10 `parseInt`.
 * @returns the first accepted value (0 is accepted), or `null` when no message
 *          qualified or ffprobe rejected.
 */
export const ffprobeValue = async (
	ffmpeg: FFmpeg,
	targs: string[],
	parse?: (s: string) => number | null,
): Promise<number | null> => {
	const toNumber = parse ?? ((s: string) => parseInt(s, 10));
	let captured: number | null = null;

	const onLog = ({ message }: { message: string }) => {
		// keep only the first accepted value
		if (captured !== null) return;
		const candidate = toNumber(message.trim());
		if (candidate == null || Number.isNaN(candidate)) return;
		captured = candidate;
	};

	ffmpeg.on("log", onLog);
	try {
		await ffmpeg.ffprobe.call(ffmpeg, targs);
	} catch {
		// a failed probe discards anything captured so far
		return null;
	} finally {
		ffmpeg.off("log", onLog);
	}
	return captured;
};
/**
 * Probes the first audio stream of the virtual file `input` for its bitrate.
 *
 * @param ffmpeg - instance whose virtual fs already holds `input`.
 * @returns the bitrate in kbps (bits per second / 1000, rounded), or `null`
 *          when ffprobe reported no parsable value.
 */
export const detectAudioBitrate = async (
	ffmpeg: FFmpeg,
): Promise<number | null> => {
	const toKbps = (raw: string): number | null => {
		const bitsPerSecond = parseInt(raw, 10);
		if (!Number.isFinite(bitsPerSecond)) return null;
		return Math.round(bitsPerSecond / 1000);
	};

	const kbps = await ffprobeValue(
		ffmpeg,
		[
			"-v",
			"quiet",
			"-select_streams",
			"a:0",
			"-show_entries",
			"stream=bit_rate",
			"-of",
			"default=noprint_wrappers=1:nokey=1",
			"input",
		],
		toKbps,
	);
	return kbps;
};
/**
 * Probes the first audio stream of the virtual file `input` for its sample rate.
 *
 * @param ffmpeg - instance whose virtual fs already holds `input`.
 * @returns the sample rate in Hz, or `null` when ffprobe reported no parsable value.
 */
export const detectAudioSampleRate = async (
	ffmpeg: FFmpeg,
): Promise<number | null> => {
	const toHertz = (raw: string): number | null => {
		const hertz = parseInt(raw, 10);
		if (!Number.isFinite(hertz)) return null;
		return hertz;
	};

	const sampleRate = await ffprobeValue(
		ffmpeg,
		[
			"-v",
			"quiet",
			"-select_streams",
			"a:0",
			"-show_entries",
			"stream=sample_rate",
			"-of",
			"default=noprint_wrappers=1:nokey=1",
			"input",
		],
		toHertz,
	);
	return sampleRate;
};

View File

@ -53,6 +53,7 @@ export class MagickConverter extends Converter {
new FormatInfo("psd", true, true),
new FormatInfo("dcm", true, false),
new FormatInfo("qoi", true, false),
// raw camera formats
new FormatInfo("arw", true, false),

127
src/lib/util/parse/qoa.ts Normal file
View File

@ -0,0 +1,127 @@
// @ts-expect-error - no types for qoa-format
import { encode, decode } from "qoa-format";
/** Result of `decodeQoa`: decoded audio plus the parameters needed to interpret it. */
export type QoaPcmData = {
// sample rate as reported by the decoder
sampleRate: number;
// channel count as reported by the decoder
channels: number;
// raw bytes of the interleaved Float32 samples (see interleaveChannelData)
pcm: Uint8Array;
};
/**
 * Decodes a QOA file into interleaved PCM.
 *
 * @param input - bytes of the QOA file.
 * @returns the decoder's sample rate and channel count, plus the per-channel
 *          data interleaved and exposed as raw bytes.
 */
export const decodeQoa = (input: Uint8Array): QoaPcmData => {
	const { sampleRate, channels, channelData } = decode(input);
	return {
		sampleRate,
		channels,
		pcm: interleaveChannelData(channelData),
	};
};
/**
 * Encodes interleaved Float32 PCM bytes as QOA.
 *
 * @param pcm - raw bytes of interleaved Float32 samples.
 * @param sampleRate - sample rate handed to the encoder.
 * @param channels - number of interleaved channels in `pcm`.
 * @returns the encoder's output.
 */
export const encodeQoa = (
	pcm: Uint8Array,
	sampleRate: number,
	channels: number,
): Uint8Array => {
	const perChannel = deinterleavePcm(pcm, channels);
	const audio = { sampleRate, channelData: perChannel };
	return encode(audio);
};
/**
 * Wraps raw PCM bytes in a 44-byte RIFF/WAVE header.
 *
 * @param pcm - sample bytes, copied verbatim after the header.
 * @param sampleRate - written to the fmt chunk.
 * @param channels - written to the fmt chunk.
 * @param float32 - `true` writes format 3 (IEEE float) with 32 bits per sample,
 *                  `false` writes format 1 (PCM) with 16 bits per sample.
 * @returns a new buffer holding header + data.
 */
export const encodeWav = (
	pcm: Uint8Array,
	sampleRate: number,
	channels: number,
	float32 = true,
): Uint8Array => {
	const HEADER_BYTES = 44;
	const LE = true; // every multi-byte header field is little-endian

	const sampleBits = float32 ? 32 : 16;
	const frameBytes = channels * (sampleBits / 8);
	const payloadBytes = pcm.byteLength;

	const wav = new Uint8Array(HEADER_BYTES + payloadBytes);
	const header = new DataView(wav.buffer);

	// RIFF container
	writeString(header, 0, "RIFF");
	header.setUint32(4, 36 + payloadBytes, LE); // file length - 8
	writeString(header, 8, "WAVE");

	// fmt chunk
	writeString(header, 12, "fmt ");
	header.setUint32(16, 16, LE); // fmt chunk length
	header.setUint16(20, float32 ? 3 : 1, LE); // audio format: 3 = IEEE float, 1 = PCM
	header.setUint16(22, channels, LE);
	header.setUint32(24, sampleRate, LE);
	header.setUint32(28, sampleRate * frameBytes, LE); // byte rate
	header.setUint16(32, frameBytes, LE); // block align
	header.setUint16(34, sampleBits, LE);

	// data chunk
	writeString(header, 36, "data");
	header.setUint32(40, payloadBytes, LE);

	// copy through an explicit byte view so the payload is always taken as raw bytes
	wav.set(
		new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength),
		HEADER_BYTES,
	);
	return wav;
};

/** Writes each UTF-16 code unit of `str` as one byte, starting at `offset`. */
const writeString = (view: DataView, offset: number, str: string) => {
	str.split("").forEach((unit, i) => {
		view.setUint8(offset + i, unit.charCodeAt(0));
	});
};
/**
 * Interleaves per-channel sample arrays (ch0[0], ch1[0], ch0[1], ...).
 *
 * The frame count is taken from the first channel; positions a channel has no
 * sample for are written as 0.
 *
 * @param channelData - one Float32Array per channel.
 * @returns the raw bytes of the interleaved Float32 buffer.
 */
const interleaveChannelData = (channelData: Float32Array[]) => {
	const stride = channelData.length;
	const frameCount = channelData[0]?.length ?? 0;
	const mixed = new Float32Array(frameCount * stride);

	// fill one channel at a time; each output slot is written exactly once
	channelData.forEach((plane, channelIndex) => {
		for (let frame = 0; frame < frameCount; frame++) {
			mixed[frame * stride + channelIndex] = plane?.[frame] ?? 0;
		}
	});

	return new Uint8Array(mixed.buffer);
};
/**
 * Splits interleaved Float32 PCM bytes into one Float32Array per channel.
 *
 * Trailing bytes that do not form a whole 4-byte sample, and trailing samples
 * that do not form a whole frame, are dropped.
 *
 * @param pcm - raw bytes of interleaved Float32 samples; may be an unaligned
 *              view, the bytes are copied before being reinterpreted.
 * @param channels - number of interleaved channels, at least 1.
 * @returns `channels` arrays, each holding that channel's samples in order.
 * @throws RangeError when `channels` is not at least 1 (previously a channel
 *         count of 0 made the frame count infinite and the loop never ended).
 */
const deinterleavePcm = (pcm: Uint8Array, channels: number) => {
	if (!(channels >= 1))
		throw new RangeError(`channel count must be at least 1, got ${channels}`);

	// Float32Array rejects byte lengths that are not a multiple of 4, so cut
	// off any partial sample at the end instead of throwing.
	const bytesPerSample = Float32Array.BYTES_PER_ELEMENT;
	const usableBytes = pcm.byteLength - (pcm.byteLength % bytesPerSample);

	// slice() copies, which also makes an unaligned byteOffset safe
	const floatPcm = new Float32Array(
		pcm.buffer.slice(pcm.byteOffset, pcm.byteOffset + usableBytes),
	);

	const samples = Math.floor(floatPcm.length / channels);
	const channelData = Array.from(
		{ length: channels },
		() => new Float32Array(samples),
	);

	channelData.forEach((plane, channel) => {
		for (let sample = 0; sample < samples; sample++) {
			plane[sample] = floatPcm[sample * channels + channel] ?? 0;
		}
	});

	return channelData;
};

View File

@ -170,7 +170,8 @@ const handleSpecialInput = async (
);
}
}
// else if (whatever other formats need special parsing)
// if (whatever other formats need special parsing)
return null;
};