mirror of https://github.com/VERT-sh/VERT.git
feat: documents support
This commit is contained in:
parent
6223e9f20b
commit
a84837cff4
Binary file not shown.
After Width: | Height: | Size: 86 KiB |
|
@ -0,0 +1,91 @@
|
|||
import { VertFile } from "$lib/types";
|
||||
import { Converter } from "./converter.svelte";
|
||||
import { browser } from "$app/environment";
|
||||
import PandocWorker from "$lib/workers/pandoc?worker";
|
||||
|
||||
/**
 * Document converter backed by a pandoc WebAssembly binary that is executed
 * inside a dedicated web worker.
 *
 * In the browser the binary is fetched from `/pandoc.wasm` when the converter
 * is constructed; `ready` becomes `true` once it has been downloaded.
 */
export class PandocConverter extends Converter {
	public name = "pandoc";
	public ready = $state(false);
	public wasm: ArrayBuffer = null!;

	constructor() {
		super();
		// Nothing to fetch outside the browser.
		if (!browser) return;
		(async () => {
			const response = await fetch("/pandoc.wasm");
			// Without this check an HTTP error page would be stored as the binary.
			if (!response.ok) {
				throw new Error(
					`Failed to fetch /pandoc.wasm: ${response.status} ${response.statusText}`,
				);
			}
			this.wasm = await response.arrayBuffer();
			this.ready = true;
		})().catch((e: unknown) => {
			// `ready` stays false; surface the reason instead of leaving an
			// unhandled promise rejection.
			console.error("[pandoc] failed to load WASM binary", e);
		});
	}

	/**
	 * Converts `input` to the format `to` using a fresh worker.
	 *
	 * @param input - File to convert.
	 * @param to - Target format, with or without a leading dot.
	 * @returns A new `VertFile` wrapping the converted bytes, tagged with the
	 *   dot-prefixed target format.
	 * @throws Error when the WASM binary is not loaded yet, or when the worker
	 *   reports a conversion error.
	 */
	public async convert(input: VertFile, to: string): Promise<VertFile> {
		if (!this.wasm) {
			throw new Error(
				"pandoc is not ready yet: the WASM binary has not finished loading.",
			);
		}

		const worker = new PandocWorker();
		try {
			worker.postMessage({ type: "load", wasm: this.wasm });
			await waitForMessage(worker, "loaded");
			worker.postMessage({
				type: "convert",
				to,
				file: input.file,
			});
			const result = await waitForMessage(worker);

			if (result.type === "error") {
				switch (result.errorKind) {
					case "PandocUnknownReaderError": {
						throw new Error(
							`${input.from} is not a supported input format for documents.`,
						);
					}

					case "PandocUnknownWriterError": {
						throw new Error(
							`${to} is not a supported output format for documents.`,
						);
					}

					default:
						throw new Error(`[${result.errorKind}] ${result.error}`);
				}
			}

			const extension = to.startsWith(".") ? to : `.${to}`;
			return new VertFile(
				new File([result.output], input.name),
				extension,
			);
		} finally {
			// Always release the worker, including when an error is thrown.
			worker.terminate();
		}
	}

	public supportedFormats = [
		".docx",
		".md",
		".rtf",
		".doc",
		".csv",
		".tsv",
		".json",
		".rst",
		".epub",
		".odt",
		".docbook",
		".html",
	];
}
|
||||
|
||||
/**
 * Waits for a message from `worker` and resolves with its `data` payload.
 *
 * @param worker - Worker to listen on.
 * @param type - When given (and non-empty), only a message whose `data.type`
 *   equals this value resolves the promise; other messages are ignored and
 *   listening continues. When omitted, the first message of any type resolves.
 * @returns Promise resolving with the message's `data`. It never rejects and
 *   has no timeout.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function waitForMessage(worker: Worker, type?: string): Promise<any> {
	return new Promise((resolve) => {
		const onMessage = (e: MessageEvent) => {
			// Not the message we are waiting for: stay subscribed.
			if (type && e.data?.type !== type) return;
			worker.removeEventListener("message", onMessage);
			resolve(e.data);
		};
		worker.addEventListener("message", onMessage);
	});
}
|
|
@ -0,0 +1,189 @@
|
|||
import * as wasiShim from "@bjorn3/browser_wasi_shim";
|
||||
|
||||
// Worker entry point: forwards each incoming request to `handleMessage` and
// posts the reply (if any) back, echoing the request's `id`. Anything thrown
// while handling or replying is reported as an "error" message instead.
self.onmessage = async (event) => {
	const request = event.data;
	try {
		const reply = await handleMessage(request);
		if (reply) {
			self.postMessage({ ...reply, id: request.id });
		}
	} catch (err) {
		self.postMessage({ type: "error", error: err, id: request.id });
	}
};
|
||||
|
||||
/** Bytes of the pandoc WASM binary; stays null until a "load" message supplies it. */
let wasm: ArrayBuffer = null!;

/** Dot-prefixed file extensions that this worker knows how to map to a pandoc format. */
type Format =
	| ".csv"
	| ".doc"
	| ".docbook"
	| ".docx"
	| ".epub"
	| ".html"
	| ".json"
	| ".markdown"
	| ".md"
	| ".odt"
	| ".rst"
	| ".rtf"
	| ".tsv";
|
||||
|
||||
/**
 * Handles one request sent to the worker.
 *
 * - `"load"`: stores `message.wasm` as the pandoc binary and posts
 *   `{ type: "loaded" }`; returns nothing.
 * - `"convert"`: runs pandoc on `message.file`, choosing the input format from
 *   the file name's extension and the output format from `message.to`.
 *   Returns `{ type: "finished", output }` on success, or
 *   `{ type: "error", error, errorKind }` on failure.
 *
 * Any other message type is ignored (returns `undefined`).
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const handleMessage = async (message: any): Promise<any> => {
	switch (message.type) {
		case "load": {
			wasm = message.wasm;
			postMessage({ type: "loaded" });
			break;
		}

		case "convert": {
			try {
				const { to, file }: { to: Format; file: File } = message;
				const buf = new Uint8Array(await file.arrayBuffer());
				const from = `.${file.name.split(".").pop() || ""}` as Format;
				const args = `-f ${formatToReader(from)} -t ${formatToReader(to)}`;
				const [result, stderr] = await pandoc(args, buf);

				// Any stderr output is treated as a failed conversion.
				if (stderr) {
					// Un-escape the text and keep what lies between the first and
					// last double quote.
					const quoted = stderr
						.replaceAll("\\n", "\n")
						.replaceAll('\\"', '"')
						.split('"')
						.slice(1, -1)
						.join('"');
					return {
						type: "error",
						// Without quotes the extraction is empty; fall back to the
						// raw text so the message is not lost.
						error: quoted || stderr.trim(),
						// First whitespace-delimited word, without a trailing newline.
						errorKind: stderr.trim().split(/\s+/)[0],
					};
				}

				return {
					type: "finished",
					output: result,
				};
			} catch (e) {
				console.error(e);
				// Report a plain message plus a kind so the receiver can format
				// the failure the same way as a pandoc-reported error.
				return {
					type: "error",
					error: e instanceof Error ? e.message : String(e),
					errorKind: e instanceof Error ? e.name : "Error",
				};
			}
		}
	}

	return undefined;
};
|
||||
|
||||
/**
 * Maps a file extension to the pandoc format name used on the command line.
 *
 * Matching ignores case and tolerates a missing leading dot, so `".DOCX"` and
 * `"docx"` both resolve to `"docx"`.
 *
 * @param format - File extension, e.g. `".md"`.
 * @returns The pandoc format name for that extension.
 * @throws Error if the extension has no known pandoc format.
 */
const formatToReader = (format: Format): string => {
	const lowered = format.toLowerCase();
	const extension = lowered.startsWith(".") ? lowered : `.${lowered}`;

	switch (extension) {
		case ".md":
		case ".markdown":
			return "markdown";
		case ".doc":
		case ".docx":
			return "docx";
		case ".csv":
			return "csv";
		case ".tsv":
			return "tsv";
		case ".docbook":
			return "docbook";
		case ".epub":
			return "epub";
		case ".html":
			return "html";
		case ".json":
			return "json";
		case ".odt":
			return "odt";
		case ".rtf":
			return "rtf";
		case ".rst":
			return "rst";
	}

	throw new Error(`Unsupported format: ${format}`);
};
|
||||
|
||||
/**
 * Runs the loaded pandoc WASM module once over `in_data`.
 *
 * The input bytes are exposed to the module as the file `in` and an initially
 * empty file `out` is provided next to it, both inside the preopened `/`
 * directory. Everything the module writes to stderr is collected and returned.
 *
 * @param args_str - Argument string handed to the module's `wasm_main` export.
 * @param in_data - Bytes of the document to convert.
 * @returns A tuple of the `out` file's contents and the captured stderr text.
 * @throws Error if no WASM binary has been loaded yet.
 */
async function pandoc(
	args_str: string,
	in_data: Uint8Array,
): Promise<[Uint8Array, string]> {
	if (!wasm) throw new Error("WASM not loaded");

	let stderr = "";
	// Passed to the runtime as argv — presumably GHC RTS options; verify against
	// the pandoc WASM build in use.
	const args = ["pandoc.wasm", "+RTS", "-H64m", "-RTS"];
	const env: string[] = [];
	const in_file = new wasiShim.File(in_data, {
		readonly: true,
	});
	const out_file = new wasiShim.File(new Uint8Array(), {
		readonly: false,
	});
	const map = new Map<string, wasiShim.File>([
		["in", in_file],
		["out", out_file],
	]);
	const fds = [
		// fd 0: empty, read-only stdin.
		new wasiShim.OpenFile(
			new wasiShim.File(new Uint8Array(), { readonly: true }),
		),
		// fd 1: stdout is only logged.
		wasiShim.ConsoleStdout.lineBuffered((msg) => {
			console.log(`[WASI stdout] ${msg}`);
		}),
		// fd 2: stderr is logged and accumulated for the caller.
		wasiShim.ConsoleStdout.lineBuffered((msg) => {
			console.warn(`[WASI stderr] ${msg}`);
			stderr += msg + "\n";
		}),
		new wasiShim.PreopenDirectory("/", map),
		new wasiShim.PreopenDirectory("/tmp", new Map()),
	];

	const wasi = new wasiShim.WASI(args, env, fds, { debug: true });
	// eslint-disable-next-line @typescript-eslint/no-explicit-any
	const { instance }: { instance: any } = await WebAssembly.instantiate(
		wasm,
		{
			wasi_snapshot_preview1: wasi.wasiImport,
		},
	);

	wasi.initialize(instance);

	instance.exports.__wasm_call_ctors();

	// A fresh view is built on every use: a view created before an allocation
	// can become stale if the module's memory grows.
	function memory_data_view() {
		return new DataView(instance.exports.memory.buffer);
	}

	const encoder = new TextEncoder();

	// Copies `bytes` into the module's memory starting at `ptr`.
	function write_bytes(ptr: number, bytes: Uint8Array) {
		new Uint8Array(
			instance.exports.memory.buffer,
			ptr,
			bytes.length,
		).set(bytes);
	}

	// Build argc/argv inside the module's memory (32-bit little-endian pointers).
	const argc_ptr = instance.exports.malloc(4);
	memory_data_view().setUint32(argc_ptr, args.length, true);
	const argv = instance.exports.malloc(4 * (args.length + 1));
	for (let i = 0; i < args.length; ++i) {
		// Size the allocation by encoded byte count, not UTF-16 length, so
		// non-ASCII text is not truncated.
		const bytes = encoder.encode(args[i]);
		const arg = instance.exports.malloc(bytes.length + 1);
		write_bytes(arg, bytes);
		memory_data_view().setUint8(arg + bytes.length, 0); // NUL terminator
		memory_data_view().setUint32(argv + 4 * i, arg, true);
	}
	memory_data_view().setUint32(argv + 4 * args.length, 0, true); // argv[argc] = NULL
	const argv_ptr = instance.exports.malloc(4);
	memory_data_view().setUint32(argv_ptr, argv, true);

	instance.exports.hs_init_with_rtsopts(argc_ptr, argv_ptr);

	// The argument string is passed as pointer + byte length (no terminator).
	const args_bytes = encoder.encode(args_str);
	const args_ptr = instance.exports.malloc(args_bytes.length);
	write_bytes(args_ptr, args_bytes);

	instance.exports.wasm_main(args_ptr, args_bytes.length);
	return [out_file.data, stderr];
}
|
Binary file not shown.
Loading…
Reference in New Issue