diff --git a/src/lib/assets/avatars/liam.jpg b/src/lib/assets/avatars/liam.jpg
new file mode 100644
index 0000000..5f4e2b6
Binary files /dev/null and b/src/lib/assets/avatars/liam.jpg differ
diff --git a/src/lib/converters/pandoc.svelte.ts b/src/lib/converters/pandoc.svelte.ts
new file mode 100644
index 0000000..329b2c6
--- /dev/null
+++ b/src/lib/converters/pandoc.svelte.ts
@@ -0,0 +1,115 @@
+import { VertFile } from "$lib/types";
+import { Converter } from "./converter.svelte";
+import { browser } from "$app/environment";
+import PandocWorker from "$lib/workers/pandoc?worker";
+
+/**
+ * Document converter backed by a WebAssembly build of pandoc.
+ *
+ * The binary is fetched once when the converter is constructed in the
+ * browser; each conversion then runs in its own short-lived worker.
+ */
+export class PandocConverter extends Converter {
+    public name = "pandoc";
+    public ready = $state(false);
+    public wasm: ArrayBuffer = null!;
+
+    constructor() {
+        super();
+        // Nothing is fetched when `browser` is false.
+        if (!browser) return;
+        (async () => {
+            this.wasm = await fetch("/pandoc.wasm").then((r) =>
+                r.arrayBuffer(),
+            );
+            this.ready = true;
+        })();
+    }
+
+    /**
+     * Converts `input` to the `to` format inside a dedicated worker.
+     *
+     * @param input File to convert.
+     * @param to Target format, with or without a leading ".".
+     * @returns A new VertFile holding the converted bytes under the input's name.
+     * @throws Error if the binary is not loaded yet or the worker reports an error.
+     */
+    public async convert(input: VertFile, to: string): Promise<VertFile> {
+        if (!this.wasm) {
+            throw new Error("pandoc.wasm has not finished loading.");
+        }
+        // The worker only recognises dotted extensions such as ".docx".
+        const format = to.startsWith(".") ? to : `.${to}`;
+        const worker = new PandocWorker();
+        try {
+            worker.postMessage({ type: "load", wasm: this.wasm });
+            await waitForMessage(worker, "loaded");
+            worker.postMessage({
+                type: "convert",
+                to: format,
+                file: input.file,
+            });
+            const result = await waitForMessage(worker);
+            if (result.type === "error") {
+                switch (result.errorKind) {
+                    case "PandocUnknownReaderError": {
+                        throw new Error(
+                            `${input.from} is not a supported input format for documents.`,
+                        );
+                    }
+
+                    case "PandocUnknownWriterError": {
+                        throw new Error(
+                            `${to} is not a supported output format for documents.`,
+                        );
+                    }
+
+                    default:
+                        throw new Error(
+                            `[${result.errorKind}] ${result.error}`,
+                        );
+                }
+            }
+            return new VertFile(
+                new File([result.output], input.name),
+                format,
+            );
+        } finally {
+            // Each call owns its worker, so release it on every exit path.
+            worker.terminate();
+        }
+    }
+
+    public supportedFormats = [
+        ".docx",
+        ".md",
+        ".rtf",
+        ".doc",
+        ".csv",
+        ".tsv",
+        ".json",
+        ".rst",
+        ".epub",
+        ".odt",
+        ".docbook",
+        ".html",
+    ];
+}
+
+/**
+ * Resolves with the data of the next message posted by `worker`.
+ *
+ * When `type` is given, messages whose `type` field differs are skipped and
+ * the listener stays attached until a matching message arrives.
+ */
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+function waitForMessage(worker: Worker, type?: string): Promise<any> {
+    return new Promise((resolve) => {
+        const onMessage = (e: MessageEvent) => {
+            if (type && e.data.type !== type) return;
+            worker.removeEventListener("message", onMessage);
+            resolve(e.data);
+        };
+        worker.addEventListener("message", onMessage);
+    });
+}
diff --git a/src/lib/workers/pandoc.ts b/src/lib/workers/pandoc.ts
new file mode 100644
index 0000000..804b886
--- /dev/null
+++ b/src/lib/workers/pandoc.ts
@@ -0,0 +1,232 @@
+import * as wasiShim from "@bjorn3/browser_wasi_shim";
+
+/**
+ * Worker entry point: passes each incoming message to `handleMessage` and
+ * posts back whatever it returns, tagged with the request's `id`.
+ */
+self.onmessage = async (e) => {
+    const message = e.data;
+    try {
+        const res = await handleMessage(message);
+        if (!res) return;
+        self.postMessage({
+            ...res,
+            id: message.id,
+        });
+    } catch (err) {
+        self.postMessage({
+            ...describeError(err),
+            id: message.id,
+        });
+    }
+};
+
+/** Flattens a thrown value into the same error shape used for pandoc failures. */
+function describeError(err: unknown) {
+    return {
+        type: "error",
+        error: err instanceof Error ? err.message : String(err),
+        errorKind: err instanceof Error ? err.name : "Error",
+    };
+}
+
+/** Binary handed over by the "load" message; null until then. */
+let wasm: ArrayBuffer = null!;
+
+type Format =
+    | ".md"
+    | ".docx"
+    | ".csv"
+    | ".tsv"
+    | ".json"
+    | ".doc"
+    | ".rtf"
+    | ".rst"
+    | ".epub"
+    | ".odt"
+    | ".docbook"
+    | ".html"
+    | ".markdown";
+
+/**
+ * Handles one request from the main thread.
+ *
+ * - "load" stores the binary and posts `{ type: "loaded" }`.
+ * - "convert" runs pandoc on `message.file` and returns either a "finished"
+ *   message carrying the output or an "error" message.
+ *
+ * Any stderr output from pandoc is treated as a failure.
+ */
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+const handleMessage = async (message: any): Promise<any> => {
+    switch (message.type) {
+        case "load": {
+            wasm = message.wasm;
+            postMessage({ type: "loaded" });
+            break;
+        }
+
+        case "convert": {
+            try {
+                const { to, file }: { to: Format; file: File } = message;
+                const buf = new Uint8Array(await file.arrayBuffer());
+                // The input format is taken from the file name's extension.
+                const from = `.${file.name.split(".").pop() || ""}` as Format;
+                const args = `-f ${formatToReader(from)} -t ${formatToReader(to)}`;
+                const [result, stderr] = await pandoc(args, buf);
+                if (stderr) {
+                    // Keep what sits between the first and last double quote.
+                    const quoted = stderr
+                        .replaceAll("\\n", "\n")
+                        .replaceAll('\\"', '"')
+                        .split('"')
+                        .slice(1, -1)
+                        .join('"');
+                    return {
+                        type: "error",
+                        // Fall back to the raw text when nothing is quoted.
+                        error: quoted || stderr.trim(),
+                        errorKind: stderr.split(" ")[0],
+                    };
+                }
+                return {
+                    type: "finished",
+                    output: result,
+                };
+            } catch (e) {
+                console.error(e);
+                return describeError(e);
+            }
+        }
+    }
+};
+
+/**
+ * Maps a file extension to the pandoc format name passed to `-f` / `-t`.
+ *
+ * @throws Error for extensions outside the `Format` union.
+ */
+const formatToReader = (format: Format): string => {
+    switch (format) {
+        case ".md":
+        case ".markdown":
+            return "markdown";
+        case ".doc":
+        case ".docx":
+            return "docx";
+        case ".csv":
+            return "csv";
+        case ".tsv":
+            return "tsv";
+        case ".docbook":
+            return "docbook";
+        case ".epub":
+            return "epub";
+        case ".html":
+            return "html";
+        case ".json":
+            return "json";
+        case ".odt":
+            return "odt";
+        case ".rtf":
+            return "rtf";
+        case ".rst":
+            return "rst";
+    }
+
+    throw new Error(`Unsupported format: ${format}`);
+};
+
+/**
+ * Instantiates the loaded binary and runs pandoc once.
+ *
+ * @param args_str Argument string handed to the module's `wasm_main`.
+ * @param in_data Bytes exposed to the module as the file "in".
+ * @returns The contents of the file "out" and everything written to stderr.
+ * @throws Error if no binary has been loaded yet.
+ */
+async function pandoc(
+    args_str: string,
+    in_data: Uint8Array,
+): Promise<[Uint8Array, string]> {
+    if (!wasm) throw new Error("WASM not loaded");
+    let stderr = "";
+    const args = ["pandoc.wasm", "+RTS", "-H64m", "-RTS"];
+    const env: string[] = [];
+    const in_file = new wasiShim.File(in_data, {
+        readonly: true,
+    });
+    const out_file = new wasiShim.File(new Uint8Array(), {
+        readonly: false,
+    });
+    const map = new Map([
+        ["in", in_file],
+        ["out", out_file],
+    ]);
+    // In order: empty read-only input, stdout sink, stderr sink, preopened dirs.
+    const fds = [
+        new wasiShim.OpenFile(
+            new wasiShim.File(new Uint8Array(), { readonly: true }),
+        ),
+        wasiShim.ConsoleStdout.lineBuffered((msg) => {
+            console.log(`[WASI stdout] ${msg}`);
+        }),
+        wasiShim.ConsoleStdout.lineBuffered((msg) => {
+            console.warn(`[WASI stderr] ${msg}`);
+            stderr += msg + "\n";
+        }),
+        new wasiShim.PreopenDirectory("/", map),
+        new wasiShim.PreopenDirectory("/tmp", new Map()),
+    ];
+
+    const wasi = new wasiShim.WASI(args, env, fds, { debug: true });
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const { instance }: { instance: any } = await WebAssembly.instantiate(
+        wasm,
+        {
+            wasi_snapshot_preview1: wasi.wasiImport,
+        },
+    );
+
+    wasi.initialize(instance);
+
+    instance.exports.__wasm_call_ctors();
+
+    // Builds a new view over the module's current memory on every call.
+    function memory_data_view() {
+        return new DataView(instance.exports.memory.buffer);
+    }
+
+    // Sizes below come from the encoded bytes, not UTF-16 code units.
+    const encoder = new TextEncoder();
+    const argc_ptr = instance.exports.malloc(4);
+    memory_data_view().setUint32(argc_ptr, args.length, true);
+    const argv = instance.exports.malloc(4 * (args.length + 1));
+    for (let i = 0; i < args.length; ++i) {
+        const bytes = encoder.encode(args[i]);
+        const arg = instance.exports.malloc(bytes.length + 1);
+        new Uint8Array(
+            instance.exports.memory.buffer,
+            arg,
+            bytes.length,
+        ).set(bytes);
+        memory_data_view().setUint8(arg + bytes.length, 0);
+        memory_data_view().setUint32(argv + 4 * i, arg, true);
+    }
+    memory_data_view().setUint32(argv + 4 * args.length, 0, true);
+    const argv_ptr = instance.exports.malloc(4);
+    memory_data_view().setUint32(argv_ptr, argv, true);
+
+    instance.exports.hs_init_with_rtsopts(argc_ptr, argv_ptr);
+
+    const args_bytes = encoder.encode(args_str);
+    const args_ptr = instance.exports.malloc(args_bytes.length);
+    new Uint8Array(
+        instance.exports.memory.buffer,
+        args_ptr,
+        args_bytes.length,
+    ).set(args_bytes);
+
+    instance.exports.wasm_main(args_ptr, args_bytes.length);
+    return [out_file.data, stderr];
+}
diff --git a/static/pandoc.wasm b/static/pandoc.wasm
new file mode 100644
index 0000000..36321b5
Binary files /dev/null and b/static/pandoc.wasm differ