mirror of https://github.com/garrytan/gstack.git
420 lines
14 KiB
TypeScript
420 lines
14 KiB
TypeScript
/**
|
|
* CLI-side client for the design daemon.
|
|
*
|
|
* Responsible for the lifecycle dance that `$D compare --serve` (default
|
|
* path) goes through:
|
|
*
|
|
* ensureDaemon() → publishBoard(html, opts) → openBrowser(url) → exit 0
|
|
*
|
|
* Mirrors browse/src/cli.ts:317-415 — same health-check-first attach
|
|
* decision, same fs.openSync('wx') lock, same re-read-under-lock guard.
|
|
* Adds two design-specific safety properties Codex flagged on the daemon
|
|
* plan:
|
|
*
|
|
* 1. Identity verification before any SIGTERM. Browse signals on PID
|
|
* alone; here we require the cmdline to contain CMDLINE_MARKER so a
|
|
* stale state file pointing at a reused PID doesn't kill an
|
|
* unrelated process.
|
|
*
|
|
* 2. Refuse-to-kill on version mismatch with active boards. Browse will
|
|
* restart on version drift; here in-memory boards would be lost, so
|
|
* we exit 1 with a user-actionable message instead of silent loss.
|
|
*
|
|
* Spawn uses Node's child_process.spawn with detached: true + stdio
|
|
* pointed at a log file. Bun.spawn().unref() has macOS session-detach
|
|
* quirks browse already discovered (browse/src/cli.ts:225-275).
|
|
*/
|
|
|
|
import { spawn as nodeSpawn } from "child_process";
|
|
import fs from "fs";
|
|
import path from "path";
|
|
import { setTimeout as delay } from "timers/promises";
|
|
|
|
import {
|
|
acquireLock,
|
|
CMDLINE_MARKER,
|
|
healthCheck,
|
|
isProcessAlive,
|
|
readStateFile,
|
|
readVersionString,
|
|
resolveLockFilePath,
|
|
resolveStartupLogPath,
|
|
resolveStateFilePath,
|
|
verifyIdentity,
|
|
} from "./daemon-state";
|
|
|
|
const MAX_START_WAIT_MS = parseInt(
|
|
process.env.DESIGN_DAEMON_START_TIMEOUT_MS || "8000",
|
|
10,
|
|
);
|
|
const POLL_INTERVAL_MS = 100;
|
|
const SIGTERM_GRACE_MS = 2000;
|
|
|
|
export interface EnsureDaemonOptions {
|
|
/** Default: package version. Used for version-match check. */
|
|
version?: string;
|
|
/** Default: `<repo>/design/src/daemon.ts`. */
|
|
daemonScript?: string;
|
|
/** Extra env vars passed to the spawned daemon. */
|
|
daemonEnv?: Record<string, string>;
|
|
/** Print noisy progress to stderr. Default true. */
|
|
verbose?: boolean;
|
|
/**
|
|
* Override the state-file path. Default: resolveStateFilePath() (env
|
|
* DESIGN_DAEMON_STATE_FILE or .gstack/design.json under the git root /
|
|
* cwd). Tests inject a per-test path; the same path is forwarded to the
|
|
* spawned daemon via env so client + daemon agree.
|
|
*/
|
|
stateFile?: string;
|
|
}
|
|
|
|
export interface EnsureDaemonResult {
|
|
port: number;
|
|
version: string;
|
|
spawned: boolean;
|
|
}
|
|
|
|
function log(verbose: boolean, msg: string): void {
|
|
if (verbose) process.stderr.write(`[design-daemon] ${msg}\n`);
|
|
}
|
|
|
|
/**
|
|
* Ensure a design daemon is reachable on the project's state file. Returns
|
|
* the port to talk to. Spawns a new daemon under an exclusive lock when
|
|
* needed; attaches to an existing healthy daemon otherwise.
|
|
*
|
|
* Exits with code 1 (not throws) on the refuse-kill-with-active-boards
|
|
* branch — that's a user-actionable situation, not a programming error.
|
|
*/
|
|
export async function ensureDaemon(
|
|
opts: EnsureDaemonOptions = {},
|
|
): Promise<EnsureDaemonResult> {
|
|
const verbose = opts.verbose !== false;
|
|
const expectedVersion = opts.version ?? readPackageVersion();
|
|
const stateFile = opts.stateFile ?? resolveStateFilePath();
|
|
|
|
const existing = readStateFile(stateFile);
|
|
if (existing) {
|
|
const health = await healthCheck(existing.port);
|
|
if (health) {
|
|
if (health.version === expectedVersion) {
|
|
log(verbose, `attached to existing daemon pid=${existing.pid} port=${existing.port}`);
|
|
return { port: existing.port, version: health.version, spawned: false };
|
|
}
|
|
// Version mismatch: refuse if active boards exist (Codex finding).
|
|
if (health.activeBoards > 0) {
|
|
process.stderr.write(
|
|
`[design-daemon] WARNING: existing daemon is gstack ${health.version}; this CLI is ${expectedVersion}.\n` +
|
|
`[design-daemon] ${health.activeBoards} active board(s) detected. Refusing to auto-kill.\n` +
|
|
`[design-daemon] Submit or close the open boards, then re-run.\n` +
|
|
`[design-daemon] Or force restart: $D daemon stop (will drop in-memory history).\n`,
|
|
);
|
|
process.exit(1);
|
|
}
|
|
// No active boards — safe to graceful-shutdown and respawn.
|
|
log(verbose, `daemon version mismatch (${health.version} vs ${expectedVersion}); shutting down`);
|
|
await gracefulShutdownExistingDaemon(existing.port);
|
|
await killByPidWithIdentity(existing.pid, existing.cmdlineMarker, verbose);
|
|
} else {
|
|
// State file points at an unresponsive port. Either the daemon
|
|
// crashed or the PID got reused. Identity-verify before any SIGTERM
|
|
// so we don't kill an unrelated process (Codex finding).
|
|
log(verbose, `state file present (pid=${existing.pid}) but /health unresponsive`);
|
|
await killByPidWithIdentity(existing.pid, existing.cmdlineMarker, verbose);
|
|
}
|
|
}
|
|
|
|
// Spawn under exclusive lock; re-read state INSIDE the lock so we don't
|
|
// race a concurrent CLI that won the lock first.
|
|
const lockPath = resolveLockFilePath(stateFile);
|
|
const release = acquireLock(lockPath);
|
|
if (!release) {
|
|
// Another process is starting the daemon. Wait for it.
|
|
log(verbose, "another CLI is spawning the daemon; waiting…");
|
|
const start = Date.now();
|
|
while (Date.now() - start < MAX_START_WAIT_MS) {
|
|
const fresh = readStateFile(stateFile);
|
|
if (fresh) {
|
|
const h = await healthCheck(fresh.port);
|
|
if (h) return { port: fresh.port, version: h.version, spawned: false };
|
|
}
|
|
await delay(POLL_INTERVAL_MS);
|
|
}
|
|
throw new Error("Timed out waiting for concurrent daemon spawn");
|
|
}
|
|
|
|
try {
|
|
// Re-read under lock. Another caller may have already finished spawning
|
|
// between our first check and our lock acquisition.
|
|
const fresh = readStateFile(stateFile);
|
|
if (fresh) {
|
|
const h = await healthCheck(fresh.port);
|
|
if (h && h.version === expectedVersion) {
|
|
log(verbose, `another CLI won the lock; attaching pid=${fresh.pid} port=${fresh.port}`);
|
|
return { port: fresh.port, version: h.version, spawned: false };
|
|
}
|
|
}
|
|
|
|
log(verbose, "spawning new daemon");
|
|
const port = await spawnDaemon({
|
|
script: opts.daemonScript,
|
|
env: { ...opts.daemonEnv, DESIGN_DAEMON_STATE_FILE: stateFile },
|
|
stateFile,
|
|
expectedVersion,
|
|
});
|
|
return { port, version: expectedVersion, spawned: true };
|
|
} finally {
|
|
release();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Publish a board to the daemon and return its URL. Wraps the HTTP POST
|
|
* with a friendlier error surface than raw fetch.
|
|
*/
|
|
export interface PublishBoardOptions {
|
|
port: number;
|
|
html: string;
|
|
title?: string;
|
|
publisherPid?: number;
|
|
}
|
|
|
|
export interface PublishBoardResult {
|
|
id: string;
|
|
url: string;
|
|
sourceDir: string;
|
|
}
|
|
|
|
export async function publishBoard(opts: PublishBoardOptions): Promise<PublishBoardResult> {
|
|
const body: Record<string, unknown> = {
|
|
html: opts.html,
|
|
publisherPid: opts.publisherPid ?? process.pid,
|
|
};
|
|
if (opts.title) body.title = opts.title;
|
|
const resp = await fetch(`http://127.0.0.1:${opts.port}/api/boards`, {
|
|
method: "POST",
|
|
headers: { "Content-Type": "application/json" },
|
|
body: JSON.stringify(body),
|
|
});
|
|
if (!resp.ok) {
|
|
let errText: string;
|
|
try {
|
|
const j = (await resp.json()) as { error?: string; existing?: { id: string; url: string } };
|
|
if (j.existing) {
|
|
// 409: surface the existing-board URL so the caller can reuse it
|
|
return { id: j.existing.id, url: j.existing.url, sourceDir: "" };
|
|
}
|
|
errText = j.error || `HTTP ${resp.status}`;
|
|
} catch {
|
|
errText = `HTTP ${resp.status}`;
|
|
}
|
|
throw new Error(`Daemon refused publish: ${errText}`);
|
|
}
|
|
return (await resp.json()) as PublishBoardResult;
|
|
}
|
|
|
|
// ─── Internals ───────────────────────────────────────────────────
|
|
|
|
function readPackageVersion(): string {
|
|
return readVersionString();
|
|
}
|
|
|
|
function defaultDaemonScript(): string {
|
|
// design/src/daemon-client.ts → daemon.ts is a sibling. Only used in dev
|
|
// when this process is `bun run cli.ts`; the compiled-binary path
|
|
// self-execs instead (see resolveSpawnCommand).
|
|
return path.join(import.meta.dir, "daemon.ts");
|
|
}
|
|
|
|
/**
|
|
* Compute the argv to spawn the daemon. Two modes:
|
|
*
|
|
* Compiled binary (`design/dist/design`): re-exec ourselves with
|
|
* --daemon-mode. process.execPath IS the compiled design binary;
|
|
* spawning it again with the flag runs the daemon (see the
|
|
* --daemon-mode branch at the bottom of cli.ts).
|
|
*
|
|
* Dev (`bun run design/src/cli.ts`): process.execPath is bun, so we
|
|
* invoke `bun run <daemon.ts> --marker ...` directly.
|
|
*
|
|
* Tests can override the dev script via opts.script.
|
|
*/
|
|
function resolveSpawnCommand(scriptOverride: string | undefined): {
|
|
command: string;
|
|
args: string[];
|
|
} {
|
|
const execBase = path.basename(process.execPath).toLowerCase();
|
|
const isCompiledHost = execBase !== "bun" && execBase !== "bun.exe" && execBase !== "node";
|
|
if (isCompiledHost && !scriptOverride) {
|
|
return {
|
|
command: process.execPath,
|
|
args: ["--daemon-mode", "--marker", CMDLINE_MARKER],
|
|
};
|
|
}
|
|
const script = scriptOverride ?? defaultDaemonScript();
|
|
return {
|
|
command: "bun",
|
|
args: ["run", script, "--marker", CMDLINE_MARKER],
|
|
};
|
|
}
|
|
|
|
interface SpawnDaemonOpts {
|
|
script?: string;
|
|
env?: Record<string, string>;
|
|
stateFile: string;
|
|
expectedVersion: string;
|
|
}
|
|
|
|
async function spawnDaemon(opts: SpawnDaemonOpts): Promise<number> {
|
|
const logPath = resolveStartupLogPath();
|
|
fs.mkdirSync(path.dirname(logPath), { recursive: true });
|
|
// Truncate the startup log on each spawn so a later read finds only THIS
|
|
// attempt's output (mirrors browse's per-spawn log truncation).
|
|
fs.writeFileSync(logPath, "");
|
|
const logFd = fs.openSync(logPath, "a");
|
|
|
|
const { command, args } = resolveSpawnCommand(opts.script);
|
|
|
|
const child = nodeSpawn(command, args, {
|
|
detached: true,
|
|
stdio: ["ignore", logFd, logFd],
|
|
env: {
|
|
...process.env,
|
|
DESIGN_DAEMON_VERSION: opts.expectedVersion,
|
|
...(opts.env ?? {}),
|
|
},
|
|
});
|
|
child.unref();
|
|
fs.closeSync(logFd);
|
|
|
|
// Poll the state file + /health until the daemon is up, or until timeout.
|
|
const deadline = Date.now() + MAX_START_WAIT_MS;
|
|
while (Date.now() < deadline) {
|
|
const fresh = readStateFile(opts.stateFile);
|
|
if (fresh) {
|
|
const h = await healthCheck(fresh.port);
|
|
if (h) return fresh.port;
|
|
}
|
|
await delay(POLL_INTERVAL_MS);
|
|
}
|
|
|
|
// Timed out — surface the startup log so the user sees the actual error
|
|
// instead of "daemon failed silently."
|
|
let tail = "";
|
|
try {
|
|
tail = fs.readFileSync(logPath, "utf-8").trim();
|
|
} catch {
|
|
// log file may not exist
|
|
}
|
|
throw new Error(
|
|
`Design daemon failed to start within ${MAX_START_WAIT_MS}ms.\n` +
|
|
`Startup log (${logPath}):\n${tail || "(empty)"}`,
|
|
);
|
|
}
|
|
|
|
async function gracefulShutdownExistingDaemon(port: number): Promise<void> {
|
|
try {
|
|
await fetch(`http://127.0.0.1:${port}/shutdown`, {
|
|
method: "POST",
|
|
signal: AbortSignal.timeout(2000),
|
|
});
|
|
} catch {
|
|
// Daemon may have already exited or be unresponsive — fall through
|
|
// to the SIGTERM path with identity verification.
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Send SIGTERM (then SIGKILL) to `pid`, but ONLY if the running cmdline
|
|
* contains `marker`. Prevents a stale state file from causing us to signal
|
|
* an unrelated process that inherited the PID.
|
|
*/
|
|
async function killByPidWithIdentity(
|
|
pid: number,
|
|
marker: string,
|
|
verbose: boolean,
|
|
): Promise<void> {
|
|
if (!pid || pid <= 0) return;
|
|
if (!isProcessAlive(pid)) return;
|
|
if (!verifyIdentity(pid, marker || CMDLINE_MARKER)) {
|
|
log(
|
|
verbose,
|
|
`pid ${pid} is alive but cmdline doesn't match marker '${marker || CMDLINE_MARKER}'; skipping signal (possible PID reuse)`,
|
|
);
|
|
return;
|
|
}
|
|
try {
|
|
process.kill(pid, "SIGTERM");
|
|
} catch {
|
|
// already gone
|
|
return;
|
|
}
|
|
// Give it a grace period; SIGKILL if still alive AND still ours.
|
|
const deadline = Date.now() + SIGTERM_GRACE_MS;
|
|
while (Date.now() < deadline) {
|
|
if (!isProcessAlive(pid)) return;
|
|
await delay(50);
|
|
}
|
|
if (isProcessAlive(pid) && verifyIdentity(pid, marker || CMDLINE_MARKER)) {
|
|
log(verbose, `pid ${pid} survived SIGTERM; SIGKILL`);
|
|
try {
|
|
process.kill(pid, "SIGKILL");
|
|
} catch {
|
|
// raced with exit
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Public: $D daemon stop. Posts /shutdown if no active boards; otherwise
|
|
* reports refusal. Used by the CLI sub-command (next commit).
|
|
*/
|
|
export async function shutdownDaemon(opts: { force?: boolean } = {}): Promise<{
|
|
stopped: boolean;
|
|
reason?: string;
|
|
activeBoards?: number;
|
|
}> {
|
|
const stateFile = resolveStateFilePath();
|
|
const existing = readStateFile(stateFile);
|
|
if (!existing) return { stopped: false, reason: "no daemon running" };
|
|
const health = await healthCheck(existing.port);
|
|
if (!health) {
|
|
// unresponsive: try SIGTERM via identity-checked path
|
|
await killByPidWithIdentity(existing.pid, existing.cmdlineMarker, true);
|
|
return { stopped: true, reason: "unresponsive daemon killed via SIGTERM" };
|
|
}
|
|
if (health.activeBoards > 0 && !opts.force) {
|
|
return {
|
|
stopped: false,
|
|
reason: "active boards present",
|
|
activeBoards: health.activeBoards,
|
|
};
|
|
}
|
|
await gracefulShutdownExistingDaemon(existing.port);
|
|
// Best-effort: SIGTERM if /shutdown didn't take effect
|
|
if (isProcessAlive(existing.pid)) {
|
|
await killByPidWithIdentity(existing.pid, existing.cmdlineMarker, true);
|
|
}
|
|
return { stopped: true };
|
|
}
|
|
|
|
/** $D daemon status — for the CLI sub-command. */
|
|
export async function daemonStatus(): Promise<
|
|
| { running: false }
|
|
| { running: true; port: number; pid: number; version: string; boards: number; activeBoards: number; uptime: number }
|
|
> {
|
|
const existing = readStateFile();
|
|
if (!existing) return { running: false };
|
|
const h = await healthCheck(existing.port);
|
|
if (!h) return { running: false };
|
|
return {
|
|
running: true,
|
|
port: existing.port,
|
|
pid: existing.pid,
|
|
version: h.version,
|
|
boards: h.boards,
|
|
activeBoards: h.activeBoards,
|
|
uptime: h.uptime,
|
|
};
|
|
}
|