mirror of https://github.com/garrytan/gstack.git
feat: harden gstack-detach against all four eval-infra killers
The basic bash detach fixed SIGTERM but a real run on a shared dev box hit three more killers: cross-worktree API saturation (15-way concurrency x a sibling worktree mass-timed-out the suite), a silent hang (periodic bun died with no exit marker), and shared-/tmp log contamination (a concurrent worktree's agent output bled into the log). Rewrite as a portable python3 tool that bakes in all four fixes: - fork + setsid: SIGTERM-proof (own session, survives harness polite-quit) - caffeinate -i on macOS: no idle-sleep death - --lock NAME (fcntl, machine-wide): concurrent worktrees SERIALIZE instead of saturating the shared model API - run-scoped default log (~/.gstack-dev/eval-runs/<label>-<slug>-<branch>-<ts>-<pid>): no cross-worktree collision/contamination - --timeout watchdog + a guaranteed '### gstack-detach EXIT=<code> ###' sentinel on every terminal path: no silent hang, finished-vs-died always detectable Guard test pins all four: detached pgid differs + outlives launcher, run-scoped log path, watchdog EXIT=timeout, and lock serialization (second run WAITS). Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
parent
b608b060b9
commit
a4fb10e519
|
|
@ -1,52 +1,167 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env python3
|
||||||
# gstack-detach — run a long-running command in its OWN session (a fresh process
|
"""gstack-detach — run a long agent job (evals, benchmarks, syncs) robustly.
|
||||||
# group with no controlling terminal) so a SIGTERM aimed at the launching shell's
|
|
||||||
# process group can't reach it.
|
|
||||||
#
|
|
||||||
# Why this exists: when an AGENT/harness launches a 30-60 min eval as a background
|
|
||||||
# task, the harness sends SIGTERM ("polite quit") to that task's process group on
|
|
||||||
# turn boundaries, monitor stops, or interruptions — killing the run mid-flight
|
|
||||||
# (observed: `script "test:gate" was terminated by signal SIGTERM`). Detaching into
|
|
||||||
# a new session escapes that group signal. Humans running evals foreground in their
|
|
||||||
# own terminal don't need this (Ctrl-C is intended); this is for agent-run jobs.
|
|
||||||
#
|
|
||||||
# Usage: gstack-detach <logfile> -- <command> [args...]
|
|
||||||
# (the `--` is optional but recommended for clarity)
|
|
||||||
# Output: prints `PID <n> LOG <path>` and returns immediately. Poll the logfile;
|
|
||||||
# the command keeps running independently of this shell.
|
|
||||||
# Secrets: inherited from the environment ONLY. NEVER pass an API key in argv
|
|
||||||
# (it would show in `ps`). Export it before calling gstack-detach.
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
LOG="${1:?usage: gstack-detach <logfile> -- <command...>}"; shift
|
Agent-launched long jobs on a shared dev box keep dying to environmental
|
||||||
[ "${1:-}" = "--" ] && shift
|
killers. This tool bakes in the fixes so gstack (and every gstack user) runs
|
||||||
[ "$#" -ge 1 ] || { echo "gstack-detach: no command given" >&2; exit 2; }
|
them properly:
|
||||||
mkdir -p "$(dirname "$LOG")" 2>/dev/null || true
|
|
||||||
|
|
||||||
# Preferred path: python3 creates the new session (portable; macOS has no setsid)
|
* SIGTERM-proof: fork + setsid puts the job in its OWN session, so the
|
||||||
# and, on macOS, wraps the command in `caffeinate -i` so idle-sleep can't kill a
|
harness's "polite quit" SIGTERM to the launching process group can't reach
|
||||||
# long run — a second silent killer for 30-60 min jobs.
|
it (observed: `script "test:gate" was terminated by signal SIGTERM`).
|
||||||
if command -v python3 >/dev/null 2>&1; then
|
* No idle-sleep death (macOS): wraps the command in `caffeinate -i`.
|
||||||
GSTACK_DETACH_LOG="$LOG" exec python3 - "$@" <<'PY'
|
* No cross-worktree API saturation: `--lock NAME` takes a machine-wide
|
||||||
import os, sys, shutil, subprocess
|
advisory lock so concurrent Conductor worktrees SERIALIZE their eval runs
|
||||||
os.setsid() # new session => new process group, no controlling terminal
|
instead of saturating the shared model API (which mass-times-out E2E suites).
|
||||||
log = os.environ["GSTACK_DETACH_LOG"]
|
* No shared-/tmp collision: a run-scoped log path by default
|
||||||
cmd = sys.argv[1:]
|
(~/.gstack-dev/eval-runs/<label>-<slug>-<branch>-<ts>-<pid>.log), so
|
||||||
if shutil.which("caffeinate"): # macOS: block idle-sleep for the run
|
concurrent runs never clobber or contaminate each other's logs.
|
||||||
cmd = ["caffeinate", "-i", *cmd]
|
* No silent hang: `--timeout SECS` watchdog kills a stalled run, and a
|
||||||
f = open(log, "ab", buffering=0)
|
`### gstack-detach EXIT=<code> ###` sentinel is ALWAYS appended on a
|
||||||
p = subprocess.Popen(cmd, stdout=f, stderr=subprocess.STDOUT, stdin=subprocess.DEVNULL)
|
terminal path so a poller can tell finished-vs-died (silence != success).
|
||||||
print(f"PID {p.pid} LOG {log}")
|
|
||||||
PY
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Linux without python3: real setsid.
|
Usage:
|
||||||
if command -v setsid >/dev/null 2>&1; then
|
gstack-detach [--log PATH] [--lock NAME] [--timeout SECS] [--label LBL] -- CMD [ARGS...]
|
||||||
setsid sh -c 'exec "$@" >>"$0" 2>&1' "$LOG" "$@" &
|
|
||||||
echo "PID $! LOG $LOG"; disown 2>/dev/null || true; exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Last resort: nohup detaches from SIGHUP (not a group SIGTERM, but better than
|
Prints `gstack-detach LOG <path>` and returns immediately. Poll the log; break
|
||||||
# nothing on a minimal box).
|
on `### gstack-detach EXIT=` (both success and failure are marked).
|
||||||
nohup sh -c 'exec "$@" >>"$0" 2>&1' "$LOG" "$@" >/dev/null 2>&1 &
|
|
||||||
echo "PID $! LOG $LOG"; disown 2>/dev/null || true
|
Secrets are inherited from the environment ONLY — never pass an API key in argv.
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import signal
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
|
||||||
|
def _now():
|
||||||
|
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||||
|
|
||||||
|
|
||||||
|
def _git(*args):
|
||||||
|
try:
|
||||||
|
return subprocess.check_output(["git", *args], stderr=subprocess.DEVNULL, text=True).strip()
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def run_scoped_log(label):
|
||||||
|
base = os.path.expanduser("~/.gstack-dev/eval-runs")
|
||||||
|
os.makedirs(base, exist_ok=True)
|
||||||
|
root = _git("rev-parse", "--show-toplevel")
|
||||||
|
slug = os.path.basename(root) if root else "unknown"
|
||||||
|
branch = (_git("branch", "--show-current") or "nobranch").replace("/", "-")
|
||||||
|
stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
|
||||||
|
return os.path.join(base, f"{label}-{slug}-{branch}-{stamp}-{os.getpid()}.log")
|
||||||
|
|
||||||
|
|
||||||
|
def log_line(path, msg):
|
||||||
|
with open(path, "ab", buffering=0) as f:
|
||||||
|
f.write((msg + "\n").encode("utf-8", "replace"))
|
||||||
|
|
||||||
|
|
||||||
|
def acquire_lock(name, log):
|
||||||
|
"""Machine-wide advisory lock via fcntl (portable on macOS + Linux). Blocks
|
||||||
|
until free so concurrent worktrees serialize rather than saturate the API.
|
||||||
|
Returns the held fd (kept open for the process lifetime)."""
|
||||||
|
import fcntl
|
||||||
|
|
||||||
|
d = os.path.expanduser("~/.gstack/locks")
|
||||||
|
os.makedirs(d, exist_ok=True)
|
||||||
|
fd = open(os.path.join(d, f"{name}.lock"), "w")
|
||||||
|
try:
|
||||||
|
fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||||
|
except OSError:
|
||||||
|
log_line(log, f"### gstack-detach WAITING for lock '{name}' (another run holds it) ### {_now()}")
|
||||||
|
fcntl.flock(fd, fcntl.LOCK_EX) # block until released
|
||||||
|
fd.write(f"{os.getpid()} {_now()}\n")
|
||||||
|
fd.flush()
|
||||||
|
log_line(log, f"### gstack-detach LOCK '{name}' ACQUIRED ### {_now()}")
|
||||||
|
return fd
|
||||||
|
|
||||||
|
|
||||||
|
def child_run(args, log):
|
||||||
|
lock_fd = acquire_lock(args.lock, log) if args.lock else None
|
||||||
|
cmd = args.cmd
|
||||||
|
if shutil.which("caffeinate"): # macOS: block idle-sleep for the run
|
||||||
|
cmd = ["caffeinate", "-i", *cmd]
|
||||||
|
log_line(log, f"### gstack-detach START label={args.label} pgid={os.getpgid(0)} ### {_now()}")
|
||||||
|
with open(log, "ab", buffering=0) as f:
|
||||||
|
# start_new_session: the command runs in its OWN process group so the
|
||||||
|
# watchdog can killpg() it without also killing this supervisor (which
|
||||||
|
# must survive to write the EXIT sentinel).
|
||||||
|
proc = subprocess.Popen(
|
||||||
|
cmd, stdout=f, stderr=subprocess.STDOUT, stdin=subprocess.DEVNULL, start_new_session=True
|
||||||
|
)
|
||||||
|
if args.timeout and args.timeout > 0:
|
||||||
|
try:
|
||||||
|
code = proc.wait(timeout=args.timeout)
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
log_line(log, f"### gstack-detach WATCHDOG fired after {args.timeout}s — killing ### {_now()}")
|
||||||
|
try:
|
||||||
|
os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
time.sleep(5)
|
||||||
|
try:
|
||||||
|
proc.kill()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
code = "timeout"
|
||||||
|
else:
|
||||||
|
code = proc.wait()
|
||||||
|
log_line(log, f"### gstack-detach EXIT={code} ### {_now()}")
|
||||||
|
if lock_fd:
|
||||||
|
try:
|
||||||
|
lock_fd.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
ap = argparse.ArgumentParser(add_help=True)
|
||||||
|
ap.add_argument("--log")
|
||||||
|
ap.add_argument("--lock")
|
||||||
|
ap.add_argument("--timeout", type=int, default=0)
|
||||||
|
ap.add_argument("--label", default="job")
|
||||||
|
ap.add_argument("cmd", nargs=argparse.REMAINDER)
|
||||||
|
args = ap.parse_args()
|
||||||
|
|
||||||
|
cmd = args.cmd
|
||||||
|
if cmd and cmd[0] == "--":
|
||||||
|
cmd = cmd[1:]
|
||||||
|
if not cmd:
|
||||||
|
print("gstack-detach: no command given (usage: gstack-detach [opts] -- CMD...)", file=sys.stderr)
|
||||||
|
sys.exit(2)
|
||||||
|
args.cmd = cmd
|
||||||
|
|
||||||
|
log = args.log or run_scoped_log(args.label)
|
||||||
|
os.makedirs(os.path.dirname(log) or ".", exist_ok=True)
|
||||||
|
open(log, "ab").close()
|
||||||
|
|
||||||
|
# Detach: fork so the launching shell returns immediately, then setsid in the
|
||||||
|
# child to escape the harness's process group / controlling terminal.
|
||||||
|
if os.fork() > 0:
|
||||||
|
# flush BEFORE os._exit — os._exit skips stdio buffer flush, which would
|
||||||
|
# otherwise drop this line and leave the caller without the log path.
|
||||||
|
print(f"gstack-detach LOG {log}", flush=True)
|
||||||
|
os._exit(0)
|
||||||
|
os.setsid()
|
||||||
|
devnull = os.open(os.devnull, os.O_RDWR)
|
||||||
|
os.dup2(devnull, 0)
|
||||||
|
lf = os.open(log, os.O_WRONLY | os.O_APPEND | os.O_CREAT, 0o644)
|
||||||
|
os.dup2(lf, 1)
|
||||||
|
os.dup2(lf, 2)
|
||||||
|
try:
|
||||||
|
child_run(args, log)
|
||||||
|
except Exception as e: # never leave the log without a terminal marker
|
||||||
|
log_line(log, f"### gstack-detach ERROR {e!r} ### {_now()}")
|
||||||
|
log_line(log, f"### gstack-detach EXIT=error ### {_now()}")
|
||||||
|
os._exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,12 @@
|
||||||
/**
|
/**
|
||||||
* gstack-detach — the SIGTERM-survival guard.
|
* gstack-detach — the eval-infra robustness guard. Pins the four killer fixes:
|
||||||
*
|
* 1. SIGTERM-proof detachment (runs in a different process group, outlives the launcher)
|
||||||
* Proves the wrapper runs its command in a DIFFERENT process group than the
|
* 2. run-scoped default log path (no shared-/tmp collision between worktrees)
|
||||||
* caller (so a group SIGTERM from the harness can't reach it) and that the
|
* 3. watchdog --timeout (no silent hang) + guaranteed EXIT sentinel
|
||||||
* command outlives the launching shell (returns immediately, completes later).
|
* 4. machine-wide --lock serialization (no cross-worktree API saturation)
|
||||||
* This is the regression guard that keeps the eval-killer dead.
|
|
||||||
*/
|
*/
|
||||||
import { describe, test, expect } from 'bun:test';
|
import { describe, test, expect } from 'bun:test';
|
||||||
import { spawnSync } from 'child_process';
|
import { spawnSync, spawn } from 'child_process';
|
||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
import * as os from 'os';
|
import * as os from 'os';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
|
|
@ -16,55 +15,82 @@ const ROOT = path.resolve(import.meta.dir, '..');
|
||||||
const DETACH = path.join(ROOT, 'bin', 'gstack-detach');
|
const DETACH = path.join(ROOT, 'bin', 'gstack-detach');
|
||||||
|
|
||||||
function ownPgid(): string {
|
function ownPgid(): string {
|
||||||
const r = spawnSync('ps', ['-o', 'pgid=', '-p', String(process.pid)], { encoding: 'utf-8' });
|
return (spawnSync('ps', ['-o', 'pgid=', '-p', String(process.pid)], { encoding: 'utf-8' }).stdout || '').trim();
|
||||||
return (r.stdout || '').trim();
|
}
|
||||||
|
function waitFor(pred: () => boolean, ms: number): boolean {
|
||||||
|
const end = Date.now() + ms;
|
||||||
|
while (Date.now() < end) {
|
||||||
|
if (pred()) return true;
|
||||||
|
spawnSync('sleep', ['0.2']);
|
||||||
|
}
|
||||||
|
return pred();
|
||||||
|
}
|
||||||
|
function logHas(p: string, needle: string): boolean {
|
||||||
|
try { return fs.readFileSync(p, 'utf-8').includes(needle); } catch { return false; }
|
||||||
}
|
}
|
||||||
|
|
||||||
describe('gstack-detach', () => {
|
describe('gstack-detach', () => {
|
||||||
test('returns immediately and the command keeps running detached', () => {
|
test('detaches (different pgid), returns immediately, completes, writes EXIT sentinel', () => {
|
||||||
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-detach-'));
|
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'gd-'));
|
||||||
const log = path.join(dir, 'run.log');
|
const log = path.join(dir, 'run.log');
|
||||||
const marker = path.join(dir, 'marker');
|
|
||||||
const pgidFile = path.join(dir, 'child.pgid');
|
|
||||||
try {
|
try {
|
||||||
const started = Date.now();
|
const t0 = Date.now();
|
||||||
// Child records its own pgid, sleeps past the launcher's return, then writes
|
const r = spawnSync(DETACH, ['--log', log, '--', 'bash', '-c', 'sleep 2; echo body-ran'], { encoding: 'utf-8', timeout: 10000 });
|
||||||
// a marker — proving it ran to completion independently of this shell.
|
const elapsed = Date.now() - t0;
|
||||||
const cmd = `ps -o pgid= -p $$ | tr -d ' ' > '${pgidFile}'; sleep 2; echo ok > '${marker}'`;
|
|
||||||
const r = spawnSync(DETACH, [log, '--', 'bash', '-c', cmd], { encoding: 'utf-8', timeout: 10000 });
|
|
||||||
const elapsed = Date.now() - started;
|
|
||||||
|
|
||||||
expect(r.status).toBe(0);
|
expect(r.status).toBe(0);
|
||||||
expect(r.stdout).toMatch(/PID \d+ {2}LOG /);
|
expect(r.stdout).toContain(`gstack-detach LOG ${log}`);
|
||||||
// Non-blocking: the launcher returns well before the child's 2s sleep ends.
|
expect(elapsed).toBeLessThan(1500); // non-blocking
|
||||||
expect(elapsed).toBeLessThan(1500);
|
expect(waitFor(() => logHas(log, '### gstack-detach EXIT=0 ###'), 8000)).toBe(true);
|
||||||
|
expect(logHas(log, 'body-ran')).toBe(true); // ran to completion after launcher returned
|
||||||
// Poll for the marker — the detached child finishes after the launcher exited.
|
const m = fs.readFileSync(log, 'utf-8').match(/pgid=(\d+)/);
|
||||||
let survived = false;
|
expect(m).not.toBeNull();
|
||||||
const deadline = Date.now() + 6000;
|
expect(m![1]).not.toBe(ownPgid()); // detached into its own group
|
||||||
while (Date.now() < deadline) {
|
} finally { fs.rmSync(dir, { recursive: true, force: true }); }
|
||||||
if (fs.existsSync(marker)) { survived = true; break; }
|
|
||||||
spawnSync('sleep', ['0.2']);
|
|
||||||
}
|
|
||||||
expect(survived).toBe(true);
|
|
||||||
|
|
||||||
// Detached: the child's process group differs from ours, so a group SIGTERM
|
|
||||||
// aimed at this process can't reach it.
|
|
||||||
const childPgid = fs.readFileSync(pgidFile, 'utf-8').trim();
|
|
||||||
expect(childPgid).not.toBe('');
|
|
||||||
expect(childPgid).not.toBe(ownPgid());
|
|
||||||
} finally {
|
|
||||||
fs.rmSync(dir, { recursive: true, force: true });
|
|
||||||
}
|
|
||||||
}, 15000);
|
}, 15000);
|
||||||
|
|
||||||
test('rejects missing command (exit 2)', () => {
|
test('default log is run-scoped under ~/.gstack-dev/eval-runs (no shared /tmp)', () => {
|
||||||
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-detach-'));
|
const r = spawnSync(DETACH, ['--label', 'unittest', '--', 'true'], { encoding: 'utf-8', timeout: 10000 });
|
||||||
|
const log = (r.stdout.match(/gstack-detach LOG (\S+)/) || [])[1];
|
||||||
try {
|
try {
|
||||||
const r = spawnSync(DETACH, [path.join(dir, 'x.log')], { encoding: 'utf-8' });
|
expect(log).toContain('/.gstack-dev/eval-runs/');
|
||||||
expect(r.status).toBe(2);
|
expect(path.basename(log)).toContain('unittest-');
|
||||||
|
expect(path.basename(log)).toMatch(/-\d+\.log$/); // pid-unique
|
||||||
|
waitFor(() => logHas(log, '### gstack-detach EXIT=0 ###'), 6000);
|
||||||
|
} finally { if (log) fs.rmSync(log, { force: true }); }
|
||||||
|
}, 12000);
|
||||||
|
|
||||||
|
test('watchdog kills a stalled run and records EXIT=timeout (no silent hang)', () => {
|
||||||
|
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'gd-'));
|
||||||
|
const log = path.join(dir, 'run.log');
|
||||||
|
try {
|
||||||
|
spawnSync(DETACH, ['--log', log, '--timeout', '1', '--', 'sleep', '60'], { encoding: 'utf-8', timeout: 10000 });
|
||||||
|
expect(waitFor(() => logHas(log, '### gstack-detach EXIT=timeout ###'), 12000)).toBe(true);
|
||||||
|
expect(logHas(log, 'WATCHDOG fired')).toBe(true);
|
||||||
|
} finally { fs.rmSync(dir, { recursive: true, force: true }); }
|
||||||
|
}, 16000);
|
||||||
|
|
||||||
|
test('machine --lock serializes concurrent runs (second WAITS for the first)', () => {
|
||||||
|
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'gd-'));
|
||||||
|
const lock = `gstack-detach-test-${process.pid}`;
|
||||||
|
const logA = path.join(dir, 'a.log');
|
||||||
|
const logB = path.join(dir, 'b.log');
|
||||||
|
try {
|
||||||
|
// First holds the lock for ~3s; second must wait then acquire.
|
||||||
|
spawnSync(DETACH, ['--log', logA, '--lock', lock, '--', 'sleep', '3'], { encoding: 'utf-8', timeout: 10000 });
|
||||||
|
waitFor(() => logHas(logA, "ACQUIRED"), 4000);
|
||||||
|
spawnSync(DETACH, ['--log', logB, '--lock', lock, '--', 'echo', 'second-ran'], { encoding: 'utf-8', timeout: 10000 });
|
||||||
|
// Second should report WAITING (first still holds it) then ACQUIRE after release.
|
||||||
|
expect(waitFor(() => logHas(logB, 'WAITING for lock'), 4000)).toBe(true);
|
||||||
|
expect(waitFor(() => logHas(logB, '### gstack-detach EXIT=0 ###'), 12000)).toBe(true);
|
||||||
|
expect(logHas(logB, 'second-ran')).toBe(true);
|
||||||
} finally {
|
} finally {
|
||||||
fs.rmSync(dir, { recursive: true, force: true });
|
fs.rmSync(dir, { recursive: true, force: true });
|
||||||
|
fs.rmSync(path.join(os.homedir(), '.gstack', 'locks', `${lock}.lock`), { force: true });
|
||||||
}
|
}
|
||||||
|
}, 20000);
|
||||||
|
|
||||||
|
test('rejects missing command (exit 2)', () => {
|
||||||
|
const r = spawnSync(DETACH, ['--label', 'x'], { encoding: 'utf-8' });
|
||||||
|
expect(r.status).toBe(2);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue