Compare commits

..

1 Commits

Author SHA1 Message Date
Jayesh Betala 942e049514 fix(slug): avoid parent repo identity in subdirs 2026-05-20 11:33:55 +05:30
479 changed files with 9383 additions and 66142 deletions

View File

@ -51,15 +51,6 @@ jobs:
if: matrix.os == 'ubicloud-standard-8'
run: sudo apt-get update && sudo apt-get install -y poppler-utils
# Install a color-emoji font BEFORE Chromium launches so the emoji render
# gate has a fallback font. macOS ships Apple Color Emoji already.
- name: Install color-emoji font (Ubuntu)
if: matrix.os == 'ubicloud-standard-8'
run: |
sudo apt-get install -y fonts-noto-color-emoji
fc-cache -f || true
fc-match -f '%{family[0]}\t%{color}\n' ':lang=und-zsye:charset=1F600' || true
- name: Install Playwright Chromium
run: bunx playwright install chromium
@ -83,7 +74,7 @@ jobs:
- name: Run make-pdf unit tests
run: bun test make-pdf/test/*.test.ts
- name: Run E2E gates (combined-features copy-paste + emoji render)
- name: Run combined-features copy-paste gate (P0)
env:
BROWSE_BIN: ${{ github.workspace }}/browse/dist/browse
run: bun test make-pdf/test/e2e/
run: bun test make-pdf/test/e2e/combined-gate.test.ts

View File

@ -116,7 +116,6 @@ jobs:
test/setup-windows-fallback.test.ts \
test/build-script-shell-compat.test.ts \
test/docs-config-keys.test.ts \
test/brain-sync-windows-paths.test.ts \
make-pdf/test/browseClient.test.ts \
make-pdf/test/pdftotext.test.ts
shell: bash

View File

@ -1,96 +0,0 @@
name: Windows Setup E2E
# End-to-end fresh-install gate for Windows. Runs `./setup` on a clean
# windows-latest checkout and asserts the build completes, binaries
# resolve via find-browse, and the gstack-paths state root resolves
# cleanly. Catches Bun shell-parser regressions in package.json's build
# chain (#1538, #1537, #1530, #1457, #1561) before they reach users.
#
# Separate from windows-free-tests.yml because that one runs a curated
# unit-test subset; this one exercises the install path itself.
#
# Runner: GitHub-hosted free windows-latest. ~3-5 min total.
on:
pull_request:
branches: [main]
paths:
- 'package.json'
- 'scripts/build.sh'
- 'scripts/write-version-files.sh'
- 'setup'
- 'browse/src/cli.ts'
- 'browse/src/find-browse.ts'
- 'bin/gstack-paths'
- '.github/workflows/windows-setup-e2e.yml'
workflow_dispatch:
concurrency:
group: windows-setup-e2e-${{ github.head_ref }}
cancel-in-progress: true
jobs:
windows-setup:
runs-on: windows-latest
timeout-minutes: 15
steps:
- uses: actions/checkout@v4
- uses: oven-sh/setup-bun@v1
with:
bun-version: latest
- name: Configure git identity
run: |
git config --global user.email "windows-setup-e2e@gstack.test"
git config --global user.name "Windows Setup E2E"
git config --global init.defaultBranch main
shell: bash
- name: Install dependencies
run: bun install --frozen-lockfile
shell: bash
- name: Run bun run build (the previously-broken path)
# This is the regression gate. Bun's Windows shell parser rejected
# multiple constructs the old inline build chain used; the wave
# moved the build to scripts/build.sh. If this step fails on
# Windows, the build chain regressed.
run: bun run build
shell: bash
env:
GSTACK_SKIP_PLAYWRIGHT: '1'
- name: Verify binaries exist (with .exe extension on Windows)
run: |
set -e
test -f browse/dist/browse.exe || test -f browse/dist/browse || (echo "MISSING: browse" && exit 1)
test -f browse/dist/find-browse.exe || test -f browse/dist/find-browse || (echo "MISSING: find-browse" && exit 1)
test -f design/dist/design.exe || test -f design/dist/design || (echo "MISSING: design" && exit 1)
test -f bin/gstack-global-discover.exe || test -f bin/gstack-global-discover || (echo "MISSING: gstack-global-discover" && exit 1)
echo "All binaries present"
shell: bash
- name: Verify find-browse resolves to the .exe variant
run: |
set -e
OUT=$(bun browse/src/find-browse.ts 2>&1) || true
echo "find-browse output: $OUT"
# On Windows, find-browse should successfully resolve to a binary,
# whether or not it has the .exe extension on disk. Empty output
# or "not found" means the .exe extension resolver regressed.
echo "$OUT" | grep -qE '(browse\.exe|browse)$' || (echo "find-browse failed to resolve binary on Windows" && exit 1)
shell: bash
- name: Verify gstack-paths state root resolves
run: |
set -e
eval "$(bash bin/gstack-paths)"
test -n "$GSTACK_STATE_ROOT" || (echo "GSTACK_STATE_ROOT empty" && exit 1)
test -n "$PLAN_ROOT" || (echo "PLAN_ROOT empty" && exit 1)
test -n "$TMP_ROOT" || (echo "TMP_ROOT empty" && exit 1)
echo "GSTACK_STATE_ROOT=$GSTACK_STATE_ROOT"
echo "PLAN_ROOT=$PLAN_ROOT"
echo "TMP_ROOT=$TMP_ROOT"
shell: bash

2
.gitignore vendored
View File

@ -4,7 +4,7 @@ dist/
browse/dist/
design/dist/
make-pdf/dist/
bin/gstack-global-discover*
bin/gstack-global-discover
.gstack/
.claude/skills/
.claude/scheduled_tasks.lock

View File

@ -21,7 +21,6 @@ Invoke them by name (e.g., `/office-hours`).
| `/plan-tune` | Self-tune AskUserQuestion sensitivity per question. |
| `/autoplan` | One command runs CEO → design → eng → DX review. |
| `/design-consultation` | Build a complete design system from scratch. |
| `/spec` | Turn vague intent into a precise, executable spec in five phases. Files a GitHub issue, optionally spawns a Claude Code agent in a fresh worktree, and lets `/ship` close the source issue on merge. |
### Implementation + review
@ -76,25 +75,6 @@ Invoke them by name (e.g., `/office-hours`).
| `/setup-browser-cookies` | Import cookies from your real browser for authenticated testing. |
| `/pair-agent` | Pair a remote AI agent (OpenClaw, Codex, etc.) with your browser. |
### iOS QA — drive real iPhones over USB or Tailscale (v1.43.0.0+)
| Skill | What it does |
|-------|-------------|
| `/ios-qa` | Live-device iOS QA via USB CoreDevice tunnel + embedded StateServer. Optionally exposes the device over Tailscale so remote agents can drive it. |
| `/ios-fix` | Autonomous iOS bug fixer with regression snapshot capture. |
| `/ios-design-review` | Designer's-eye QA on a real iPhone — 10-dimension Apple HIG rubric. |
| `/ios-clean` | Convenience: strip DebugBridge + #if DEBUG wiring before a Release build. |
| `/ios-sync` | Regenerate the iOS debug bridge against the latest upstream templates. |
Companion CLIs (run on the Mac that's plugged into the device):
| Command | What it does |
|---------|-------------|
| `gstack-ios-qa-daemon` | Mac-side broker. Loopback by default; `--tailnet` adds a Tailscale-facing listener with capability tiers and audit logging. |
| `gstack-ios-qa-mint` | Owner-grant CLI for the tailnet allowlist (`grant`/`revoke`/`list`). |
End-to-end walkthrough: [docs/howto-ios-testing-with-gstack.md](docs/howto-ios-testing-with-gstack.md).
### Safety + scoping
| Skill | What it does |

View File

@ -317,7 +317,6 @@ from `snapshot`, or `@c` refs from `snapshot -C`. Full table:
| `disconnect` | Close headed Chrome, return to headless |
| `focus [@ref]` | Bring headed Chrome to foreground (macOS); `@ref` also scrolls into view |
| `state save\|load <name>` | Save or load browser state (cookies + URLs) |
| `memory [--json]` | Snapshot Bun heap + per-tab JS heap + Chromium process tree + bounded buffer sizes. Use `--json` for programmatic consumers; text mode renders sorted top-10 tabs with "and N more" tail. |
### Handoff

File diff suppressed because it is too large Load Diff

110
CLAUDE.md
View File

@ -27,16 +27,25 @@ bun run slop:diff # slop findings in files changed on this branch only
`test:evals` requires `ANTHROPIC_API_KEY`. Codex E2E tests (`test/codex-e2e.test.ts`)
use Codex's own auth from `~/.codex/` config — no `OPENAI_API_KEY` env var needed.
**Env keys in Conductor workspaces.** The `GSTACK_*` env-shim (v1.39.2.0+,
`lib/conductor-env-shim.ts`) promotes `GSTACK_ANTHROPIC_API_KEY` /
`GSTACK_OPENAI_API_KEY` to their canonical names inside gstack's TS binaries.
Tests run through gstack entrypoints inherit this promotion automatically.
Don't echo the key value to stdout, logs, or shell history. When passing to a
test's Agent SDK, do NOT pass `env: {...}` to `runAgentSdkTest` — the SDK's
auth pipeline doesn't pick up the key the same way when env is supplied as an
object (confirmed failure mode). Mutate `process.env.ANTHROPIC_API_KEY`
ambiently before the call and restore in `finally`.
**Where the keys live on this machine.** Conductor workspaces don't inherit the
user's interactive shell env, so `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` aren't
in the default process env. Before running any paid eval / E2E, source them from
`~/.zshrc` (that's where Garry keeps them):
```bash
bash -c '
eval "$(grep -E "^export (ANTHROPIC_API_KEY|OPENAI_API_KEY)=" ~/.zshrc)"
export ANTHROPIC_API_KEY OPENAI_API_KEY
EVALS=1 EVALS_TIER=periodic bun test test/skill-e2e-<whatever>.test.ts
'
```
Do not echo the key value anywhere (stdout, logs, shell history). The grep+eval
pattern keeps it in process env only. When passing to a test's Agent SDK, do NOT
pass `env: {...}` to `runAgentSdkTest` — the SDK's auth pipeline doesn't pick up
the key the same way when env is supplied as an object (confirmed failure mode).
Instead, mutate `process.env.ANTHROPIC_API_KEY` ambiently before the call and
restore in `finally`.
E2E tests stream progress in real-time (tool-by-tool via `--output-format stream-json
--verbose`). Results are persisted to `~/.gstack-dev/evals/` with auto-comparison
against the previous run.
@ -111,7 +120,6 @@ gstack/
├── land-and-deploy/ # /land-and-deploy skill (merge → deploy → canary verify)
├── office-hours/ # /office-hours skill (YC Office Hours — startup diagnostic + builder brainstorm)
├── investigate/ # /investigate skill (systematic root-cause debugging)
├── spec/ # /spec skill (five-phase spec → GitHub issue, optional agent spawn, /ship auto-closes)
├── retro/ # Retrospective skill (includes /retro global cross-project mode)
├── bin/ # CLI utilities (gstack-repo-mode, gstack-slug, gstack-config, etc.)
├── document-release/ # /document-release skill (post-ship doc updates + Diataxis coverage map)
@ -228,24 +236,6 @@ Activity / Refs / Inspector as debug overlays behind the footer's
flow, dual-token model, and threat-model boundary — silent failures
here usually trace to not understanding the cross-component flow.
**Embedder terminal-agent ownership** (v1.42.1.0+, identity-based kill v1.44.0.0+).
`buildFetchHandler` in `browse/src/server.ts` accepts `ServerConfig.ownsTerminalAgent?:
boolean` (default `true`). When `true`, factory shutdown runs the full teardown:
identity-based kill via `killAgentByRecord(readAgentRecord(stateDir))` from
`browse/src/terminal-agent-control.ts` plus `safeUnlinkQuiet` on
`<stateDir>/terminal-port`, `<stateDir>/terminal-internal-token`, and
`<stateDir>/terminal-agent-pid` (the per-boot agent record introduced in v1.44).
Embedders (e.g. the gbrowser phoenix overlay) that pre-launch their own PTY
server must pass `false` so their discovery files survive gstack teardown cycles.
The flag is the third caller-owned teardown gate in `ServerConfig` (alongside
`xvfb?` and `proxyBridge?`); polarity is inverted (explicit bool vs presence) and
documented in the field's JSDoc. CLI `start()` always passes `true` explicitly —
the static-grep test in `browse/test/server-embedder-terminal-port.test.ts` fails
CI if a refactor drops it. Pre-v1.44 used `pkill -f terminal-agent\.ts` (regex
match) which would kill sibling gstack sessions on the same host; the new
`browse/test/terminal-agent-pid-identity.test.ts` static-grep tripwire fails CI
if any source file re-introduces `pkill ... terminal-agent` or `spawnSync('pkill', ...)`.
**WebSocket auth uses Sec-WebSocket-Protocol, not cookies.** Browsers
can't set `Authorization` on a WebSocket upgrade, but they CAN set
`Sec-WebSocket-Protocol` via `new WebSocket(url, [token])`. The agent
@ -294,26 +284,6 @@ response in `server.ts`, read
`browse/test/server-sanitize-surrogates.test.ts` pins the wiring with invariant
tests, so bypasses fail CI.
**SSE endpoint helper** (v1.51.0.0+). New SSE endpoints in `server.ts` MUST route
through `createSseEndpoint(req, config)` from `browse/src/sse-helpers.ts`. The
helper owns the cleanup contract (abort + enqueue-throw + heartbeat-throw, all
idempotent) and bakes in `sanitizeLoneSurrogates` on every JSON.stringify, so
new subscribers can't accidentally regress either invariant. Inline
`ReadableStream` wiring leaked subscribers when the TCP connection died without
firing `req.signal.abort` (Chromium MV3 service-worker suspend, intermediate
proxy half-close). `/activity/stream`, `/inspector/events`, and `/memory`
(SSE-eligible) all route through it. `browse/test/sse-helpers.test.ts` pins the
cleanup contract.
**CDP session lifecycle** (v1.51.0.0+). Direct `page.context().newCDPSession(page)`
calls outside `browse/src/cdp-bridge.ts` fail CI via the static-grep tripwire in
`browse/test/cdp-session-cleanup.test.ts`. Use `withCdpSession(page, async (s) => {...})`
for one-shot CDP work (try/finally detach) or `getOrCreateCdpSession(page, cache)`
for cached sessions tied to a page's lifetime (close-detach via `Map<page, session>`).
Three sites migrated: cdp-bridge frame events, write-commands archive capture,
cdp-inspector. The helpers prevent the per-session leak class where successful-path
detach happened but error-path detach was missed.
**Setup symlink hardening** (v1.38.0.0+). Every link site in `setup` MUST route
through the `_link_or_copy SRC DST` helper near the `IS_WINDOWS` detection. On
Windows without Developer Mode, plain `ln -snf` produces frozen file copies that
@ -418,44 +388,6 @@ because they're tracked despite `.gitignore` — ignore them. When staging files
always use specific filenames (`git add file1 file2`) — never `git add .` or
`git add -A`, which will accidentally include the binaries.
## Redaction guard (PII / secrets / legal content)
Shared redaction engine catches credentials, PII, and legal/damaging content
before it reaches an external sink (codex dispatch, GitHub issue/PR body, pushed
commit). It is a **guardrail, not airtight enforcement** — `git push --no-verify`,
direct `gh issue create`, and `GSTACK_REDACT_PREPUSH=skip` all bypass it. It
catches accidents and carelessness, the 99% case. Do not claim it stops a
determined leaker (a CHANGELOG line that does would fail a hostile screenshotter).
- **Engine + taxonomy:** `lib/redact-patterns.ts` (the single source of truth —
3 tiers; HIGH = genuinely-secret credentials that block, MEDIUM = PII/legal/
internal + high-FP credential shapes that confirm via AskUserQuestion, LOW =
FYI) and `lib/redact-engine.ts` (pure `scan()` + `applyRedactions()`).
Calibration matters: a gate that cries wolf gets ignored, so context-variable
shapes (Stripe `pk_live_`, Google `AIza`, JWT, env `*_KEY=`) sit at MEDIUM.
- **CLI:** `bin/gstack-redact` (exit 0 clean / 2 MEDIUM / 3 HIGH; `--json`,
`--auto-redact`, `--repo-visibility`, `--from-file`). `bin/gstack-redact-prepush`
is the opt-in git hook.
- **Skill docs are generated** from `scripts/resolvers/redact-doc.ts`
(`{{REDACT_TAXONOMY_TABLE}}`, `{{REDACT_INVOCATION_BLOCK:<sink>}}`) so /spec,
/cso, /ship, /document-release, /document-generate never drift from the engine.
- **Scan-at-sink:** always scan the EXACT bytes that will be sent — write to a
temp file, scan that file, pass the SAME file to `gh`/`git`. Never scan a string
then re-render (that reopens a scan-vs-send gap).
- **Visibility (no tier promotion):** resolve once per run, order = local config
(`gstack-config get redact_repo_visibility`, ~/.gstack so never committed) → gh
→ glab → unknown(=public-strict). Public repos get STERNER per-finding
confirmation (no batch-acknowledge, no silent-proceed); MEDIUM is never
auto-promoted to HIGH.
- **Tool-attributed fences:** wrap Codex/Greptile/eval output in ` ```codex-review `
/ ` ```greptile ` fences so example credentials those tools quote WARN-degrade
instead of blocking. A live-format credential inside the fence still blocks.
- **Config keys:** `redact_repo_visibility` (public|private|unknown, local-only
override for repos gh/glab can't read), `redact_prepush_hook` (true|false).
There is intentionally NO key to disable HIGH blocking.
- **Audit:** the /spec semantic pass appends a content-free record (categories +
body sha256, no spec text) to `~/.gstack/security/semantic-reviews.jsonl` (0600).
## Commit style
**Always bisect commits.** Every commit should be a single logical change. When
@ -938,10 +870,4 @@ file globs. Run `/sync-gbrain` after meaningful code changes; for ongoing
auto-sync across all worktrees, run `gbrain autopilot --install` once per
machine — gbrain's daemon handles incremental refresh on a schedule.
Safety: don't run `/sync-gbrain` while `gbrain autopilot` is active — the
orchestrator refuses destructive source ops when it detects a running autopilot
to avoid racing it (#1734). Prefer registering user repos with `gbrain sources
add --path <dir>` (no `--url`): URL-managed sources can auto-reclone, and the
sync code walk for them requires an explicit `--allow-reclone` opt-in.
<!-- gstack-gbrain-search-guidance:end -->

View File

@ -326,13 +326,11 @@ If you're using [Conductor](https://conductor.build) to run multiple Claude Code
| Hook | Script | What it does |
|------|--------|-------------|
| `setup` | `bin/dev-setup` | Copies `.env` from main worktree, installs deps, symlinks skills, runs `./setup` non-interactively |
| `setup` | `bin/dev-setup` | Copies `.env` from main worktree, installs deps, symlinks skills |
| `archive` | `bin/dev-teardown` | Removes skill symlinks, cleans up `.claude/` directory |
When Conductor creates a new workspace, `bin/dev-setup` runs automatically. It detects the main worktree (via `git worktree list`), copies your `.env` so API keys carry over, and sets up dev mode — no manual steps needed.
`bin/dev-setup` runs `./setup` fully non-interactively (it passes `--plan-tune-hooks=prompt` and closes stdin), so a forwarded Conductor TTY can never hang on a hidden setup prompt. It also never installs the plan-tune Claude Code hooks, which means a throwaway workspace can't rewrite your global `~/.claude/settings.json` to point at an ephemeral worktree path. To install the plan-tune hooks deliberately, run `./setup --plan-tune-hooks` outside dev-setup (or `gstack-config set plan_tune_hooks yes`).
**First-time setup:** Put your `ANTHROPIC_API_KEY` in `.env` in the main repo (see `.env.example`). Every Conductor workspace inherits it automatically.
**`GSTACK_*` env prefix (Conductor-injected keys).** Conductor explicitly strips `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` from every workspace's process env. The `.env` copy path doesn't restore them either — the strip happens after env inheritance. Users who want paid evals, `/sync-gbrain` embeddings, or `claude-agent-sdk` calls to work in a Conductor workspace must set `GSTACK_ANTHROPIC_API_KEY` and `GSTACK_OPENAI_API_KEY` in Conductor's workspace env config; Conductor passes those through untouched. On the gstack side, TS entry points import `lib/conductor-env-shim.ts` as a side effect, which promotes `GSTACK_FOO_API_KEY` to `FOO_API_KEY` when the canonical name is empty. If you add a new TS entry point that hits a paid API, add `import "../lib/conductor-env-shim";` to the top of the file. Today the shim is imported from `bin/gstack-gbrain-sync.ts`, `bin/gstack-model-benchmark`, `scripts/preflight-agent-sdk.ts`, and `test/helpers/e2e-helpers.ts`.

View File

@ -204,7 +204,6 @@ Each skill feeds into the next. `/office-hours` writes a design doc that `/plan-
| `/browse` | **QA Engineer** | Give the agent eyes. Real Chromium browser, real clicks, real screenshots. ~100ms per command. `/open-gstack-browser` launches GStack Browser with sidebar, anti-bot stealth, and auto model routing. |
| `/setup-browser-cookies` | **Session Manager** | Import cookies from your real browser (Chrome, Arc, Brave, Edge) into the headless session. Test authenticated pages. |
| `/autoplan` | **Review Pipeline** | One command, fully reviewed plan. Runs CEO → design → eng review automatically with encoded decision principles. Surfaces only taste decisions for your approval. |
| `/spec` | **Spec Author** | Turn vague intent into a precise, executable spec in five phases (why, scope, technical with mandatory code-reading, draft, file). Codex quality gate before file (blocks below 7/10), fail-closed secret redaction, dedupe against existing issues, archive to `$GSTACK_STATE_ROOT/projects/$SLUG/specs/` for team-corpus recall. `--execute` spawns `claude -p` in a fresh worktree; `/ship` auto-closes the source issue on merge. Plan-mode aware. |
| `/learn` | **Memory** | Manage what gstack learned across sessions. Review, search, prune, and export project-specific patterns, pitfalls, and preferences. Learnings compound across sessions so gstack gets smarter on your codebase over time. |
### Which review should I use?
@ -230,8 +229,6 @@ Each skill feeds into the next. `/office-hours` writes a design doc that `/plan-
| `/setup-gbrain` | **GBrain Onboarding** — from zero to running gbrain in under 5 minutes. PGLite local, Supabase existing URL, or auto-provision a new Supabase project via Management API. MCP registration for Claude Code + per-repo trust triad (read-write/read-only/deny). [Full guide](USING_GBRAIN_WITH_GSTACK.md). |
| `/sync-gbrain` | **Keep Brain Current** — re-index this repo's code into gbrain via `gbrain sources add` + `gbrain sync --strategy code`, refresh the `## GBrain Search Guidance` block in CLAUDE.md, and auto-remove guidance when the capability check fails. `--incremental` (default), `--full`, `--dry-run`. Idempotent; safe to re-run. |
| `/gstack-upgrade` | **Self-Updater** — upgrade gstack to latest. Detects global vs vendored install, syncs both, shows what changed. |
| `/ios-qa` | **iOS Live-Device QA (v1.43.0.0+)** — drive a real iPhone over USB CoreDevice via an embedded `StateServer` in the app. Read Swift source, codegen typed `@Observable` accessors, run the agent loop. Optional `--tailnet` flag exposes the device to OpenClaw or any HTTP-capable agent on your Tailscale tailnet so remote agents can run iOS QA without ever touching the hardware. Capability-tier allowlist (observe/interact/mutate/restore), per-device session lock, audit log. |
| `/ios-fix`, `/ios-design-review`, `/ios-clean`, `/ios-sync` | iOS bug-fix loop, designer's-eye HIG audit, debug-bridge cleanup, and accessor resync. See `docs/skills.md`. End-to-end walkthrough: [docs/howto-ios-testing-with-gstack.md](docs/howto-ios-testing-with-gstack.md). |
### New binaries (v0.19)
@ -241,8 +238,6 @@ Beyond the slash-command skills, gstack ships standalone CLIs for workflows that
|---------|-------------|
| `gstack-model-benchmark` | **Cross-model benchmark** — run the same prompt through Claude, GPT (via Codex CLI), and Gemini; compare latency, tokens, cost, and (optionally) LLM-judge quality score. Auth detected per provider, unavailable providers skip cleanly. Output as table, JSON, or markdown. `--dry-run` validates flags + auth without spending API calls. |
| `gstack-taste-update` | **Design taste learning** — writes approvals and rejections from `/design-shotgun` into a persistent per-project taste profile. Decays 5%/week. Feeds back into future variant generation so the system learns what you actually pick. |
| `gstack-ios-qa-daemon` | **iOS QA daemon** — Mac-side broker between an agent and a connected iPhone over USB CoreDevice. Loopback by default; `--tailnet` opens a Tailscale-facing listener with identity-gated capability tiers. Single-instance via flock on `~/.gstack/ios-qa-daemon.pid`. See [docs/howto-ios-testing-with-gstack.md](docs/howto-ios-testing-with-gstack.md). |
| `gstack-ios-qa-mint` | **iOS allowlist manager** — owner-grant CLI for the tailnet allowlist. `grant`/`revoke`/`list` against `~/.gstack/ios-qa-allowlist.json` (mode 0600). Remote agents never auto-allowlist; this is the explicit-intent path. |
### Continuous checkpoint mode (opt-in, local by default)
@ -400,7 +395,7 @@ Four paths, pick one:
- **PGLite local** — zero accounts, zero network, ~30 seconds. Isolated brain on this Mac only. Great for try-first; migrate to Supabase later with `/setup-gbrain --switch`.
- **Remote gbrain MCP** — your brain runs on another machine (Tailscale, ngrok, internal LAN) or a teammate's server; paste an MCP URL and bearer token. Optionally pair with a local PGLite for symbol-aware code search in split-engine mode. Best for cross-machine memory without standing up a local DB.
After init, the skill offers to register gbrain as an MCP server for Claude Code (`claude mcp add gbrain -- gbrain serve`) so `gbrain search`, `gbrain put`, etc. show up as first-class typed tools — not bash shell-outs.
After init, the skill offers to register gbrain as an MCP server for Claude Code (`claude mcp add gbrain -- gbrain serve`) so `gbrain search`, `gbrain put_page`, etc. show up as first-class typed tools — not bash shell-outs.
**Keeping the brain current.** Run `/sync-gbrain` from any repo to re-index its code into gbrain (incremental by default, `--full` for a full reindex, `--dry-run` to preview). The skill registers the cwd as a federated source via `gbrain sources add`, runs `gbrain sync --strategy code`, and writes a `## GBrain Search Guidance` block to your project's CLAUDE.md so the agent prefers `gbrain search`/`code-def`/`code-refs` over Grep. The block is removed automatically if the capability check fails — no stale guidance pointing at tools that aren't installed.

View File

@ -2,7 +2,11 @@
name: gstack
preamble-tier: 1
version: 1.1.0
description: Fast headless browser for QA testing and site dogfooding. (gstack)
description: |
Fast headless browser for QA testing and site dogfooding. Navigate pages, interact with
elements, verify state, diff before/after, take annotated screenshots, test responsive
layouts, forms, uploads, dialogs, and capture bug evidence. Use when asked to open or
test a site, verify a deployment, dogfood a user flow, or file a bug with screenshots. (gstack)
allowed-tools:
- Bash
- Read
@ -17,14 +21,6 @@ triggers:
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
<!-- Regenerate: bun run gen:skill-docs -->
## When to invoke this skill
Navigate pages, interact with
elements, verify state, diff before/after, take annotated screenshots, test responsive
layouts, forms, uploads, dialogs, and capture bug evidence. Use when asked to open or
test a site, verify a deployment, dogfood a user flow, or file a bug with screenshots.
## Preamble (run first)
```bash
@ -60,7 +56,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
echo "QUESTION_TUNING: $_QUESTION_TUNING"
mkdir -p ~/.gstack/analytics
if [ "$_TEL" != "off" ]; then
echo '{"skill":"gstack","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
echo '{"skill":"gstack","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
fi
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
if [ -f "$_PF" ]; then
@ -102,19 +98,6 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
# Claude Code exposes plan mode via system reminders; we detect best-effort
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
# fall back to "inactive". Codex hosts and Claude execution mode both end up
# inactive, which is the safe default (defaults to file+execute pipeline).
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
export GSTACK_PLAN_MODE="active"
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
export GSTACK_PLAN_MODE="active"
else
export GSTACK_PLAN_MODE="inactive"
fi
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
```
@ -170,7 +153,7 @@ Only run `open` if yes. Always run `touch`.
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
Options:
- A) Help gstack get better! (recommended)
@ -246,7 +229,6 @@ Key routing rules:
- Ship/deploy/PR → invoke /ship or /land-and-deploy
- Save progress → invoke /context-save
- Resume context → invoke /context-restore
- Author a backlog-ready spec/issue → invoke /spec
```
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
@ -504,7 +486,6 @@ quality gates that produce better results than answering inline.
**Routing rules — when you see these patterns, INVOKE the skill via the Skill tool:**
- User describes a new idea, asks "is this worth building", brainstorms, pitches a concept → invoke `/office-hours`
- User asks to spec something out, file an issue, write up a ticket, "turn this into a GitHub issue", "backlog item" → invoke `/spec`
- User asks about strategy, scope, ambition, "think bigger", "what should we build" → invoke `/plan-ceo-review`
- User asks to review architecture, lock in the plan, "does this design make sense" → invoke `/plan-eng-review`
- User asks about design system, brand, visual identity, "how should this look" → invoke `/design-consultation`
@ -963,7 +944,6 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
| `disconnect` | Disconnect headed browser, return to headless mode |
| `focus [@ref]` | Bring headed browser window to foreground (macOS) |
| `handoff [message]` | Open visible Chrome at current page for user takeover |
| `memory [--json]` | Snapshot Bun heap + per-tab JS heap + Chromium process tree + bounded buffer sizes. JSON output with --json. |
| `restart` | Restart server |
| `resume` | Re-snapshot after user takeover, return control to AI |
| `state save|load <name>` | Save/load browser state (cookies + URLs) |

View File

@ -32,7 +32,6 @@ quality gates that produce better results than answering inline.
**Routing rules — when you see these patterns, INVOKE the skill via the Skill tool:**
- User describes a new idea, asks "is this worth building", brainstorms, pitches a concept → invoke `/office-hours`
- User asks to spec something out, file an issue, write up a ticket, "turn this into a GitHub issue", "backlog item" → invoke `/spec`
- User asks about strategy, scope, ambition, "think bigger", "what should we build" → invoke `/plan-ceo-review`
- User asks to review architecture, lock in the plan, "does this design make sense" → invoke `/plan-eng-review`
- User asks about design system, brand, visual identity, "how should this look" → invoke `/design-consultation`

503
TODOS.md
View File

@ -1,284 +1,5 @@
# TODOS
## Test infrastructure
### ✅ DONE (v1.53.1.0): Rebaseline parity-suite (v1.44.1 → v1.53.0.0)
**What:** `test/parity-suite.test.ts` checked every skill's SKILL.md size against
the frozen `test/fixtures/parity-baseline-v1.44.1.json`. Five planning skills had
crept past the 1.05x ceiling: `plan-ceo-review` (1.052), `plan-eng-review` (1.062),
`plan-design-review` (1.068), `investigate` (1.053), `office-hours` (1.065) — growth
from the brain-aware-planning releases (v1.49–v1.52) plus the v1.53 redaction guard.
**Resolved:** Captured a fresh baseline at HEAD via
`bun run scripts/capture-baseline.ts --tag v1.53.0.0` and re-pointed the test at
`test/fixtures/parity-baseline-v1.53.0.0.json`. The per-skill 1.05 ratio is kept, so
future bloat is still caught — only the stale anchor moved. Mirrors the earlier
`skill-size-budget` rebase (v1.44.1 → v1.47.0.0). Historical v1.44.1 / v1.46.0.0 /
v1.47.0.0 baselines retained in `test/fixtures/` for the v1→v2 audit trail. The
captured skill bytes match `origin/main` exactly (the rebasing branch left every
SKILL.md untouched). `bun test` is green again.
## gbrowser memory follow-ups (filed via /plan-eng-review + /codex on the v1.49 leak-fix PR)
These four items came out of the memory-leak investigation that shipped
the `$B memory` diagnostic + the four leak fixes. They were
deliberately deferred from that PR (already 14 commits / ~12 files);
each stands alone and any one could ship independently.
### P2: MV3 extension service worker memory profile
**What:** The `/memory` endpoint snapshot enumerates pages but does
not enumerate the gstack baked-in extension's service-worker target.
A long-running MV3 service worker can leak through retained DOM
snapshots, message ports that never close, alarms that re-arm, and
caches that grow without bound. The diagnostic should call
`Target.getTargets` with a filter for `service_worker` and include
each one in `tabs[]` (or a sibling `serviceWorkers[]` array) with the
same `Performance.getMetrics` data.
**Why:** Codex's outside-voice review on the eng-review surfaced this
class of leak (the extension is part of the gbrowser process tree but
invisible to today's snapshot). Until we surface it, a SW leak shows
up only in the parent process RSS with no per-target attribution.
**Pros:** Closes the per-target attribution gap for the
single-most-likely future leak source (our own extension).
**Cons:** Extension SW lifecycle is asymmetric vs page lifecycle;
auto-attach + filter is one more piece of CDP plumbing.
**Context:** Codex finding #4 on the eng-review outside voice. Not
in scope of the v1.49 PR; deliberately deferred to keep the PR to
the four highest-confidence leak fixes.
**Priority:** P2. **Effort:** M.
---
### P2: Native + GPU memory breakdown in `$B memory`
**What:** `$B memory` shows Bun RSS + per-tab JS heap + Chromium
process tree (PIDs + types + CPU time) but the per-process RSS is
absent — `SystemInfo.getProcessInfo` doesn't expose RSS and the eng
review (D2 USE_CDP) explicitly chose CDP over shelling to `ps`. The
honest next step is to surface what CDP DOES give for the other
memory categories: `Memory.getDOMCounters` per target (node + listener
counts), `SystemInfo.getInfo` for GPU memory, `Memory.getAllTimeSamplingProfile`
for a sampled native estimate.
**Why:** Codex's outside-voice review flagged that
`Performance.getMetrics` misses native memory, GPU memory, video
buffers, Skia, network cache, extension process RSS, and
browser-process RSS — all the categories where a 160 GB leak would
actually live. A diagnostic that misses the categories where the
leak class lives undersells itself.
**Pros:** Per-process category breakdown closes the gap between
"Activity Monitor says 160 GB" and what the diagnostic shows.
**Cons:** Each CDP method has its own quirks; this is a real
implementation pass, not a one-line addition.
**Context:** Codex finding #5 on the eng-review outside voice. Not
in scope of the v1.49 PR; deliberately deferred.
**Priority:** P2. **Effort:** M.
---
### P3: Single-context CDP listener for Network.loadingFinished
**What:** `wirePageEvents` attaches a `page.on('requestfinished')`
listener PER PAGE. The D10 fix removed the body-materialization leak
inside that listener but kept the per-page listener architecture
(7 listeners attached per tab — close, framenavigated, dialog,
console, request, response, requestfinished). The stretch goal from
D10 was to replace the per-page `requestfinished` listener with a
single context-level CDP listener via
`Target.setAutoAttach({autoAttach: true, waitForDebuggerOnStart: false,
flatten: true})` and a browser-wide `Network.loadingFinished` event
handler.
**Why:** Going from N to 1 listener for the request-size capture is
structurally the right architecture and removes one piece of per-tab
memory pressure. The body-materialization fix already addressed the
acute leak; this is the architectural cleanup that prevents similar
leaks in the same class.
**Pros:** One listener per browser instead of one per tab.
**Cons:** `Target.setAutoAttach` plumbing is more code than the
straight per-page listener; the marginal memory win is small on top
of the body-fetch fix that already landed.
**Context:** D10 stretch goal on the eng-review. The minimal-risk
fix shipped in v1.49 (replaces `await res.body()` with
`await req.sizes()`, preserving the per-page listener); this is the
architectural follow-up.
**Priority:** P3. **Effort:** M-L.
---
### P3: Real-Chromium peak-RSS reproducer (periodic tier)
**What:** The gate-tier reproducer
(`browse/test/memory-leak-reproducer.test.ts`) pins the invariant
that `res.body()` is never called during a burst of
`requestfinished` events. It uses a fake page; it does NOT spin up a
real Chromium nor measure peak Bun RSS during a real concurrent fetch
burst. A periodic-tier follow-up should: spin up a real headless
Chromium, navigate to a fixture page that concurrently fetches 500
mixed responses (small JSON, 100 KB images, 10 MB chunked,
gzip-compressed 2 MB), sample `process.memoryUsage().heapUsed` every
100 ms during the burst, assert `peak_heap < 200 MB above baseline`
AND `post-gc_heap < 30 MB above baseline`. Also include a single-tab
WebGL canvas variant that grows to >4 GB and asserts the per-tab RSS
toast fires.
**Why:** Codex flagged that the leak's real failure mode is transient
amplification under concurrent burst, not retained leak — a steady-state
heap test misses it. The fake-page gate-tier test catches the
listener-architecture regression; the periodic real-browser test
catches the actual peak-RSS class.
**Pros:** Closes the "did we actually demonstrate the OOM is fixed"
question with hard numbers. Feeds the ANGLE_B_NUMBERS CHANGELOG
release-summary table.
**Cons:** Periodic tier costs minutes of CI time and money per run;
real-browser memory tests are inherently flaky.
**Context:** Codex outside-voice finding on the eng-review; D7
ANGLE_B_NUMBERS CHANGELOG framing needs this reproducer's numbers
before /ship time.
**Priority:** P3. **Effort:** M.
---
## design daemon: follow-ups (filed v1.45.0.0 via /ship review army)
### ✅ DONE (v1.45.0.0): Tighten daemon test coverage
**Resolved in commit `6b037c55` (same PR):** All 5 test gaps filled before
landing. Per-file totals after: serve 16, daemon 34, daemon-discovery 23,
feedback-roundtrip-daemon 4 = 77 (+10 from initial ship). Specifically:
- Idle-shutdown actually fires (spawn-based, daemon process observed exiting,
state file removed).
- Bare GET polling doesn't reset idle (hammers `/api/progress` in background,
daemon still idles out).
- Idle-with-active-boards extends, then force-shuts after MAX_EXTENSIONS
(with `DESIGN_DAEMON_EXTENSION_MS=1500` + `MAX_EXTENSIONS=2`).
- Concurrent `ensureDaemon()` race converges on one daemon (lock wins).
- Stale-lock reclaim (dead PID succeeds, alive unrelated PID refuses).
- Malformed-JSON + non-object + array-body + missing-html negatives for
`POST /api/boards` and `POST /boards/<id>/api/reload`.
### P3: Minor maintainability nits from /ship review
- `design/src/cli.ts` and `design/src/serve.ts` both have a small `openBrowser`
helper with identical darwin/linux/else branches. Extract a shared
`design/src/open-browser.ts`.
- `design/src/daemon-client.ts:320` (`AbortSignal.timeout(2000)`) and `:357`
(`delay(50)`) use bare numeric literals while sibling timeouts are named
constants. Promote to `SHUTDOWN_POST_TIMEOUT_MS` and `ALIVE_POLL_INTERVAL_MS`.
- `design/src/daemon-state.ts:21` `serverPath` field is written
(`daemon.ts:541`) but never read by production code. Either remove or
document the forensic intent.
### P3: Daemon scope deferred from v1.45.0.0 plan
Originally listed in the plan's "TODOs surfaced for later" section:
- Per-daemon scoped auth tokens (only relevant once a tunnel/share use case appears).
- Optional persistent board history on disk in
`~/.gstack/projects/$SLUG/designs/history/` so submitted boards survive
daemon restarts.
- Windows spawn branch lifted from browse (V1 daemon is macOS + Linux;
Windows users fall back to legacy `--no-daemon` per-process server).
- `$D board list` / `$D board stop <id>` per-board ops CLI (V1 has only
`$D daemon status` / `stop`).
- Cross-worktree daemon attach (conductor sibling worktrees of the same
repo currently each spawn their own daemon — matches browse; revisit
if it causes friction).
---
## browse server: terminal-agent teardown follow-ups (filed v1.41 via /plan-eng-review)
### ✅ DONE (v1.44.0.0): Identity-based terminal-agent kill (replace pkill regex with PID)
**Resolved:** Bundled into the v1.44.0.0 long-lived-sidebar PR as Commit 0.
`browse/src/terminal-agent-control.ts` is the new home for `readAgentRecord`,
`writeAgentRecord`, `clearAgentRecord`, and `killAgentByRecord`. The agent
writes `<stateDir>/terminal-agent-pid` (JSON `{pid, gen, startedAt}`) at boot
and clears it on SIGTERM/SIGINT. `cli.ts` and `server.ts` both route through
`killAgentByRecord` instead of `pkill -f terminal-agent\.ts`. The new
`browse/test/terminal-agent-pid-identity.test.ts` is the static-grep tripwire
that fails CI if `pkill ... terminal-agent` or `spawnSync('pkill', ...)`
reappears in any source file.
---
### P3: shutdown() reads module-level `config`, not `cfg.config` (composition gap)
**What:** `browse/src/server.ts:shutdown()` reads `path.dirname(config.stateFile)`
where `config` is the module-level value resolved at import time, not the
`cfg.config` passed into `buildFetchHandler`. Same gap applies to
`cleanSingletonLocks(resolveChromiumProfile())` at server.ts:1298 — should
read `cfg.chromiumProfile`.
**Why:** Embedders today happen to share state-dir resolution with the CLI
(both go through `resolveConfig()` against the same env), so this doesn't
bite. But if an embedder ever passes a divergent `cfg.config` (e.g., a test
harness pointing at a temp dir), shutdown will operate on the wrong paths.
The `ownsTerminalAgent` flag exposes the problem without fixing it.
**Pros:** Closes the embedder-composition story properly. Pairs with
`cfg.chromiumProfile` to give a single coherent "this factory teardown
respects cfg" contract.
**Cons:** Pre-existing — not a regression. Two call sites today (1285 for
terminal files, 1298 for chromium locks). Threading `cfg.config` and
`cfg.chromiumProfile` into the right closures is straightforward but
broader than the v1.41 fix.
**Context:** Flagged by both Codex and Claude subagent in the /plan-eng-review
dual voices. Documented as out-of-scope in the v1.41 plan; same shape as the
`chromiumProfile` PR-body note to the gbrowser team.
**Depends on:** None.
---
### P3: Ownership-object refactor if a 4th caller-owned teardown gate appears
**What:** Today `ServerConfig` has three caller-owned teardown gates:
`xvfb?` (presence ⇒ don't close), `proxyBridge?` (same), and now
`ownsTerminalAgent` (explicit boolean). If a 4th gate appears, collapse to
`cfg.callerOwns?: Set<'terminalAgent' | 'xvfb' | 'proxyBridge' | ...>` or
similar.
**Why:** Three independent flags is below the refactor threshold — each
field has clear, distinct semantics and the JSDoc voice is consistent. A
fourth tips the cost balance: the per-field surface gets noisy, and
"what does this factory own?" becomes a question you have to ask of three
or four scattered fields instead of one explicit set.
**Pros:** Single source of truth for "what gstack tears down". Trivial
extension surface for future caller-owned resources. Easier to assert in
tests ("the set should contain X, not Y").
**Cons:** Premature today. The polarity-inversion note in the
`ownsTerminalAgent` JSDoc only hurts a little — it's one anomaly, not a
pattern. Refactoring now to an ownership object would touch every embedder.
**Context:** Recommended by Claude subagent during /plan-ceo-review dual
voice (autoplan). Trigger: a 4th caller-owned teardown gate in this same
`ServerConfig` shape.
**Depends on:** A 4th gate to motivate the refactor.
---
## /sync-gbrain memory stage perf follow-up
### P2: Investigate `gbrain import` perf on large staging dirs
@ -736,24 +457,7 @@ reads it yet.
**Effort:** L (human: ~1 week / CC: ~4h)
**Priority:** P0
**Depends on:** **90+ days of v1 dogfood stable across 3+ skills** (per
`docs/designs/PLAN_TUNING_V0.md` §"Deferred to v2" E1 acceptance criteria).
Distinct from the lighter-weight diversity-display gate
(`sample_size >= 20 AND skills_covered >= 3 AND question_ids_covered >= 8
AND days_span >= 7`) used in /plan-tune to render the inferred column —
display is a UI affordance, promotion to E1 needs a much higher bar
because behavioral adaptation is consequential and hard to revert. Prior
versions of this card cited "2+ weeks" which conflicted with V0 — V0 wins.
**Substrate risk (Codex outside-voice, Phase A review 2026-05-26):** Generated
skill prose is agent-compliance-based. Tests can verify templates contain the
right reads of `~/.gstack/developer-profile.json` and the right decision
points, but tests cannot prove agents obey them at runtime. E1 ships
adaptations as **advisory annotations on AskUserQuestion recommendations**
("Recommended via your profile: <choice>") until there's a hard runtime
execution path. Do NOT gate any AUTO_DECIDE on inferred profile alone in v1
of E1; explicit per-question preferences remain the only AUTO_DECIDE
source.
**Depends on:** 2+ weeks of v1 dogfood, profile diversity check passing.
### E3 — `/plan-tune narrative` + `/plan-tune vibe`
@ -1939,49 +1643,6 @@ Shipped in v0.6.5. TemplateContext in gen-skill-docs.ts bakes skill name into pr
**Priority:** P2
**Depends on:** CDP patches proving the value of anti-bot stealth first
## /spec follow-ups (deferred from v1.47.0.0 via /plan-ceo-review SCOPE EXPANSION)
### P2: `/spec --epic` mode (parent issue + child issues + dependency graph)
**Priority:** P2
**What:** Add `--epic` flag that produces an Epic issue (parent) plus N child issues with explicit dependency graph and topological order. Emits multiple `gh issue create` calls with parent linkage in child bodies.
**Why:** Multi-week initiatives often span 3-5 specs that share context but ship sequentially. `/spec --epic` would let users author the full initiative in one session and file all linked issues atomically. The Epic template already exists in `spec/SKILL.md.tmpl` (carried over from PR #1698); only the flag routing + multi-issue `gh` orchestration is missing.
**Pros:**
- Closes the multi-issue workflow gap that `/spec` v1 doesn't cover.
- Parent + child linkage means project boards show the full initiative at-a-glance.
- Composes cleanly with existing `--execute` (spawn an agent on the parent epic; agent files children as it works).
**Cons:**
- More gh API surface (one create per child, parent-link edit pass).
- Dependency-graph rendering in markdown is fiddly across GitHub vs GitLab renderers.
**Context:** Considered in `/plan-ceo-review` SCOPE EXPANSION (D5), deferred 2026-05-25 in favor of shipping the 5 critical-path expansions (--execute, --dedupe, archive, quality gate, --audit). Re-evaluate once v1.47 ships and we see how often users hit "this should be 3 issues" in real /spec sessions.
**Depends on:** v1.47.0.0 `/spec` lands first; need real usage data to calibrate the multi-issue surface.
### P3: `/spec --dedupe` semantic matching (LLM-based) for v1.1
**Priority:** P3
**What:** Upgrade `--dedupe`'s string match against `gh issue list --search` to LLM-based semantic similarity. Today's v1 picks string overlap on title keywords; semantic match would catch "the sidebar terminal flakes on reload" matching an existing issue titled "PTY reconnect fails after extension restart" where keyword overlap is zero.
**Why:** String match has high precision but low recall — it misses near-duplicates with different vocabulary. LLM semantic match catches more dupes but costs ~$0.01-0.05 per spec dispatch and adds 5-10s latency.
**Pros:**
- Catches dupes string match misses.
- One more reason `/spec` is more useful than freehand authoring.
**Cons:**
- Paid + slower. Most v1 users probably don't hit enough false-negatives to justify the cost.
- Adds another LLM-judged decision to a skill that already has the quality gate.
**Context:** Considered in `/plan-ceo-review` build-time decisions; chose string match for v1 to keep the dedupe path free + fast. Revisit if v1 produces a meaningful false-negative rate in real use.
**Depends on:** v1.47.0.0 ships; gather real false-negative data from the v1 string matcher.
## Completed
### Slim preamble + real-PTY plan-mode E2E harness (v1.13.1.0)
@ -2089,165 +1750,3 @@ Shipped in v0.6.5. TemplateContext in gen-skill-docs.ts bakes skill name into pr
### Auto-upgrade mode + smart update check
- Config CLI (`bin/gstack-config`), auto-upgrade via `~/.gstack/config.yaml`, 12h cache TTL, exponential snooze backoff (24h→48h→1wk), "never ask again" option, vendored copy sync on upgrade
**Completed:** v0.3.8
---
## Brain-aware planning follow-ups (filed v1.48.0.0 via /plan-ceo-review + /plan-eng-review)
These are the deferred cherry-picks (E2/E3/E4) from the v1.48 brain-aware
planning plan at `~/.claude/plans/hm-interesting-well-why-dapper-eagle.md`.
The foundation (Phase 0 entity model + Phase 0.5 cache + Phase 1 preflight
+ Phase 1.5 trust policy + Phase 2 write-back scaffolding) ships in
v1.48.0.0. These follow-ups extend it.
### P2: /gstack-reflect nightly synthesis skill (E2)
**What:** Scheduled skill that reads weekly `gstack/skill-run` + takes +
`get_recent_salience` and synthesizes a `gstack/insight` page surfaced at
next skill preflight.
**Why:** Cross-time pattern detection is the compounding move. "You ran 4
plan-ceo on infra this week, 0 on product — is product work getting
starved?" surfaces patterns the user wouldn't notice.
**Pros:** Brain compounds across TIME, not just across skills. Patterns
become actionable.
**Cons:** "You're starving product work" is high-judgment territory; needs
opt-out per project, careful insight templates.
**Context:** Deferred from v1.48.0.0 cherry-pick (D4) — wait 4-6 weeks for
real `gstack/skill-run` data to accumulate before designing the reflection
layer against real patterns instead of imagined ones.
**Effort:** L (human ~1-2 days, CC ~4-6h)
**Depends on:** Phase 0 (gstack/skill-run page type from v1.48.0.0) +
~6 weeks of accumulated data
### P3: Cross-machine brain-cache sync (E3)
**What:** Push compressed digests through the gstack-brain-sync git pipeline
so the brain-cache survives moving between Macs / Conductor workspaces.
**Why:** Eliminates the cold-miss tax on every new machine (~1-2s once per
machine per day).
**Pros:** Instant warm cache on new machines.
**Cons:** Cache poisoning risk if not designed carefully (hash invariants,
endpoint-binding, conflict resolution).
**Context:** Deferred from v1.48.0.0 cherry-pick (D5) — single-machine
cache is fine for V1; correctness risk needs its own design pass.
**Effort:** M (human ~4h, CC ~30min)
**Depends on:** Brain-cache layer from v1.48.0.0
### P3: /gstack-onboarding dedicated skill (E4)
**What:** Guided 5-minute setup skill for new gstack installs: walks user
through reading CLAUDE.md + README + recent commits to build `gstack/product`
and active goals with explicit AUQs.
**Why:** Better UX than the inline bootstrap (which only fires when a
planning skill is invoked).
**Pros:** Cleaner cold-start, explicit ceremony.
**Cons:** Inline bootstrap (in scope for v1.48) already covers the
cold-start path adequately.
**Context:** Deferred from v1.48.0.0 cherry-pick (D6) — observe inline
bootstrap performance first; add dedicated skill if friction is real.
**Effort:** S (human ~2h, CC ~15min)
**Depends on:** Inline bootstrap subcommand from v1.48.0.0
### P2: Upstream gbrain takes_add + takes_resolve MCP ops
**What:** Add `mcp__gbrain__takes_add` and `mcp__gbrain__takes_resolve`
ops in `~/git/gbrain/src/core/operations.ts`. Extract the markdown-fence
mirror logic from `commands/takes.ts:570` into a reusable
`engine.resolveTake()` helper.
**Why:** Unlocks Phase 2 calibration write-back without the fence-block
fallback. ~150 LOC. Already on gbrain's v0.31.x roadmap.
**Pros:** Clean Phase 2 path, removes the "fall back to put_page" smell.
**Cons:** Lives in upstream gbrain repo, not helsinki — separate PR.
**Context:** Phase 2 write-back is already wired in v1.48.0.0 behind the
BRAIN_CALIBRATION_WRITEBACK feature flag (default off). Flag flips to
true once upstream gbrain ships these ops. ~50 LOC follow-up in
helsinki to swap the fallback for the preferred op.
**Effort:** S (human ~1d, CC ~1h) in gbrain repo; trivial wire-up in
helsinki.
**Depends on:** None (parallel-track from v1.48.0.0)
### P3: Background-refresh hook supervision
**What:** Codex outside-voice raised that "background refresh at skill END"
is hand-wavy. Add proper process supervision: PID file, timeout, failure
log, cross-platform spawn.
**Why:** Current implementation backgrounds with `&` which works but
leaves no observability when a refresh fails.
**Context:** Deferred from v1.48.0.0 codex tension T3. Stays low priority
until users report stale digests where a background refresh silently
failed.
**Effort:** S (human ~2h, CC ~20min)
### P2: Re-verify calibration takes when gbrain v0.42+ lands
**What:** When upstream gbrain ships `takes_add` MCP op and we flip
`BRAIN_CALIBRATION_WRITEBACK` from FALSE to TRUE, re-run the manual
probe in `docs/gbrain-write-surfaces.md` against `/office-hours` and
confirm `gbrain takes_list` surfaces a `kind=bet` entry with the
expected weight (0.9 for office-hours, per
`scripts/brain-cache-spec.ts:151-157`).
**Why:** Today the calibration take path falls back to writing inside a
`gbrain put` fence block because `takes_add` isn't available yet. Once
v0.42+ ships, the agent will call `takes_add` directly — we should
confirm the new path actually persists a queryable take.
**Context:** v1.50.0.0 plan §"NOT in scope". The fence-block fallback
test (`test/takes-fence-fallback.test.ts`) covers wiring for both paths;
this TODO is about live verification of the preferred path when it
becomes available.
**Effort:** XS (human ~15min, CC ~5min)
**Depends on:** Upstream gbrain v0.42+ release shipping `takes_add` MCP
op (separate TODO above).
### P2: Extend brain-writeback E2E to the other 4 planning skills
**What:** `test/skill-e2e-office-hours-brain-writeback.test.ts` covers
the brain-writeback path for `/office-hours` only. Adding parallel
tests for `/plan-ceo-review`, `/plan-eng-review`, `/plan-design-review`,
and `/plan-devex-review` would bring per-skill agent-obedience coverage
to parity with the resolver unit test
(`test/resolvers-gbrain-save-results.test.ts`, which covers wiring for
all 5).
**Why:** The resolver test proves the right instructions get emitted;
the E2E proves the agent actually obeys. Today we only have that
end-to-end signal for one of five planning skills.
**Context:** v1.50.0.0 plan §"NOT in scope". Extract `makeFakeGbrain`
into `test/helpers/fake-gbrain.ts` when the second consumer arrives
(YAGNI for one consumer today).
**Effort:** S (human ~1d, CC ~1h). Periodic-tier (~$2-4 total for 4
runs).
**Depends on:** None.

View File

@ -57,9 +57,7 @@ Best for: you'd rather click through supabase.com yourself than paste a PAT.
Best for: try-it-first, no account, no cloud, no sharing. Or a dedicated "this Mac's brain" that stays isolated from any cloud agent.
**What happens:** `gbrain init --pglite`. Brain lives at `~/.gbrain/brain.pglite`. No network calls for the init itself. Done in 30 seconds.
**Embedding model.** When `VOYAGE_API_KEY` is set, gstack inits PGLite with `voyage-code-3` (1024-dim) — Voyage's code-specialized embedding model, which beats their general-purpose `voyage-4-large` and OpenAI `text-embedding-3-large` head-to-head on this codebase's symbol queries. Without `VOYAGE_API_KEY`, gbrain auto-selects (OpenAI 1536-dim when `OPENAI_API_KEY` is present, else falls down its provider chain). Either way, the embeddings call out to the chosen provider's API during sync — set the key for the provider you want before running `/sync-gbrain`.
**What happens:** `gbrain init --pglite`. Brain lives at `~/.gbrain/brain.pglite`. No network calls. Done in 30 seconds.
This is the best first choice if you just want to see what gbrain feels like before committing to cloud. You can always migrate later with `/setup-gbrain --switch`.
@ -84,7 +82,7 @@ By default the skill asks "Give Claude Code a typed tool surface for gbrain?" If
claude mcp add gbrain -- gbrain serve
```
That registers gbrain's stdio MCP server with Claude Code. Now `gbrain search`, `gbrain put`, `gbrain get`, etc. show up as first-class tools in every session, not bash shell-outs.
That registers gbrain's stdio MCP server with Claude Code. Now `gbrain search`, `gbrain put_page`, `gbrain get_page`, etc. show up as first-class tools in every session, not bash shell-outs.
**If `claude` is not on PATH**, the skill skips MCP registration gracefully with a manual-register hint. The CLI resolver still works from any skill that shells out to `gbrain` — MCP is an upgrade, not a prerequisite.
@ -136,7 +134,7 @@ The skill runs three stages — code, memory, brain-sync — independently. A fa
1. **Pre-flight.** Checks `gbrain_local_status` (the local engine's health). If the engine is `broken-db` or `broken-config`, the skill STOPs with a remediation menu — it refuses to silently degrade. If the local engine is missing and you're in remote-MCP mode (Path 4), the code stage SKIPs cleanly and only brain-sync runs.
2. **Code stage.** Registers the cwd as a federated source via `gbrain sources add`, writes a `.gbrain-source` pin file in the repo root (kubectl-style context — every worktree gets its own pin, so Conductor sibling worktrees don't collide), runs `gbrain sync --strategy code`.
3. **Memory stage.** Stages your `~/.gstack/` transcripts + curated memory. In local-stdio MCP mode, ingests into the local engine. In remote-http MCP mode, persists staged markdown to `~/.gstack/transcripts/run-<pid>-<ts>/` for the remote brain admin's pull pipeline. The ingest timeout is 30 minutes by default; raise it for a big brain with `GSTACK_INGEST_TIMEOUT_MS` (accepts 1 min–24h). On timeout the gbrain import checkpoint is preserved, so the next `/sync-gbrain` resumes instead of starting over.
3. **Memory stage.** Stages your `~/.gstack/` transcripts + curated memory. In local-stdio MCP mode, ingests into the local engine. In remote-http MCP mode, persists staged markdown to `~/.gstack/transcripts/run-<pid>-<ts>/` for the remote brain admin's pull pipeline.
4. **Brain-sync stage.** Pushes curated artifacts (plans, designs, retros) to your private artifacts repo if you have one configured.
5. **CLAUDE.md guidance.** Capability-checks the round-trip (write a page → search → find it). If green, writes the `## GBrain Search Guidance` block to your project's CLAUDE.md. If red, REMOVES the block — the agent should never be told to use a tool that isn't installed.
@ -226,8 +224,8 @@ Gbrain itself ships with these that gstack wraps:
| `gbrain migrate --to supabase --url ...` | Move a PGLite brain to Supabase (lossless, preserves source as backup) |
| `gbrain migrate --to pglite` | Reverse migration |
| `gbrain search "query"` | Search the brain |
| `gbrain put "<slug>" --content "<markdown-with-frontmatter>"` | Write a page (title/tags go in YAML frontmatter inside `--content`) |
| `gbrain get "<slug>"` | Fetch a page |
| `gbrain put_page --title "..." --tags "a,b" <<<"content"` | Write a page |
| `gbrain get_page "<slug>"` | Fetch a page |
| `gbrain serve` | Start the MCP stdio server (used by `claude mcp add`) |
### Config files + state
@ -253,8 +251,7 @@ Gbrain itself ships with these that gstack wraps:
| `SUPABASE_API_BASE` | `gstack-gbrain-supabase-provision` | Override the Management API host. Used by tests to point at a mock server. |
| `GBRAIN_INSTALL_DIR` | `gstack-gbrain-install` | Override default install path (`~/gbrain`) |
| `GSTACK_HOME` | every bin helper | Override `~/.gstack` state dir. Heavy test use. |
| `VOYAGE_API_KEY` | `gbrain embed` subprocess; gstack PGLite init | When set, gstack inits PGLite with `voyage-code-3` (1024-dim), Voyage's code-specialized embedding model. Beats `voyage-4-large` and OpenAI `text-embedding-3-large` head-to-head on this codebase's symbol queries. See CHANGELOG v1.43.1.0 for the A/B numbers. |
| `OPENAI_API_KEY` | `gbrain embed` subprocess | Used for embeddings during `gbrain sync` / `/sync-gbrain` when `VOYAGE_API_KEY` is not set (gbrain's auto-selected fallback, `text-embedding-3-large` 1536-dim). Without either key, pages are imported structurally (symbol tables, chunks) but semantic search degrades — you'll see `[gbrain] embedding failed for code file ...` in the sync log. |
| `OPENAI_API_KEY` | `gbrain embed` subprocess | Required for embeddings during `gbrain sync` / `/sync-gbrain`. Without it, pages are imported structurally (symbol tables, chunks) but semantic search degrades — you'll see `[gbrain] embedding failed for code file ... OpenAI embedding requires OPENAI_API_KEY` in the sync log. |
| `ANTHROPIC_API_KEY` | `claude-agent-sdk`, paid evals | Required for `bun run test:evals` and any direct `query()` call against Claude. |
| `GSTACK_OPENAI_API_KEY` | `lib/conductor-env-shim.ts` | Conductor-injected fallback. Promoted to `OPENAI_API_KEY` when the canonical name is empty. |
| `GSTACK_ANTHROPIC_API_KEY` | `lib/conductor-env-shim.ts` | Same pattern as above for Anthropic. |
@ -348,7 +345,7 @@ Embeddings probably failed during import. Symbol queries (`code-def`, `code-refs
[gbrain] embedding failed for code file <name>: OpenAI embedding requires OPENAI_API_KEY
```
The fix is to put a provider API key in the process env before re-running. `VOYAGE_API_KEY` is preferred for code (gstack defaults PGLite to `voyage-code-3` when set); otherwise `OPENAI_API_KEY` falls back to `text-embedding-3-large`. On a bare Mac shell, source the key from `~/.zshrc` before calling. In Conductor, the `lib/conductor-env-shim.ts` shim promotes `GSTACK_ANTHROPIC_API_KEY` / `GSTACK_OPENAI_API_KEY` to their canonical names automatically; for `VOYAGE_API_KEY`, set it directly in your Conductor workspace env. Re-run `/sync-gbrain --code-only` to backfill embeddings on already-imported pages.
The fix is to put `OPENAI_API_KEY` in the process env before re-running. On a bare Mac shell, source it from `~/.zshrc` before calling. In Conductor, set `GSTACK_OPENAI_API_KEY` at the workspace level — `lib/conductor-env-shim.ts` promotes it to canonical automatically when imported. Re-run `/sync-gbrain --code-only` to backfill embeddings on already-imported pages.
### `gbrain sync` blocked at a commit hash — `FILE_TOO_LARGE`
@ -379,7 +376,7 @@ Another gstack session in a sibling Conductor workspace may be holding a lock on
## Related skills + next steps
- `/health` — includes a GBrain dimension (doctor status, sync queue depth, last-push age) in its 0-10 composite score. The dimension is omitted when gbrain isn't installed; running `/health` on a non-gbrain machine doesn't penalize that choice.
- `/gstack-upgrade` — keeps gstack itself up to date. Does NOT upgrade gbrain independently. gbrain installs at the latest HEAD by default; to refresh it, `git pull` in your gbrain clone (default `~/gbrain`) and re-run `/setup-gbrain`. Pin a specific commit with `gstack-gbrain-install --pinned-commit <sha>` if you need reproducibility. Installs below the minimum tested version are refused.
- `/gstack-upgrade` — keeps gstack itself up to date. Does NOT upgrade gbrain independently. To bump gbrain, update `PINNED_COMMIT` in `bin/gstack-gbrain-install` and re-run `/setup-gbrain`.
- `/retro` — weekly retrospective pulls learnings and plans from your gbrain when memory sync is on, letting the retro reference cross-machine history.
Run `/setup-gbrain` and see what sticks.

View File

@ -1 +1 @@
1.55.1.0
1.40.0.0

View File

@ -2,7 +2,16 @@
name: autoplan
preamble-tier: 3
version: 1.0.0
description: Auto-review pipeline — reads the full CEO, design, eng, and DX review skills from disk and runs them sequentially with auto-decisions using 6 decision principles. (gstack)
description: |
Auto-review pipeline — reads the full CEO, design, eng, and DX review skills from disk
and runs them sequentially with auto-decisions using 6 decision principles. Surfaces
taste decisions (close approaches, borderline scope, codex disagreements) at a final
approval gate. One command, fully reviewed plan out.
Use when asked to "auto review", "autoplan", "run all reviews", "review this plan
automatically", or "make the decisions for me".
Proactively suggest when the user has a plan file and wants to run the full review
gauntlet without answering 15-30 intermediate questions. (gstack)
Voice triggers (speech-to-text aliases): "auto plan", "automatic review".
benefits-from: [office-hours]
triggers:
- run all reviews
@ -21,19 +30,6 @@ allowed-tools:
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
<!-- Regenerate: bun run gen:skill-docs -->
## When to invoke this skill
Surfaces
taste decisions (close approaches, borderline scope, codex disagreements) at a final
approval gate. One command, fully reviewed plan out.
Use when asked to "auto review", "autoplan", "run all reviews", "review this plan
automatically", or "make the decisions for me".
Proactively suggest when the user has a plan file and wants to run the full review
gauntlet without answering 15-30 intermediate questions.
Voice triggers (speech-to-text aliases): "auto plan", "automatic review".
## Preamble (run first)
```bash
@ -69,7 +65,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
echo "QUESTION_TUNING: $_QUESTION_TUNING"
mkdir -p ~/.gstack/analytics
if [ "$_TEL" != "off" ]; then
echo '{"skill":"autoplan","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
echo '{"skill":"autoplan","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
fi
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
if [ -f "$_PF" ]; then
@ -111,19 +107,6 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
# Claude Code exposes plan mode via system reminders; we detect best-effort
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
# fall back to "inactive". Codex hosts and Claude execution mode both end up
# inactive, which is the safe default (defaults to file+execute pipeline).
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
export GSTACK_PLAN_MODE="active"
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
export GSTACK_PLAN_MODE="active"
else
export GSTACK_PLAN_MODE="inactive"
fi
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
```
@ -179,7 +162,7 @@ Only run `open` if yes. Always run `touch`.
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
Options:
- A) Help gstack get better! (recommended)
@ -255,7 +238,6 @@ Key routing rules:
- Ship/deploy/PR → invoke /ship or /land-and-deploy
- Save progress → invoke /context-save
- Resume context → invoke /context-restore
- Author a backlog-ready spec/issue → invoke /spec
```
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
@ -342,36 +324,7 @@ Effort both-scales: when an option involves effort, label both human-team and CC
Net line closes the tradeoff. Per-skill instructions may add stricter rules.
### Handling 5+ options — split, never drop
AskUserQuestion caps every call at **4 options**. With 5+ real options, NEVER
drop, merge, or silently defer one to fit. Pick a compliant shape:
- **Batch into ≤4-groups** — for coherent alternatives (e.g. version bumps,
layout variants). One call, 5th surfaced only if first 4 don't fit.
- **Split per-option** — for independent scope items (e.g. "ship E1..E6?").
Fire N sequential calls, one per option. Default to this when unsure.
Per-option call shape: `D<N>.k` header (e.g. D3.1..D3.5), ELI10 per option,
Recommendation, kind-note (no completeness score — Include/Defer/Cut/Hold are
decision actions), and 4 buckets:
**A) Include**, **B) Defer**, **C) Cut**, **D) Hold** (stop chain, discuss).
After the chain, fire `D<N>.final` to validate the assembled set (reprompt
dependency conflicts) and confirm shipping it. Use `D<N>.revise-<k>` to
revise one option without re-running the chain.
For N>6, fire a `D<N>.0` meta-AskUserQuestion first (proceed / narrow / batch).
question_ids for split chains: `<skill>-split-<option-slug>` (kebab-case ASCII,
≤64 chars, `-2`/`-3` suffix on collision). The runtime checker
(`bin/gstack-question-preference`) refuses `never-ask` on any `*-split-*` id,
so split chains are never AUTO_DECIDE-eligible — the user's option set is sacred.
**Full rule + worked examples + Hold/dependency semantics:** see
`docs/askuserquestion-split.md` in the gstack repo. Read on demand when N>4.
**Non-ASCII characters — write directly, never \u-escape.** When any
12. **Non-ASCII characters — write directly, never \u-escape.** When any
string field (question, option label, option description) contains
Chinese (繁體/簡體), Japanese, Korean, or other non-ASCII text, emit
the literal UTF-8 characters in the JSON string. **Never escape them
@ -404,9 +357,6 @@ Before calling AskUserQuestion, verify:
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
- [ ] If a per-option Hold fires, you stopped the chain immediately (didn't queue)
## Artifacts Sync (skill start)
@ -606,7 +556,84 @@ Applies to AskUserQuestion, user replies, and findings. AskUserQuestion Format i
- User-turn override wins: if the current message asks for terse / no explanations / just the answer, skip this section.
- Terse mode (EXPLAIN_LEVEL: terse): no glosses, no outcome-framing layer, shorter responses.
Curated jargon list lives at `~/.claude/skills/gstack/scripts/jargon-list.json` (80+ terms). On the first jargon term you encounter this session, Read that file once; treat the `terms` array as the canonical list. The list is repo-owned and may grow between releases.
Jargon list, gloss on first use if the term appears:
- idempotent
- idempotency
- race condition
- deadlock
- cyclomatic complexity
- N+1
- N+1 query
- backpressure
- memoization
- eventual consistency
- CAP theorem
- CORS
- CSRF
- XSS
- SQL injection
- prompt injection
- DDoS
- rate limit
- throttle
- circuit breaker
- load balancer
- reverse proxy
- SSR
- CSR
- hydration
- tree-shaking
- bundle splitting
- code splitting
- hot reload
- tombstone
- soft delete
- cascade delete
- foreign key
- composite index
- covering index
- OLTP
- OLAP
- sharding
- replication lag
- quorum
- two-phase commit
- saga
- outbox pattern
- inbox pattern
- optimistic locking
- pessimistic locking
- thundering herd
- cache stampede
- bloom filter
- consistent hashing
- virtual DOM
- reconciliation
- closure
- hoisting
- tail call
- GIL
- zero-copy
- mmap
- cold start
- warm start
- blue-green deploy
- canary deploy
- feature flag
- kill switch
- dead letter queue
- fan-out
- fan-in
- debounce
- throttle (UI)
- hydration mismatch
- memory leak
- GC pause
- heap fragmentation
- stack overflow
- null pointer
- dangling pointer
- buffer overflow
## Completeness Principle — Boil the Lake
@ -654,11 +681,7 @@ If you are looping on the same diagnostic, same file, or failed fix variants, ST
Before each AskUserQuestion, choose `question_id` from `scripts/question-registry.ts` or `{skill}-{slug}`, then run `~/.claude/skills/gstack/bin/gstack-question-preference --check "<id>"`. `AUTO_DECIDE` means choose the recommended option and say "Auto-decided [summary] → [option] (your preference). Change with /plan-tune." `ASK_NORMALLY` means ask.
**Embed the question_id as a marker in the question text** so hooks can identify it deterministically (plan-tune cathedral T14 / D18 progressive markers). Append `<gstack-qid:{question_id}>` somewhere in the rendered question (the leading line or trailing line is fine; the marker doesn't render visibly to the user when wrapped in HTML-style angle brackets, but the hook strips it). Without the marker the PreToolUse enforcement hook treats the AUQ as observed-only and never auto-decides — so always include it when the question matches a registered `question_id`.
**Embed the option recommendation via the `(recommended)` label suffix** on exactly one option per AUQ. The PreToolUse hook parses `(recommended)` first, falls back to "Recommendation: X" prose, and refuses to auto-decide if ambiguous. Two `(recommended)` labels = refuse.
After answer, log best-effort (PostToolUse hook also captures deterministically when installed; dedup on (source, tool_use_id) handles double-writes):
After answer, log best-effort:
```bash
~/.claude/skills/gstack/bin/gstack-question-log '{"skill":"autoplan","question_id":"<id>","question_summary":"<short>","category":"<approval|clarification|routing|cherry-pick|feedback-loop>","door_type":"<one-way|two-way>","options_count":N,"user_choice":"<key>","recommended":"<key>","session_id":"'"$_SESSION_ID"'"}' 2>/dev/null || true
```

View File

@ -2,7 +2,14 @@
name: benchmark-models
preamble-tier: 1
version: 1.0.0
description: Cross-model benchmark for gstack skills. (gstack)
description: |
Cross-model benchmark for gstack skills. Runs the same prompt through Claude,
GPT (via Codex CLI), and Gemini side-by-side — compares latency, tokens, cost,
and optionally quality via LLM judge. Answers "which model is actually best
for this skill?" with data instead of vibes. Separate from /benchmark, which
measures web page performance. Use when: "benchmark models", "compare models",
"which model is best for X", "cross-model comparison", "model shootout". (gstack)
Voice triggers (speech-to-text aliases): "compare models", "model shootout", "which model is best".
triggers:
- cross model benchmark
- compare claude gpt gemini
@ -16,18 +23,6 @@ allowed-tools:
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
<!-- Regenerate: bun run gen:skill-docs -->
## When to invoke this skill
Runs the same prompt through Claude,
GPT (via Codex CLI), and Gemini side-by-side — compares latency, tokens, cost,
and optionally quality via LLM judge. Answers "which model is actually best
for this skill?" with data instead of vibes. Separate from /benchmark, which
measures web page performance. Use when: "benchmark models", "compare models",
"which model is best for X", "cross-model comparison", "model shootout".
Voice triggers (speech-to-text aliases): "compare models", "model shootout", "which model is best".
## Preamble (run first)
```bash
@ -63,7 +58,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
echo "QUESTION_TUNING: $_QUESTION_TUNING"
mkdir -p ~/.gstack/analytics
if [ "$_TEL" != "off" ]; then
echo '{"skill":"benchmark-models","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
echo '{"skill":"benchmark-models","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
fi
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
if [ -f "$_PF" ]; then
@ -105,19 +100,6 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
# Claude Code exposes plan mode via system reminders; we detect best-effort
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
# fall back to "inactive". Codex hosts and Claude execution mode both end up
# inactive, which is the safe default (defaults to file+execute pipeline).
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
export GSTACK_PLAN_MODE="active"
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
export GSTACK_PLAN_MODE="active"
else
export GSTACK_PLAN_MODE="inactive"
fi
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
```
@ -173,7 +155,7 @@ Only run `open` if yes. Always run `touch`.
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
Options:
- A) Help gstack get better! (recommended)
@ -249,7 +231,6 @@ Key routing rules:
- Ship/deploy/PR → invoke /ship or /land-and-deploy
- Save progress → invoke /context-save
- Resume context → invoke /context-restore
- Author a backlog-ready spec/issue → invoke /spec
```
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`

View File

@ -2,7 +2,13 @@
name: benchmark
preamble-tier: 1
version: 1.0.0
description: Performance regression detection using the browse daemon. (gstack)
description: |
Performance regression detection using the browse daemon. Establishes
baselines for page load times, Core Web Vitals, and resource sizes.
Compares before/after on every PR. Tracks performance trends over time.
Use when: "performance", "benchmark", "page speed", "lighthouse", "web vitals",
"bundle size", "load time". (gstack)
Voice triggers (speech-to-text aliases): "speed test", "check performance".
triggers:
- performance benchmark
- check page speed
@ -17,17 +23,6 @@ allowed-tools:
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
<!-- Regenerate: bun run gen:skill-docs -->
## When to invoke this skill
Establishes
baselines for page load times, Core Web Vitals, and resource sizes.
Compares before/after on every PR. Tracks performance trends over time.
Use when: "performance", "benchmark", "page speed", "lighthouse", "web vitals",
"bundle size", "load time".
Voice triggers (speech-to-text aliases): "speed test", "check performance".
## Preamble (run first)
```bash
@ -63,7 +58,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
echo "QUESTION_TUNING: $_QUESTION_TUNING"
mkdir -p ~/.gstack/analytics
if [ "$_TEL" != "off" ]; then
echo '{"skill":"benchmark","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
echo '{"skill":"benchmark","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
fi
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
if [ -f "$_PF" ]; then
@ -105,19 +100,6 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
# Claude Code exposes plan mode via system reminders; we detect best-effort
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
# fall back to "inactive". Codex hosts and Claude execution mode both end up
# inactive, which is the safe default (defaults to file+execute pipeline).
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
export GSTACK_PLAN_MODE="active"
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
export GSTACK_PLAN_MODE="active"
else
export GSTACK_PLAN_MODE="inactive"
fi
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
```
@ -173,7 +155,7 @@ Only run `open` if yes. Always run `touch`.
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
Options:
- A) Help gstack get better! (recommended)
@ -249,7 +231,6 @@ Key routing rules:
- Ship/deploy/PR → invoke /ship or /land-and-deploy
- Save progress → invoke /context-save
- Resume context → invoke /context-restore
- Author a backlog-ready spec/issue → invoke /spec
```
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`

View File

@ -56,23 +56,8 @@ if [ ! -e "$AGENTS_LINK" ]; then
ln -s "$REPO_ROOT" "$AGENTS_LINK"
fi
# 6. Run setup via the symlink so it detects .claude/skills/ as its parent.
#
# Workspace/dev setup MUST be non-interactive: Conductor runs this under a
# forwarded pty, so any `read` in setup (skill-prefix prompt, plan-tune hook
# consent) would hang the workspace forever. Detaching stdin makes every setup
# prompt take its smart non-interactive default (flat skill names, etc.).
#
# `--plan-tune-hooks=prompt` is load-bearing, not redundant: stdin alone only
# suppresses the *prompt* branch. A saved `plan_tune_hooks: yes` or an exported
# GSTACK_PLAN_TUNE_HOOKS=yes would still resolve to "install" and rewrite the
# user's global ~/.claude/settings.json to point at THIS ephemeral worktree —
# which breaks once the workspace is deleted. The flag has highest precedence,
# so it pins resolution to "prompt", and closed stdin then makes prompt-mode a
# no-op skip (no install, no decline marker). A dev workspace must never mutate
# global settings.json. To install the hooks, run `./setup --plan-tune-hooks`
# directly (outside dev-setup). Saved prefix/other config preferences still apply.
"$GSTACK_LINK/setup" --plan-tune-hooks=prompt </dev/null
# 6. Run setup via the symlink so it detects .claude/skills/ as its parent
"$GSTACK_LINK/setup"
echo ""
echo "Dev mode active. Skills resolve from this working tree."

View File

@ -49,19 +49,6 @@ strip_git() {
echo "${1%.git}"
}
# Succeeds (status 0) only when $1 looks like "owner/repo": non-empty,
# contains a slash, does not start or end with a slash, and has no "//".
# Returns status 1 for anything else.
valid_owner_repo() {
  local candidate="$1"
  case "$candidate" in
    "" | /* | */ | *//*) return 1 ;;  # empty, leading/trailing slash, or empty segment
    */*) return 0 ;;                  # at least one separator remains
  esac
  return 1                            # no slash at all
}
# Parse to (host, owner_repo) regardless of input shape.
parse_url() {
local u="$1"
@ -95,7 +82,7 @@ parse_url() {
exit 3
;;
esac
if [ -z "$host" ] || ! valid_owner_repo "$owner_repo"; then
if [ -z "$host" ] || [ -z "$owner_repo" ] || [ "$owner_repo" = "$u" ]; then
echo "gstack-artifacts-url: failed to parse host/owner from: $u" >&2
exit 3
fi

View File

@ -1,949 +0,0 @@
#!/usr/bin/env bun
/**
* gstack-brain-cache — three-tier cache for brain-aware planning skills.
*
* Subcommands:
* get <entity-name> [--project <slug>] — return digest content; refresh if stale
* refresh [--full] [--entity X] [--project <slug>] — force refresh one or all
* invalidate <entity-name> [--project <slug>] — mark stale; next get triggers cold
* digest <entity-slug> — compress a brain page slug to digest
* meta [--project <slug>] — print _meta.json
*
* (Later commits add: bootstrap [T2b], list [T18], purge [T18], retention sweep [T18].)
*
* Cache layout:
* ~/.gstack/brain-cache/ ← cross-project (user-profile only)
* ~/.gstack/projects/<slug>/brain-cache/ ← per-project (everything else)
*
* Atomic writes via .tmp + rename. Stale-but-usable fallback when brain
* unreachable. Concurrent-refresh dedup is a follow-up commit (T15).
*/
import { existsSync, mkdirSync, readFileSync, writeFileSync, renameSync, statSync, unlinkSync, readdirSync, openSync, closeSync } from 'fs';
import { join, dirname } from 'path';
import { homedir, hostname } from 'os';
import { spawnSync } from 'child_process';
import { execGbrainJson, spawnGbrain } from '../lib/gbrain-exec';
import {
BRAIN_CACHE_ENTITIES,
CACHE_REFRESH_LOCK_TIMEOUT_MS,
GSTACK_SCHEMA_PACK_NAME,
GSTACK_SCHEMA_PACK_VERSION,
SALIENCE_DEFAULT_ALLOWLIST,
type BrainCacheEntity,
} from '../scripts/brain-cache-spec';
// ──────────────────────────────────────────────────────────────────────────
// Paths + meta
// ──────────────────────────────────────────────────────────────────────────
const GSTACK_HOME = process.env.GSTACK_HOME || join(homedir(), '.gstack');
/**
 * Bookkeeping record stored as `_meta.json` for one cache scope
 * (read by `loadMeta`, written by `saveMeta`).
 */
interface CacheMeta {
/** Version of the schema pack the cache was built against. Mismatch → full rebuild. */
schema_version: string;
/** SHA8 hash of the brain MCP endpoint URL (or 'local' for on-disk engines). */
endpoint_hash: string;
/** Per-entity last-refresh epoch ms. Absent → never refreshed (treated as stale by `isStale`). */
last_refresh: Record<string, number>;
/** Per-entity last-attempt epoch ms (even if attempt failed). For stale-but-usable diagnostics. */
last_attempt?: Record<string, number>;
}
/**
 * Resolves the directory that stores an entity's cache file.
 *
 * Cross-project entities live in `<GSTACK_HOME>/brain-cache`; all other
 * entities live in `<GSTACK_HOME>/projects/<slug>/brain-cache`.
 *
 * @param entity - Cache entity; its `scope` selects the location.
 * @param projectSlug - Project slug, required unless the entity is cross-project.
 * @returns The directory path (this function does not create it).
 * @throws Error when a non-cross-project entity is requested without a slug.
 */
export function entityDir(entity: BrainCacheEntity, projectSlug: string | null): string {
  if (entity.scope !== 'cross-project') {
    if (!projectSlug) {
      throw new Error(`Per-project entity needs a project slug: ${entity.file}`);
    }
    return join(GSTACK_HOME, 'projects', projectSlug, 'brain-cache');
  }
  return join(GSTACK_HOME, 'brain-cache');
}
/**
 * Resolves the full path of the cache file for a named entity.
 *
 * @param entityName - Key into `BRAIN_CACHE_ENTITIES`.
 * @param projectSlug - Project slug, forwarded to `entityDir`.
 * @returns `<entity directory>/<entity.file>`.
 * @throws Error when `entityName` is not a registered entity, or when
 *   `entityDir` rejects the slug.
 */
export function entityPath(entityName: string, projectSlug: string | null): string {
  const spec = BRAIN_CACHE_ENTITIES[entityName];
  if (spec) {
    const directory = entityDir(spec, projectSlug);
    return join(directory, spec.file);
  }
  throw new Error(`Unknown brain cache entity: ${entityName}`);
}
/**
 * Resolves the `_meta.json` path for a cache scope.
 *
 * @param scope - `'cross-project'` for the shared cache, `'per-project'` otherwise.
 * @param projectSlug - Project slug, required for the per-project scope.
 * @returns Path to the scope's `_meta.json`.
 * @throws Error when the per-project scope is requested without a slug.
 */
export function metaPath(scope: 'cross-project' | 'per-project', projectSlug: string | null): string {
  if (scope !== 'cross-project') {
    if (!projectSlug) throw new Error('Per-project meta needs a project slug');
    return join(GSTACK_HOME, 'projects', projectSlug, 'brain-cache', '_meta.json');
  }
  return join(GSTACK_HOME, 'brain-cache', '_meta.json');
}
/**
 * Loads the `_meta.json` bookkeeping record for a scope.
 *
 * Falls back to a fresh record (current schema version, current endpoint
 * hash, no refresh history) when the file is missing, unreadable, not valid
 * JSON, or valid JSON that is not an object. A parsed object whose
 * `last_refresh` is missing or not a plain object gets an empty
 * `last_refresh`, so every entity reads as never refreshed instead of
 * crashing later lookups. `schema_version` and `endpoint_hash` are passed
 * through untouched so the mismatch checks still trigger a rebuild.
 *
 * @param scope - Cache scope whose meta file to read.
 * @param projectSlug - Project slug, required for the per-project scope.
 * @returns The stored record, repaired or replaced as described above.
 * @throws Error from `metaPath` when a per-project scope has no slug.
 */
function loadMeta(scope: 'cross-project' | 'per-project', projectSlug: string | null): CacheMeta {
  const freshMeta = (): CacheMeta => ({
    schema_version: GSTACK_SCHEMA_PACK_VERSION,
    endpoint_hash: detectEndpointHash(),
    last_refresh: {},
    last_attempt: {},
  });
  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null && !Array.isArray(value);

  const path = metaPath(scope, projectSlug);
  if (!existsSync(path)) {
    return freshMeta();
  }
  try {
    const parsed: unknown = JSON.parse(readFileSync(path, 'utf-8'));
    if (!isPlainObject(parsed)) {
      // Well-formed JSON of the wrong shape (null, array, scalar) is as
      // unusable as a syntax error — treat it as corrupt.
      return freshMeta();
    }
    if (!isPlainObject(parsed.last_refresh)) {
      // Keep whatever version/endpoint identity was recorded; only reset the
      // refresh history that callers index into.
      return { ...parsed, last_refresh: {} } as unknown as CacheMeta;
    }
    return parsed as unknown as CacheMeta;
  } catch {
    // Corrupt _meta — start fresh (entries will refresh on next access).
    return freshMeta();
  }
}
/**
 * Persists a scope's bookkeeping record as pretty-printed JSON.
 *
 * @param scope - Cache scope whose `_meta.json` to write.
 * @param projectSlug - Project slug, required for the per-project scope.
 * @param meta - Record to serialize (2-space indentation).
 */
function saveMeta(scope: 'cross-project' | 'per-project', projectSlug: string | null, meta: CacheMeta): void {
  const target = metaPath(scope, projectSlug);
  const parentDir = dirname(target);
  mkdirSync(parentDir, { recursive: true });
  const serialized = JSON.stringify(meta, null, 2);
  atomicWrite(target, serialized);
}
// ──────────────────────────────────────────────────────────────────────────
// Endpoint hash detection
// ──────────────────────────────────────────────────────────────────────────
import { createHash } from 'crypto';
/**
 * Short content fingerprint: the first 8 hex characters of the SHA-256
 * digest of `input`.
 */
function sha8(input: string): string {
  const hasher = createHash('sha256');
  hasher.update(input);
  const fullHex = hasher.digest('hex');
  return fullHex.substring(0, 8);
}
/**
 * Identifies the active brain endpoint.
 *
 * Reads `~/.claude.json` and looks for the `gbrain` MCP server entry, taking
 * its `url` or, failing that, `transport.url`. A non-empty string URL is
 * returned as its `sha8` fingerprint. In every other case (no config file,
 * unreadable or invalid JSON, no usable URL) the literal `'local'` is
 * returned. Callers compare the value against the one stored in cache meta
 * to notice that the user switched brains.
 *
 * @returns `sha8(url)` for a configured endpoint, otherwise `'local'`.
 */
export function detectEndpointHash(): string {
  const LOCAL_ENGINE = 'local';
  const configFile = join(homedir(), '.claude.json');
  if (!existsSync(configFile)) {
    return LOCAL_ENGINE;
  }
  try {
    const config = JSON.parse(readFileSync(configFile, 'utf-8'));
    const server = config?.mcpServers?.gbrain;
    const endpoint = server?.url || server?.transport?.url;
    const usable = typeof endpoint === 'string' && endpoint.length > 0;
    return usable ? sha8(endpoint) : LOCAL_ENGINE;
  } catch {
    // Unreadable or malformed config: behave as if no endpoint is configured.
    return LOCAL_ENGINE;
  }
}
// ──────────────────────────────────────────────────────────────────────────
// Atomic write (tmp + rename)
// ──────────────────────────────────────────────────────────────────────────
/**
 * Writes `content` to `path` by staging it in a sibling temp file and then
 * renaming the temp file over the destination.
 *
 * The parent directory is created if needed. The temp name embeds the
 * current pid and timestamp: `<path>.tmp.<pid>.<epoch-ms>`.
 *
 * @param path - Destination file.
 * @param content - UTF-8 text to write.
 */
function atomicWrite(path: string, content: string): void {
  const parentDir = dirname(path);
  mkdirSync(parentDir, { recursive: true });
  const uniqueSuffix = [process.pid, Date.now()].join('.');
  const stagingFile = `${path}.tmp.${uniqueSuffix}`;
  writeFileSync(stagingFile, content, 'utf-8');
  renameSync(stagingFile, path);
}
// ──────────────────────────────────────────────────────────────────────────
// Staleness + refresh logic
// ──────────────────────────────────────────────────────────────────────────
/**
 * Reports whether an entity's cached digest should be refreshed.
 *
 * @param entityName - Key into `BRAIN_CACHE_ENTITIES`.
 * @param meta - Bookkeeping record holding per-entity refresh timestamps.
 * @returns `true` when the entity is unknown, has no recorded refresh, or
 *   its last refresh is older than the entity's `ttl_ms`; `false` otherwise.
 */
function isStale(entityName: string, meta: CacheMeta): boolean {
  const spec = BRAIN_CACHE_ENTITIES[entityName];
  const refreshedAt = spec ? meta.last_refresh[entityName] : undefined;
  if (!spec || !refreshedAt) {
    return true;
  }
  const ageMs = Date.now() - refreshedAt;
  return ageMs > spec.ttl_ms;
}
/**
 * Checks whether the entity's cache file is present on disk.
 *
 * @throws Error from `entityPath` for an unknown entity or a missing slug.
 */
function hasFile(entityName: string, projectSlug: string | null): boolean {
  const digestFile = entityPath(entityName, projectSlug);
  return existsSync(digestFile);
}
/**
 * Reports whether the cache was built against a schema pack version other
 * than the current `GSTACK_SCHEMA_PACK_VERSION`.
 */
function schemaVersionMismatch(meta: CacheMeta): boolean {
  const recordedVersion = meta.schema_version;
  return !(recordedVersion === GSTACK_SCHEMA_PACK_VERSION);
}
/**
 * Reports whether the endpoint hash stored in `meta` no longer matches the
 * endpoint detected right now by `detectEndpointHash`.
 */
function endpointSwitched(meta: CacheMeta): boolean {
  const recordedHash = meta.endpoint_hash;
  const currentHash = detectEndpointHash();
  return recordedHash !== currentHash;
}
// ──────────────────────────────────────────────────────────────────────────
// Subcommand: get
// ──────────────────────────────────────────────────────────────────────────
/**
 * Outcome of `cmdGet`: where the digest lives and how the cache satisfied
 * (or failed to satisfy) the request.
 */
interface GetResult {
/** Path to the digest file. Always set, even when `state` is 'missing' (the file may not exist then). */
path: string;
/**
 * Cache state:
 * - 'warm': file present and within TTL (also returned after a successful schema/endpoint rebuild).
 * - 'cold-refreshed': was stale or absent, refreshed inline.
 * - 'stale-fallback': refresh failed, existing file used anyway.
 * - 'missing': nothing usable — refresh or rebuild did not produce a fresh file and no fallback applies.
 */
state: 'warm' | 'cold-refreshed' | 'stale-fallback' | 'missing';
/** Optional message for diagnostics. Set on the 'stale-fallback' and 'missing' results. */
message?: string;
}
/**
 * `get` subcommand: resolves an entity's digest, refreshing it when needed.
 *
 * Decision order:
 * 1. Schema-version or endpoint change recorded in meta → rebuild the whole
 *    scope, then report 'warm' if this entity came out fresh, else 'missing'.
 * 2. File present and within TTL → 'warm'.
 * 3. Otherwise refresh this entity → 'cold-refreshed' on success.
 * 4. Refresh failed → 'stale-fallback' if a file exists, else 'missing'.
 *
 * @param entityName - Key into `BRAIN_CACHE_ENTITIES`.
 * @param projectSlug - Project slug, required for per-project entities.
 * @returns Digest path plus the cache state that produced it.
 * @throws Error when `entityName` is not a registered entity.
 */
export function cmdGet(entityName: string, projectSlug: string | null): GetResult {
  const entity = BRAIN_CACHE_ENTITIES[entityName];
  if (!entity) throw new Error(`Unknown entity: ${entityName}`);

  const { scope } = entity;
  const meta = loadMeta(scope, projectSlug);

  // Fresh-and-present check against a given meta snapshot.
  const isWarm = (snapshot: CacheMeta): boolean =>
    hasFile(entityName, projectSlug) && !isStale(entityName, snapshot);

  // Builds the result; `message` is omitted entirely when not supplied.
  const reply = (state: GetResult['state'], message?: string): GetResult => {
    const path = entityPath(entityName, projectSlug);
    return message === undefined ? { path, state } : { path, state, message };
  };

  // Schema-version mismatch or endpoint switch → full rebuild (D4 A4).
  if (schemaVersionMismatch(meta) || endpointSwitched(meta)) {
    rebuildAllForScope(scope, projectSlug);
    const rebuiltMeta = loadMeta(scope, projectSlug);
    // The rebuild may have failed for this entity specifically.
    return isWarm(rebuiltMeta)
      ? reply('warm')
      : reply('missing', 'rebuild after schema/endpoint change');
  }

  if (isWarm(meta)) {
    return reply('warm');
  }

  // Stale or missing — try a cold refresh.
  if (refreshEntity(entityName, projectSlug)) {
    return reply('cold-refreshed');
  }

  // Refresh failed: serve the stale file if there is one.
  return hasFile(entityName, projectSlug)
    ? reply('stale-fallback', 'brain unreachable; using stale cache')
    : reply('missing', 'brain unreachable; no cache available');
}
// ──────────────────────────────────────────────────────────────────────────
// Subcommand: refresh
// ──────────────────────────────────────────────────────────────────────────
// ──────────────────────────────────────────────────────────────────────────
// Lockfile dedup (T15 / D3)
// ──────────────────────────────────────────────────────────────────────────
/**
 * Resolves the refresh lock file for a project scope.
 *
 * With a project slug the lock lives in that project's `brain-cache`
 * directory; without one it lives in the shared `<GSTACK_HOME>/brain-cache`.
 * Cross-project entities are therefore locked by whichever project triggers
 * the refresh.
 *
 * @param projectSlug - Project slug, or `null` for the shared location.
 * @returns Path to `.refresh.lock`.
 */
function lockPath(projectSlug: string | null): string {
  const segments = projectSlug
    ? [GSTACK_HOME, 'projects', projectSlug, 'brain-cache']
    : [GSTACK_HOME, 'brain-cache'];
  return join(...segments, '.refresh.lock');
}
/** Handle for a held refresh lock: produced by `tryAcquireLock`, consumed by `releaseLock`. */
interface LockHandle {
/** File descriptor slot. `tryAcquireLock` always sets this to -1 (no descriptor is kept open). */
fd: number;
/** Path of the lock file that `releaseLock` removes. */
path: string;
}
/**
 * Try to acquire the refresh lock. Returns null when another process holds it
 * (and the lock is fresh). Stale locks (process dead OR older than the
 * timeout) are taken over.
 *
 * The lock file holds JSON `{ pid, host, ts }`. An existing lock counts as
 * fresh only when its age is within CACHE_REFRESH_LOCK_TIMEOUT_MS AND its
 * owner is not known to be dead; the liveness probe runs only for locks
 * written on this host.
 *
 * @param projectSlug - Project slug selecting the lock location (see `lockPath`).
 * @returns A handle (with `fd` set to -1) on success; null when a fresh lock
 *   exists, the lock file cannot be written, or the post-write ownership
 *   check fails.
 */
function tryAcquireLock(projectSlug: string | null): LockHandle | null {
const path = lockPath(projectSlug);
mkdirSync(dirname(path), { recursive: true });
// If a lock exists, see if it's stale
if (existsSync(path)) {
try {
const raw = readFileSync(path, 'utf-8');
const lock = JSON.parse(raw) as { pid: number; host: string; ts: number };
const age = Date.now() - lock.ts;
// A pid from another host cannot be probed here, so only same-host locks can be declared dead.
const sameHost = lock.host === hostname();
const processGone = sameHost && lock.pid > 0 && !isPidAlive(lock.pid);
if (age <= CACHE_REFRESH_LOCK_TIMEOUT_MS && !processGone) {
return null; // someone else holds a fresh lock
}
// Stale: take over
} catch {
// Corrupt lock file → take over
}
}
// Write our lock via tmp + rename so readers never observe a partially written payload.
// NOTE(review): the original comment called this "best-effort O_EXCL", but rename
// replaces an existing destination rather than failing, so this step is not an
// exclusive create — the re-read below is the only guard against a concurrent writer.
// Confirm whether a truly exclusive create is wanted here.
const payload = JSON.stringify({ pid: process.pid, host: hostname(), ts: Date.now() });
const tmp = `${path}.tmp.${process.pid}.${Date.now()}`;
try {
writeFileSync(tmp, payload);
renameSync(tmp, path);
} catch (err) {
// Any write/rename failure is reported as "not acquired"; the error itself is not surfaced.
return null;
}
// Race: another process may have raced us. Re-read and verify ownership.
try {
const raw = readFileSync(path, 'utf-8');
const lock = JSON.parse(raw) as { pid: number; host: string };
if (lock.pid !== process.pid || lock.host !== hostname()) {
return null;
}
} catch {
return null;
}
// No descriptor is held open; the lock is represented purely by the file at `path`.
return { fd: -1, path };
}
/** Release a held lock by deleting its file. Failures are ignored (best effort). */
function releaseLock(handle: LockHandle): void {
  const { path: lockFile } = handle;
  try {
    unlinkSync(lockFile);
  } catch {
    // best effort
  }
}
/**
 * Probe whether a process with the given pid exists by sending it signal 0.
 *
 * @returns true when the probe succeeds or fails with EPERM (the process
 *   exists but belongs to someone else); false for any other failure.
 */
function isPidAlive(pid: number): boolean {
  let alive = true;
  try {
    process.kill(pid, 0);
  } catch (probeError: unknown) {
    const code = (probeError as { code?: unknown } | null | undefined)?.code;
    alive = code === 'EPERM';
  }
  return alive;
}
/**
 * Run `fn` while holding the refresh lock for `projectSlug`.
 *
 * When the lock cannot be acquired (tryAcquireLock returns null) `fn` is not
 * called and the literal 'dedup' is returned so the caller can retry or fall
 * back to stale data. The lock is released after `fn` returns or throws.
 */
export function withRefreshLock<T>(projectSlug: string | null, fn: () => T): T | 'dedup' {
  const lock = tryAcquireLock(projectSlug);
  if (lock === null) {
    return 'dedup';
  }
  let outcome: T;
  try {
    outcome = fn();
  } finally {
    releaseLock(lock);
  }
  return outcome;
}
/**
 * Refresh a single entity's digest from the brain.
 *
 * Records the attempt time in the scope's meta, fetches the digest, trims it
 * to the entity's byte budget, writes it to the entity's cache path and then
 * stamps last_refresh plus the current schema version and endpoint hash.
 *
 * @returns true when a digest was written; false for an unknown entity or
 *   when the fetch produced nothing (the attempt time is still saved).
 */
export function refreshEntity(entityName: string, projectSlug: string | null): boolean {
  const spec = BRAIN_CACHE_ENTITIES[entityName];
  if (!spec) return false;

  // Record the attempt before talking to the brain.
  const meta = loadMeta(spec.scope, projectSlug);
  meta.last_attempt = meta.last_attempt || {};
  meta.last_attempt[entityName] = Date.now();

  const fetched = fetchAndCompressEntity(entityName, projectSlug);
  if (fetched === null) {
    saveMeta(spec.scope, projectSlug, meta);
    return false;
  }

  // Per-entity budget; the per-skill budget is enforced separately at
  // preflight injection time.
  const overBudget = Buffer.byteLength(fetched, 'utf-8') > spec.budget_bytes;
  const digest = overBudget ? truncateToBudget(fetched, spec.budget_bytes) : fetched;
  atomicWrite(entityPath(entityName, projectSlug), digest);

  meta.last_refresh[entityName] = Date.now();
  // Keep schema/endpoint identity fresh.
  meta.schema_version = GSTACK_SCHEMA_PACK_VERSION;
  meta.endpoint_hash = detectEndpointHash();
  saveMeta(spec.scope, projectSlug, meta);
  return true;
}
/**
 * Refresh every entity that belongs to the requested scope.
 *
 * With a project slug, cross-project entities are skipped; without one,
 * per-project entities are skipped.
 *
 * @returns how many entity refreshes succeeded and how many failed
 */
export function refreshAll(projectSlug: string | null): { success: number; failed: number } {
  const tally = { success: 0, failed: 0 };
  const hasProject = Boolean(projectSlug);
  for (const [name, entity] of Object.entries(BRAIN_CACHE_ENTITIES)) {
    const outOfScope = hasProject
      ? entity.scope === 'cross-project'
      : entity.scope === 'per-project';
    if (outOfScope) continue;
    if (refreshEntity(name, projectSlug)) {
      tally.success += 1;
    } else {
      tally.failed += 1;
    }
  }
  return tally;
}
/**
 * Rebuild one scope from scratch: delete the scope's cached digest files,
 * write a fresh meta (current schema version and endpoint hash, empty
 * timestamps), then refresh every entity of that scope.
 */
function rebuildAllForScope(scope: 'cross-project' | 'per-project', projectSlug: string | null): void {
  const namesInScope = Object.entries(BRAIN_CACHE_ENTITIES)
    .filter(([, entity]) => entity.scope === scope)
    .map(([name]) => name);

  // Wipe files but keep the directory.
  for (const name of namesInScope) {
    const file = entityPath(name, projectSlug);
    if (!existsSync(file)) continue;
    try {
      unlinkSync(file);
    } catch {
      // best effort
    }
  }

  const fresh: CacheMeta = {
    schema_version: GSTACK_SCHEMA_PACK_VERSION,
    endpoint_hash: detectEndpointHash(),
    last_refresh: {},
    last_attempt: {},
  };
  saveMeta(scope, projectSlug, fresh);

  for (const name of namesInScope) {
    refreshEntity(name, projectSlug);
  }
}
// ──────────────────────────────────────────────────────────────────────────
// Subcommand: invalidate
// ──────────────────────────────────────────────────────────────────────────
/**
 * Invalidate one entity by dropping its last_refresh timestamp from the
 * scope's meta. The cached digest file itself is left in place.
 *
 * @throws Error when `entityName` is not a registered entity
 */
export function cmdInvalidate(entityName: string, projectSlug: string | null): void {
  const spec = BRAIN_CACHE_ENTITIES[entityName];
  if (!spec) {
    throw new Error(`Unknown entity: ${entityName}`);
  }
  const { scope } = spec;
  const meta = loadMeta(scope, projectSlug);
  Reflect.deleteProperty(meta.last_refresh, entityName);
  saveMeta(scope, projectSlug, meta);
}
// ──────────────────────────────────────────────────────────────────────────
// Fetch + compress per-entity
// ──────────────────────────────────────────────────────────────────────────
/**
 * Returns the digest markdown content for an entity, or null if the brain is
 * unreachable, the source page doesn't exist, or the entity name is unknown.
 *
 * Simple entities map straight to a `gstack/<entity>/<project>` page; goals,
 * recent-decisions and salience are derived from list queries.
 *
 * @param entityName registered entity name (e.g. 'product', 'goals')
 * @param projectSlug project the digest is for; null for cross-project calls
 */
function fetchAndCompressEntity(entityName: string, projectSlug: string | null): string | null {
  switch (entityName) {
    case 'user-profile':
      return fetchUserProfile();
    case 'product':
      return fetchProduct(projectSlug);
    case 'goals':
      return fetchGoals(projectSlug);
    case 'developer-persona':
    case 'brand':
    case 'competitive-intel':
      // These pages are keyed by project. Without a slug the template would
      // silently ask the brain for "gstack/<entity>/null".
      if (!projectSlug) return null;
      return fetchSimplePage(`gstack/${entityName}/${projectSlug}`);
    case 'recent-decisions':
      return fetchRecentDecisions(projectSlug);
    case 'salience':
      // fetchSalience applies the D9 allowlist gate before rendering.
      return fetchSalience(projectSlug);
    default:
      return null;
  }
}
/**
 * Fetch one brain page with `gbrain get <slug> --json` and compress it into a
 * digest. Returns null when the command exits non-zero, the output cannot be
 * parsed, the page has no body, or compression throws.
 */
function fetchSimplePage(slug: string): string | null {
  const run = spawnGbrain(['get', slug, '--json'], { timeout: 10_000 });
  if (run.status === 0) {
    try {
      const parsed = JSON.parse(run.stdout) as { body?: string; title?: string };
      const body = parsed?.body;
      if (body) {
        return compressPage(slug, parsed.title || slug, body);
      }
    } catch {
      // fall through to the null result below
    }
  }
  return null;
}
/**
 * Fetch the user-profile digest. The user slug comes from GSTACK_USER_SLUG,
 * then USER, then the literal 'unknown' (proper slug discovery is planned
 * for T16 per the original note).
 */
function fetchUserProfile(): string | null {
  const { GSTACK_USER_SLUG: override, USER: loginName } = process.env;
  const userSlug = override || loginName || 'unknown';
  return fetchSimplePage(`gstack/user-profile/${userSlug}`);
}
/** Fetch the product page digest for a project; null when no project is given. */
function fetchProduct(projectSlug: string | null): string | null {
  return projectSlug ? fetchSimplePage(`gstack/product/${projectSlug}`) : null;
}
/**
 * Build the goals digest: list gstack/goal pages (limit 10) and keep those
 * whose slug starts with `gstack/goal/<project>/`.
 *
 * Returns null without a project or when the listing has no `pages`; an
 * empty match still yields a valid digest with a "no active goals" line.
 *
 * NOTE(review): the limit is applied before the project filter and no sort
 * flag is passed, so another project's goals can crowd this project's out —
 * confirm against gbrain's list-pages behaviour.
 */
function fetchGoals(projectSlug: string | null): string | null {
  if (!projectSlug) return null;
  const listing = execGbrainJson<{ pages?: Array<{ slug: string; title?: string; body?: string }> }>([
    'list-pages',
    '--type', 'gstack/goal',
    '--limit', '10',
    '--json',
  ]);
  const pages = listing?.pages;
  if (!pages) return null;

  const prefix = `gstack/goal/${projectSlug}/`;
  const heading = `# Active goals (project: ${projectSlug})`;
  const bullets: string[] = [];
  for (const page of pages) {
    if (page.slug?.startsWith(prefix)) {
      bullets.push(`- [[${page.slug}]] — ${page.title || '(untitled)'}`);
    }
  }
  if (bullets.length === 0) {
    return `${heading}\n\n_No active goals recorded yet._\n`;
  }
  return `${heading}\n\n${bullets.join('\n')}\n`;
}
/**
 * recent-decisions: the 5 most recently updated gstack/skill-run pages,
 * rendered as one-line summaries.
 *
 * Returns null without a project or when the brain call yields no result, so
 * a failed call is reported as a failed refresh instead of being cached as
 * "no prior skill runs" (assumes execGbrainJson yields null/undefined on
 * failure — TODO confirm). A response with no pages produces the placeholder
 * digest.
 *
 * NOTE(review): the query is not filtered by project even though the header
 * names one; confirm whether skill-run pages should be scoped like goals.
 */
function fetchRecentDecisions(projectSlug: string | null): string | null {
  if (!projectSlug) return null;
  const result = execGbrainJson<{ pages?: Array<{ slug: string; title?: string }> }>([
    'list-pages',
    '--type', 'gstack/skill-run',
    '--limit', '5',
    '--sort', 'updated_desc',
    '--json',
  ]);
  // No parsed response at all: treat as unreachable rather than as "empty".
  if (!result) return null;

  const header = `# Recent decisions (project: ${projectSlug})`;
  const pages = result.pages ?? [];
  if (pages.length === 0) {
    return `${header}\n\n_No prior skill runs recorded._\n`;
  }
  const lines = pages.map((p) => `- ${p.title || p.slug}`);
  return `${header}\n\n${lines.join('\n')}\n`;
}
/**
 * Resolve the salience slug-prefix allowlist.
 *
 * Order of precedence: a non-empty GSTACK_SALIENCE_ALLOWLIST env var
 * (comma-separated), then `gstack-config get salience_allowlist` (2s
 * timeout), then SALIENCE_DEFAULT_ALLOWLIST. Entries are trimmed and empty
 * ones dropped. Any failure on the config path falls back to the defaults.
 */
export function getSalienceAllowlist(): ReadonlyArray<string> {
  const parseList = (csv: string): string[] =>
    csv.split(',').map((item) => item.trim()).filter(Boolean);

  // Env override short-circuits for tests and headless callers.
  const override = process.env.GSTACK_SALIENCE_ALLOWLIST;
  if (typeof override === 'string' && override.length > 0) {
    return parseList(override);
  }

  try {
    const configBin = join(homedir(), '.claude', 'skills', 'gstack', 'bin', 'gstack-config');
    if (!existsSync(configBin)) return SALIENCE_DEFAULT_ALLOWLIST;

    const run = spawnSync(configBin, ['get', 'salience_allowlist'], { timeout: 2000, encoding: 'utf-8' });
    if (run.status !== 0 || !run.stdout) return SALIENCE_DEFAULT_ALLOWLIST;

    const configured = parseList(run.stdout.trim());
    return configured.length > 0 ? configured : SALIENCE_DEFAULT_ALLOWLIST;
  } catch {
    return SALIENCE_DEFAULT_ALLOWLIST;
  }
}
/**
 * D9 salience privacy gate: true when `slug` begins with at least one prefix
 * from `allowlist`. Callers strip every non-matching entry before a digest is
 * written, so sensitive personal content stays out of planning prompts.
 */
export function isSalienceSlugAllowed(slug: string, allowlist: ReadonlyArray<string>): boolean {
  return allowlist.some((prefix) => slug.startsWith(prefix));
}
/**
 * Build the salience digest from `gbrain get-recent-salience` (14 days, limit
 * 10), keeping only pages whose slug passes the D9 allowlist gate.
 *
 * Returns null when the brain call yields no result, so a failed call is
 * reported as a failed refresh instead of being cached as "no salient pages"
 * (assumes execGbrainJson yields null/undefined on failure — TODO confirm).
 *
 * @param projectSlug accepted for signature parity; the query is not project-scoped
 */
function fetchSalience(projectSlug: string | null): string | null {
  // get-recent-salience is a gbrain CLI sub-shape; we use the MCP-shape JSON
  const result = execGbrainJson<{ pages?: Array<{ slug: string; title?: string; emotional_weight?: number }> }>([
    'get-recent-salience',
    '--days', '14',
    '--limit', '10',
    '--json',
  ]);
  // No parsed response at all: treat as unreachable rather than as "empty".
  if (!result) return null;
  if (!result.pages) return `# Recent salience\n\n_No salient pages in last 14d._\n`;

  // D9 privacy gate: strip entries outside the allowlist BEFORE rendering.
  // Sensitive personal content (family, therapy, reflection) is never written
  // into the digest cache file, even when the brain itself ranks it salient.
  const allowlist = getSalienceAllowlist();
  const filtered = result.pages.filter((p) => p.slug && isSalienceSlugAllowed(p.slug, allowlist));
  const stripped = result.pages.length - filtered.length;

  if (filtered.length === 0) {
    const header = `# Recent salience (last 14d)`;
    const note = stripped > 0
      ? `\n_All ${stripped} salient entries stripped by allowlist gate (no work-flow content in window)._\n`
      : `\n_No salient pages in last 14d._\n`;
    return `${header}\n${note}`;
  }

  const lines = filtered.map((p) => `- [[${p.slug}]] — ${p.title || ''} (weight: ${p.emotional_weight?.toFixed(2) ?? 'n/a'})`);
  const footer = stripped > 0
    ? `\n\n_${stripped} private entries stripped by allowlist gate._`
    : '';
  return `# Recent salience (last 14d)\n\n${lines.join('\n')}${footer}\n`;
}
/**
 * Compress a brain page body into a digest: strip a leading YAML frontmatter
 * block, trim surrounding whitespace, and prepend a title + slug header.
 * Per-entity budget enforcement happens at the caller (refreshEntity).
 *
 * Frontmatter is only recognised at the very start of the body (optionally
 * after blank lines). A `---` horizontal rule further down is ordinary
 * content and is left alone.
 *
 * @param slug brain page slug, echoed in the header
 * @param title heading for the digest
 * @param body raw page body, possibly starting with frontmatter
 */
function compressPage(slug: string, title: string, body: string): string {
  // Anchored to the start of the string (no `m` flag) so mid-document rules
  // are never mistaken for a frontmatter fence.
  const frontmatter = /^\s*---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/;
  const trimmed = body.replace(frontmatter, '').trim();
  return `# ${title}\nslug: ${slug}\n\n${trimmed}\n`;
}
/**
 * Truncate a digest so the RESULT, including the truncation notice, fits a
 * UTF-8 byte budget.
 *
 * Content within budget is returned unchanged. Otherwise room for the notice
 * is reserved, the cut is moved back to a UTF-8 character boundary so no
 * replacement characters are produced, and — when a newline sits in the last
 * 20% of the kept text — the cut is moved to that newline for readability.
 * If the budget is too small to hold the notice, the content is hard-cut to
 * the budget without one.
 *
 * @param content digest markdown
 * @param budgetBytes maximum size of the returned string in UTF-8 bytes
 */
function truncateToBudget(content: string, budgetBytes: number): string {
  const buf = Buffer.from(content, 'utf-8');
  if (buf.byteLength <= budgetBytes) return content;

  const footer = `\n\n_(digest truncated to ${budgetBytes}-byte budget)_\n`;
  const footerBytes = Buffer.byteLength(footer, 'utf-8');
  const footerFits = footerBytes < budgetBytes;

  let end = Math.max(0, Math.floor(footerFits ? budgetBytes - footerBytes : budgetBytes)) || 0;
  // UTF-8 continuation bytes look like 10xxxxxx; back up until the cut lands
  // on the first byte of a character.
  while (end > 0 && (buf.readUInt8(end) & 0xc0) === 0x80) end--;

  const truncated = buf.subarray(0, end).toString('utf-8');
  if (!footerFits) return truncated;

  const lastNewline = truncated.lastIndexOf('\n');
  const cleanCut = lastNewline > truncated.length * 0.8 ? truncated.slice(0, lastNewline) : truncated;
  return `${cleanCut}${footer}`;
}
// ──────────────────────────────────────────────────────────────────────────
// Subcommand: digest
// ──────────────────────────────────────────────────────────────────────────
/**
 * Public preview of a page digest: fetches and compresses the page for
 * `slug` without writing anything to the cache. Null when the page cannot be
 * fetched.
 */
export function cmdDigest(slug: string): string | null {
  const preview = fetchSimplePage(slug);
  return preview;
}
// ──────────────────────────────────────────────────────────────────────────
// Subcommand: meta
// ──────────────────────────────────────────────────────────────────────────
/**
 * Load cache meta for a scope: the per-project meta when a project slug is
 * given, otherwise the cross-project meta.
 */
export function cmdMeta(projectSlug: string | null): CacheMeta {
  return projectSlug
    ? loadMeta('per-project', projectSlug)
    : loadMeta('cross-project', null);
}
// ──────────────────────────────────────────────────────────────────────────
// Subcommand: bootstrap (T2b)
// ──────────────────────────────────────────────────────────────────────────
/**
 * Bootstrap synthesizes draft entity content from CLAUDE.md + README +
 * recent commits + learnings.jsonl for a fresh project. Emits as JSON for
 * the caller (skill template) to AUQ-confirm before any write to the brain.
 *
 * This keeps the CLI pure (no AUQ logic) while preventing silent
 * auto-extraction garbage (D10 T4 fix). The agent is responsible for the
 * "Synthesized X — looks right?" prompt per entity.
 *
 * Every field is optional: a field is only present when the corresponding
 * synthesis produced something. Each draft page carries the brain slug it
 * would be written to, a title, and a markdown body.
 */
export interface BootstrapDraft {
// Product page draft; cmdBootstrap targets `gstack/product/<project>`.
product?: { slug: string; title: string; body: string };
// Goal page drafts; cmdBootstrap emits at most three.
goals?: Array<{ slug: string; title: string; body: string }>;
// The remaining drafts are part of the shape but are not filled in by cmdBootstrap.
developer_persona?: { slug: string; title: string; body: string };
brand?: { slug: string; title: string; body: string };
competitive_intel?: { slug: string; title: string; body: string };
}
/**
 * Synthesize draft brain pages for a fresh project without writing anything.
 *
 * Reads CLAUDE.md and README.md from GSTACK_REPO_ROOT (or the current working
 * directory) for a product draft, and the project's learnings.jsonl under
 * GSTACK_HOME for up to three goal drafts. Missing files are tolerated.
 */
export function cmdBootstrap(projectSlug: string): BootstrapDraft {
  const repoRoot = process.env.GSTACK_REPO_ROOT || process.cwd();

  // Missing or unreadable files simply contribute no text.
  const readOrEmpty = (fileName: string): string => {
    try {
      return readFileSync(join(repoRoot, fileName), 'utf-8');
    } catch {
      return '';
    }
  };

  const draft: BootstrapDraft = {};

  // Product synthesis: CLAUDE.md headline + README first paragraph
  const claudeMd = readOrEmpty('CLAUDE.md');
  const readmeMd = readOrEmpty('README.md');
  const productBody = synthesizeProductLead(claudeMd, readmeMd, projectSlug);
  if (productBody) {
    draft.product = {
      slug: `gstack/product/${projectSlug}`,
      title: projectSlug,
      body: productBody,
    };
  }

  // Goals: hints drawn from the project's learnings.jsonl
  const learningsFile = join(GSTACK_HOME, 'projects', projectSlug, 'learnings.jsonl');
  const hints = synthesizeGoalsHints(learningsFile, repoRoot);
  if (hints.length > 0) {
    draft.goals = hints.slice(0, 3).map(({ title, body }, position) => ({
      slug: `gstack/goal/${projectSlug}/bootstrap-${position + 1}`,
      title,
      body,
    }));
  }

  return draft;
}
/**
 * Build a draft product page from CLAUDE.md (preferred) or README.md.
 *
 * The heading is the first H1 in the source, falling back to `slug`. The
 * "What" section is the first non-heading paragraph AFTER that H1, so badge
 * or banner lines above the title are not mistaken for the description; when
 * nothing follows the H1 (or there is no H1) the first paragraph anywhere in
 * the source is used. The lead is capped at 500 characters.
 *
 * @returns the draft markdown, or null when both sources are empty
 */
function synthesizeProductLead(claudeMd: string, readmeMd: string, slug: string): string | null {
  const source = claudeMd || readmeMd;
  if (!source) return null;

  const h1Match = /^#\s+(.+)$/m.exec(source);
  const heading = h1Match?.[1]?.trim() || slug;

  // First non-heading paragraph: consecutive lines that don't start with '#'.
  const paragraph = /(?:^|\n)([^#\n][^\n]+(?:\n[^#\n][^\n]+)*)/;
  const afterH1 = h1Match ? source.slice((h1Match.index ?? 0) + h1Match[0].length) : source;
  const paraMatch = paragraph.exec(afterH1) ?? paragraph.exec(source);
  const lead = paraMatch?.[1]?.trim() || '(no description found in CLAUDE.md or README)';

  return [
    `# ${heading}`,
    '',
    '## What',
    lead.slice(0, 500),
    '',
    '## Stage',
    '(fill in current stage, e.g., v1.x shipped, in development, paused)',
    '',
    '## Team',
    '(fill in team composition + size)',
    '',
    '## Active goals',
    '(populated by /office-hours over time)',
    '',
    '## Recent decisions',
    '(populated by /plan-ceo-review over time)',
    '',
  ].join('\n');
}
/**
 * Derive goal hints from the last 10 non-empty lines of learnings.jsonl.
 *
 * A line contributes a hint when it parses as JSON, has a truthy `insight`
 * and its `type` is 'pattern' or 'architecture'. Malformed lines are
 * skipped; a missing or unreadable file yields no hints.
 *
 * @param learningsPath path to the project's learnings.jsonl
 * @param repoRoot currently unused by this function
 */
function synthesizeGoalsHints(learningsPath: string, repoRoot: string): Array<{ title: string; body: string }> {
  if (!existsSync(learningsPath)) return [];

  let recentLines: string[];
  try {
    recentLines = readFileSync(learningsPath, 'utf-8')
      .split('\n')
      .filter(Boolean)
      .slice(-10);
  } catch {
    // unreadable file, no hints
    return [];
  }

  const hints: Array<{ title: string; body: string }> = [];
  for (const rawLine of recentLines) {
    try {
      const entry = JSON.parse(rawLine);
      const relevantType = entry?.type === 'pattern' || entry?.type === 'architecture';
      if (entry?.insight && relevantType) {
        hints.push({
          title: entry.insight.slice(0, 80),
          body: `Source: learnings.jsonl\nType: ${entry.type}\n\n${entry.insight}\n`,
        });
      }
    } catch {
      // skip malformed line
    }
  }
  return hints;
}
// ──────────────────────────────────────────────────────────────────────────
// Subcommand: list (T18)
// ──────────────────────────────────────────────────────────────────────────
/**
 * List gstack-owned pages in the brain, one `list-pages` query (limit 200)
 * per gstack/<type> namespace, so the user can audit what gstack has written.
 *
 * With a project slug, pages are kept only when their slug contains
 * `/<project>`; user-profile pages are always kept.
 *
 * NOTE(review): the project match is a substring test, so project "app" also
 * matches slugs under "app-server" — confirm whether a segment-bounded match
 * is wanted.
 */
export function cmdList(projectSlug: string | null): Array<{ type: string; slug: string; title?: string }> {
  const namespaces = ['gstack/user-profile', 'gstack/product', 'gstack/goal', 'gstack/developer-persona', 'gstack/brand', 'gstack/competitive-intel', 'gstack/skill-run', 'gstack/take'];
  const found: Array<{ type: string; slug: string; title?: string }> = [];

  for (const type of namespaces) {
    const listing = execGbrainJson<{ pages?: Array<{ slug: string; title?: string }> }>([
      'list-pages',
      '--type', type,
      '--limit', '200',
      '--json',
    ]);
    const pages = listing?.pages;
    if (!pages) continue;

    const exemptFromFilter = type === 'gstack/user-profile';
    for (const page of pages) {
      const keep = !projectSlug || exemptFromFilter || page.slug?.includes(`/${projectSlug}`);
      if (keep) {
        found.push({ type, slug: page.slug, title: page.title });
      }
    }
  }
  return found;
}
// ──────────────────────────────────────────────────────────────────────────
// Subcommand: purge (T18)
// ──────────────────────────────────────────────────────────────────────────
/**
 * Delete one gstack-owned page from the brain via `gbrain delete-page`.
 *
 * Slugs outside the `gstack/` namespace are refused without calling gbrain.
 * Confirmation is the caller's job; this is the raw operation. Cached
 * digests are not touched here.
 *
 * @returns `{ deleted: true }` on success, otherwise `{ deleted: false }`
 *   with gbrain's stderr (or its exit status) as the error
 */
export function cmdPurge(slug: string): { deleted: boolean; error?: string } {
  const ownedByGstack = slug.startsWith('gstack/');
  if (!ownedByGstack) {
    return { deleted: false, error: 'refusing to purge non-gstack page' };
  }
  const run = spawnGbrain(['delete-page', slug], { timeout: 10_000 });
  if (run.status === 0) {
    return { deleted: true };
  }
  return { deleted: false, error: run.stderr?.trim() || `exit ${run.status}` };
}
// ──────────────────────────────────────────────────────────────────────────
// CLI dispatch
// ──────────────────────────────────────────────────────────────────────────
/**
 * Minimal argv parser for the CLI.
 *
 * `argv[2]` is the subcommand. Of the remaining arguments, `--key value` and
 * `--key=value` both set a string flag, a `--key` followed by another flag
 * (or by nothing) sets a boolean flag, and everything else is positional.
 *
 * @param argv full process argv (runtime, script, subcommand, ...)
 */
function parseArgs(argv: string[]): { cmd: string; positional: string[]; flags: Record<string, string | boolean> } {
  const cmd = argv[2] || '';
  const rest = argv.slice(3);
  const positional: string[] = [];
  const flags: Record<string, string | boolean> = {};

  for (let i = 0; i < rest.length; i++) {
    const arg = rest[i];
    if (!arg.startsWith('--')) {
      positional.push(arg);
      continue;
    }
    const key = arg.slice(2);

    // `--key=value`: without this the flag was stored as a boolean named
    // "key=value", so e.g. `--project=foo` silently selected no project.
    const eq = key.indexOf('=');
    if (eq > 0) {
      flags[key.slice(0, eq)] = key.slice(eq + 1);
      continue;
    }

    const next = rest[i + 1];
    if (next && !next.startsWith('--')) {
      flags[key] = next;
      i++;
    } else {
      flags[key] = true;
    }
  }
  return { cmd, positional, flags };
}
/**
 * Read the project slug from parsed flags. Only a string `--project` value
 * counts; a bare `--project` (boolean) or a missing flag yields null.
 */
function projectSlugFromFlag(flags: Record<string, string | boolean>): string | null {
  const { project } = flags;
  if (typeof project !== 'string') {
    return null;
  }
  return project;
}
/**
 * Write the CLI usage text to stderr. It is written to stderr in every case,
 * including explicit help requests, so stdout stays free for command output.
 */
function printUsage(): void {
process.stderr.write(`Usage: gstack-brain-cache <subcommand>
Subcommands:
get <entity-name> [--project <slug>]
refresh [--full] [--entity X] [--project <slug>]
invalidate <entity-name> [--project <slug>]
digest <entity-slug>
meta [--project <slug>]
bootstrap --project <slug> — emit synthesized entity drafts (JSON)
list [--project <slug>] — list gstack-owned pages in brain
purge <slug> — delete a gstack-owned brain page (refuses non-gstack/ slugs)
`);
}
/**
 * CLI entry point: parse argv, dispatch to the subcommand, and resolve to the
 * process exit code.
 *
 * Exit codes:
 *   0 — success (also help / no subcommand)
 *   1 — usage error, unknown subcommand, failed refresh/purge, or a thrown error
 *   2 — `get` found no cache, or `digest` could not fetch the page
 *   3 — `refresh` skipped because another refresh holds the lock
 *
 * Results go to stdout; diagnostics and cache-state notes go to stderr.
 */
async function main(): Promise<number> {
const { cmd, positional, flags } = parseArgs(process.argv);
const projectSlug = projectSlugFromFlag(flags);
try {
switch (cmd) {
case 'get': {
const entityName = positional[0];
if (!entityName) { printUsage(); return 1; }
const result = cmdGet(entityName, projectSlug);
if (result.state === 'missing') {
process.stderr.write(`(${result.state}: ${result.message ?? 'no cache'})\n`);
return 2;
}
// Any non-warm state is reported on stderr; the digest itself still goes to stdout.
if (result.state !== 'warm') {
process.stderr.write(`(${result.state}${result.message ? ': ' + result.message : ''})\n`);
}
process.stdout.write(readFileSync(result.path, 'utf-8'));
return 0;
}
case 'refresh': {
// D3: dedup concurrent refreshes via lockfile. Skipped (dedup) when
// another process is already mid-refresh on the same project.
if (flags.entity) {
const entityName = String(flags.entity);
const result = withRefreshLock(projectSlug, () => refreshEntity(entityName, projectSlug));
if (result === 'dedup') {
process.stderr.write(`(dedup: another refresh in flight)\n`);
return 3;
}
process.stdout.write(result ? `refreshed ${entityName}\n` : `failed to refresh ${entityName}\n`);
return result ? 0 : 1;
}
// No --entity: refresh every entity in the selected scope.
const allResult = withRefreshLock(projectSlug, () => refreshAll(projectSlug));
if (allResult === 'dedup') {
process.stderr.write(`(dedup: another refresh in flight)\n`);
return 3;
}
process.stdout.write(`refreshed=${allResult.success} failed=${allResult.failed}\n`);
return allResult.failed > 0 ? 1 : 0;
}
case 'invalidate': {
const entityName = positional[0];
if (!entityName) { printUsage(); return 1; }
cmdInvalidate(entityName, projectSlug);
process.stdout.write(`invalidated ${entityName}\n`);
return 0;
}
case 'digest': {
const slug = positional[0];
if (!slug) { printUsage(); return 1; }
const content = cmdDigest(slug);
if (content === null) {
process.stderr.write('brain unreachable or page not found\n');
return 2;
}
process.stdout.write(content);
return 0;
}
case 'meta': {
const meta = cmdMeta(projectSlug);
process.stdout.write(JSON.stringify(meta, null, 2) + '\n');
return 0;
}
case 'bootstrap': {
if (!projectSlug) {
process.stderr.write('bootstrap requires --project <slug>\n');
return 1;
}
const draft = cmdBootstrap(projectSlug);
process.stdout.write(JSON.stringify(draft, null, 2) + '\n');
return 0;
}
case 'list': {
const pages = cmdList(projectSlug);
// --json emits the raw array; otherwise one tab-separated line per page.
if (flags.json) {
process.stdout.write(JSON.stringify(pages, null, 2) + '\n');
} else {
for (const p of pages) {
process.stdout.write(`${p.type}\t${p.slug}\t${p.title ?? ''}\n`);
}
}
return 0;
}
case 'purge': {
const slug = positional[0];
if (!slug) { printUsage(); return 1; }
const result = cmdPurge(slug);
if (result.deleted) {
process.stdout.write(`deleted ${slug}\n`);
return 0;
}
process.stderr.write(`failed: ${result.error}\n`);
return 1;
}
case '':
case 'help':
case '--help':
case '-h':
printUsage();
return 0;
default:
process.stderr.write(`unknown subcommand: ${cmd}\n`);
printUsage();
return 1;
}
} catch (err) {
// Errors thrown by a subcommand (e.g. cmdInvalidate on an unknown entity) become exit code 1.
process.stderr.write(`error: ${err instanceof Error ? err.message : String(err)}\n`);
return 1;
}
}
// Only run main when invoked as a script (not when imported by tests).
// main() resolves to the exit code, which is handed straight to process.exit.
if (import.meta.main) {
main().then((code) => process.exit(code));
}

View File

@ -192,10 +192,7 @@ function resolveSkillFile(args: CliArgs): string | null {
function gbrainAvailable(): boolean {
try {
execFileSync("gbrain", ["--version"], {
stdio: "ignore",
timeout: MCP_TIMEOUT_MS,
});
execFileSync("command", ["-v", "gbrain"], { stdio: "ignore" });
return true;
} catch {
return false;

View File

@ -136,11 +136,7 @@ def load_privacy_map(path):
allowlist_globs = load_lines(allowlist_path)
privacy_map = load_privacy_map(privacy_path)
# Normalize skip entries to the POSIX form queued paths use, so a backslash
# entry in .brain-skip.txt still matches on Windows. The drain is the safety
# boundary that actually stages files, so it must normalize identically to
# discover_new — otherwise an explicitly-skipped file gets committed.
skip_lines = {s.replace(os.sep, "/") for s in load_lines(skip_path)}
skip_lines = set(load_lines(skip_path))
# Read queue; collect unique file paths.
queue_paths = set()
@ -257,8 +253,6 @@ subcmd_once() {
# Stage with git add -f (forces past .gitignore=*) explicit paths only.
while IFS= read -r p; do
p="${p%$'\r'}" # Windows: compute_paths_to_stage's python print() emits CRLF;
# a trailing CR makes the pathspec match nothing (silent no-stage).
[ -z "$p" ] && continue
git -C "$GSTACK_HOME" add -f -- "$p" 2>/dev/null || true
done < "$paths_file"
@ -382,13 +376,10 @@ subcmd_discover_new() {
exit 0
fi
# Walk allowlist globs; enqueue any file where mtime+size differs from cursor.
python3 - "$GSTACK_HOME" "$ALLOWLIST" "$DISCOVER_CURSOR" <<'PYEOF' 2>/dev/null || true
import sys, os, json, fnmatch
from datetime import datetime, timezone
python3 - "$GSTACK_HOME" "$ALLOWLIST" "$DISCOVER_CURSOR" "$SCRIPT_DIR/gstack-brain-enqueue" <<'PYEOF' 2>/dev/null || true
import sys, os, json, glob, fnmatch, subprocess, hashlib
gstack_home, allowlist_path, cursor_path = sys.argv[1:4]
queue_path = os.path.join(gstack_home, ".brain-queue.jsonl")
skip_path = os.path.join(gstack_home, ".brain-skip.txt")
gstack_home, allowlist_path, cursor_path, enqueue_bin = sys.argv[1:5]
def load_lines(path):
try:
@ -412,12 +403,8 @@ def save_cursor(path, data):
pass
allowlist = load_lines(allowlist_path)
# Normalize skip entries to the same POSIX form as `rel` below, so a
# backslash entry in .brain-skip.txt still matches a normalized path on Windows.
skip = {s.replace(os.sep, "/") for s in load_lines(skip_path)}
cursor = load_cursor(cursor_path)
new_cursor = dict(cursor)
to_enqueue = []
# Walk all files under gstack_home, match against allowlist.
for root, dirs, files in os.walk(gstack_home):
@ -426,54 +413,22 @@ for root, dirs, files in os.walk(gstack_home):
continue
for name in files:
full = os.path.join(root, name)
# Repo paths are POSIX-relative. os.path.relpath yields backslash
# separators on Windows, which never match the forward-slash allowlist
# globs (e.g. "projects/*/learnings.jsonl"), so discovery silently
# enqueued nothing under projects/ on Windows. Normalize to "/".
rel = os.path.relpath(full, gstack_home).replace(os.sep, "/")
rel = os.path.relpath(full, gstack_home)
if rel.startswith(".brain-"):
continue
if not any(fnmatch.fnmatchcase(rel, pat) for pat in allowlist):
continue
if rel in skip:
matched = any(fnmatch.fnmatchcase(rel, pat) for pat in allowlist)
if not matched:
continue
try:
st = os.stat(full)
key = f"{int(st.st_mtime)}:{st.st_size}"
except OSError:
continue
if cursor.get(rel) != key:
to_enqueue.append((rel, key))
# Append to the queue directly. The previous implementation shelled out to
# gstack-brain-enqueue once per file, but Windows Python cannot exec a
# bash-shebang script (the spawn fails with a fork error), so discovery
# enqueued nothing on Windows even after the path-match fix above.
# Writing the queue line here is platform-agnostic; the drain step
# (compute_paths_to_stage) still re-applies the skip-list + privacy filters.
if to_enqueue:
ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
try:
# One atomic append per record (O_APPEND, each line < PIPE_BUF), matching
# gstack-brain-enqueue's concurrency contract so a writer-shim append
# running in parallel can't interleave mid-record. Buffered text writes
# don't guarantee that. Compact separators match the shim's JSON shape.
fd = os.open(queue_path, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o644)
try:
for rel, key in to_enqueue:
rec = json.dumps({"file": rel, "ts": ts}, separators=(",", ":"))
os.write(fd, (rec + "\n").encode("utf-8"))
finally:
os.close(fd)
except OSError:
# Queue write failed (disk full, AV file lock). Leave the cursor
# unadvanced so these files are retried on the next discover instead of
# being silently recorded as synced (which loses the change until the
# file next changes).
to_enqueue = []
# Advance the cursor only for records actually written.
for rel, key in to_enqueue:
new_cursor[rel] = key
prev = cursor.get(rel)
if prev != key:
# Enqueue via the shim (respects sync mode + skip list).
subprocess.run([enqueue_bin, rel], check=False)
new_cursor[rel] = key
save_cursor(cursor_path, new_cursor)
PYEOF

View File

@ -1,223 +0,0 @@
#!/usr/bin/env bash
# gstack-codex-session-import — backfill question-log.jsonl from Codex sessions.
#
# Codex has no AskUserQuestion tool (per docs/spikes/codex-session-format.md).
# gstack skills running on Codex emit Decision Briefs as plain agent_message
# text, and the user's response shows up in the next user_message. This
# importer reconstructs those question/answer pairs from the structured
# JSONL session files at ~/.codex/sessions/<date>/.
#
# Usage:
# gstack-codex-session-import # latest session under ~/.codex/sessions/
# gstack-codex-session-import <path/to.jsonl> # explicit session file
# gstack-codex-session-import --since <iso> # all sessions newer than <iso>
#
# Recovery strategy (two-tier per D5/T4 spike):
# 1. Marker-first: extract <gstack-qid:foo-bar> from agent_message → stable id.
# 2. Pattern fallback: detect D<N> header + numbered options → hash id
# (source=codex-import-pattern, never used as preference key per D18).
#
# Writes via bin/gstack-question-log so source tagging, dedup, and async
# derive all apply uniformly.
set -euo pipefail
# Directory containing this script; used later to locate gstack-question-log.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# State root: GSTACK_STATE_ROOT wins over GSTACK_HOME, defaulting to ~/.gstack.
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
# Where Codex session files live; overridable via the environment.
CODEX_SESSIONS_ROOT="${CODEX_SESSIONS_ROOT:-$HOME/.codex/sessions}"
# MODE is one of: latest (default), explicit (a path was given), since (--since <iso>).
MODE="latest"
EXPLICIT_PATH=""
SINCE_ISO=""
# Only the first argument selects the mode; --since reads its value from $2.
if [ $# -gt 0 ]; then
case "$1" in
--since)
MODE="since"
SINCE_ISO="${2:-}"
;;
--help|-h)
# Print the header comment: everything up to the `set -euo` line, with the leading "# " stripped.
sed -n '1,/^set -euo/p' "$0" | sed 's|^# \?||'
exit 0
;;
-*)
echo "unknown flag: $1" >&2
exit 1
;;
*)
# Any non-flag argument is taken as an explicit session file path.
MODE="explicit"
EXPLICIT_PATH="$1"
;;
esac
fi
# Resolve list of session files to process.
#   explicit: the single file given on the command line (must exist).
#   latest:   the most recently modified rollout-*.jsonl under CODEX_SESSIONS_ROOT.
#   since:    every rollout-*.jsonl modified after the --since timestamp.
SESSION_FILES=()
case "$MODE" in
  explicit)
    if [ ! -f "$EXPLICIT_PATH" ]; then
      echo "gstack-codex-session-import: file not found: $EXPLICIT_PATH" >&2
      exit 1
    fi
    SESSION_FILES=("$EXPLICIT_PATH")
    ;;
  latest)
    if [ ! -d "$CODEX_SESSIONS_ROOT" ]; then
      echo "NO_SESSIONS: $CODEX_SESSIONS_ROOT does not exist"
      exit 0
    fi
    # Pick the newest file with a plain mtime comparison. Piping find into
    # `xargs ls -t` lists the current directory when find returns nothing
    # (GNU xargs still runs the command) and splits paths on whitespace.
    LATEST=""
    while IFS= read -r f; do
      if [ -z "$LATEST" ] || [ "$f" -nt "$LATEST" ]; then
        LATEST="$f"
      fi
    done < <(find "$CODEX_SESSIONS_ROOT" -type f -name "rollout-*.jsonl" -print 2>/dev/null)
    if [ -z "$LATEST" ]; then
      echo "NO_SESSIONS: no rollout-*.jsonl files under $CODEX_SESSIONS_ROOT"
      exit 0
    fi
    SESSION_FILES=("$LATEST")
    ;;
  since)
    if [ -z "$SINCE_ISO" ]; then
      echo "--since requires an ISO 8601 timestamp" >&2
      exit 1
    fi
    # -newermt compares each file's mtime against the timestamp itself.
    # `-newer <(date ...)` compared against the mtime of the process
    # substitution's file descriptor, so the --since value was never applied.
    # NOTE(review): timestamp parsing is delegated to find; confirm the
    # accepted formats on non-GNU find.
    while IFS= read -r f; do
      SESSION_FILES+=("$f")
    done < <(find "$CODEX_SESSIONS_ROOT" -type f -name "rollout-*.jsonl" -newermt "$SINCE_ISO" 2>/dev/null)
    ;;
esac
if [ ${#SESSION_FILES[@]} -eq 0 ]; then
  echo "NO_SESSIONS: nothing to import"
  exit 0
fi
# Parse + extract via bun. For each session file the embedded script pairs
# every qualifying agent_message with the next user_message, builds one event
# per pair, and hands each event to gstack-question-log. Events are tagged
# with a source so downstream consumers (/plan-tune stats, dream cycle) can
# distinguish backfilled events from live captures.
#
# Contract between the embedded script and this loop: stdout carries exactly
# one "<imported> <skipped>" line. Diagnostics go to stderr, which is NOT
# captured, so a WARN line can never end up inside the arithmetic below.
IMPORTED=0
SKIPPED_NO_ANSWER=0
for SESSION_FILE in "${SESSION_FILES[@]}"; do
  if ! COUNT_LINE=$(SESSION_FILE_PATH="$SESSION_FILE" QLOG_BIN="$SCRIPT_DIR/gstack-question-log" bun -e '
    const fs = require("fs");
    const path = require("path");
    const { spawnSync } = require("child_process");
    const crypto = require("crypto");
    const sessionPath = process.env.SESSION_FILE_PATH;
    const qlogBin = process.env.QLOG_BIN;
    const lines = fs.readFileSync(sessionPath, "utf-8").trim().split("\n").filter(Boolean);
    // Split the JSONL stream into the session_meta payload and everything else.
    // Unparseable lines are skipped.
    let meta = null;
    const stream = [];
    for (const ln of lines) {
      try {
        const e = JSON.parse(ln);
        if (e.type === "session_meta") meta = e.payload;
        else stream.push(e);
      } catch {}
    }
    if (!meta) {
      console.error("WARN: no session_meta in " + sessionPath);
      console.log("0 0");
      process.exit(0);
    }
    const cwd = meta.cwd || "";
    const sessionId = (meta.id || path.basename(sessionPath)).slice(0, 64);
    // Walk for agent_message → next user_message pairs.
    const briefs = [];
    for (let i = 0; i < stream.length; i++) {
      const e = stream[i];
      if (e.type !== "event_msg" || e.payload?.type !== "agent_message") continue;
      const text = String(e.payload?.message || "");
      if (!text) continue;
      // Detect D-numbered brief or marker. Markers are sufficient on their own.
      const markerMatch = text.match(/<gstack-qid:([a-z0-9-]{1,64})>/i);
      const dMatch = text.match(/^D\d+[\.\d]*\s*[—\-]\s*(.+?)$/m);
      if (!markerMatch && !dMatch) continue;
      // Find the next user_message in the stream.
      let answer = null;
      for (let j = i + 1; j < stream.length; j++) {
        const e2 = stream[j];
        if (e2.type === "event_msg" && e2.payload?.type === "user_message") {
          answer = String(e2.payload?.message || "").trim();
          break;
        }
      }
      if (!answer) continue;
      // Extract options A) ... B) ... from the brief.
      const optMatches = [...text.matchAll(/^([A-Z])\)\s+(.+?)(?:\s+\(recommended\))?$/gm)];
      const options = optMatches.map((m) => m[2].trim());
      // Identify the recommended option: only set when exactly one option
      // line carries the "(recommended)" label.
      let recommended;
      const recLabel = [...text.matchAll(/^([A-Z])\)\s+(.+?)\s+\(recommended\)$/gm)];
      if (recLabel.length === 1) recommended = recLabel[0][2].trim();
      // Identify which option the user picked from their answer.
      // Look for "A" / "A) ..." / option-label prefix match.
      let userChoice = "__unknown__";
      const letterMatch = answer.match(/^\s*([A-Z])\b/);
      if (letterMatch) {
        const idx = letterMatch[1].charCodeAt(0) - 65;
        if (idx >= 0 && idx < options.length) userChoice = options[idx];
        else userChoice = letterMatch[1];
      } else if (options.length > 0) {
        const lower = answer.toLowerCase();
        const m = options.find((o) => lower.includes(o.toLowerCase().slice(0, 12)));
        if (m) userChoice = m;
      }
      if (userChoice === "__unknown__") {
        userChoice = answer.slice(0, 64);
      }
      const summary = (dMatch?.[1] || text.split("\n")[0]).slice(0, 200);
      // Marker briefs keep their declared id; pattern-only briefs get a stable
      // id hashed from the summary and the sorted option list.
      let questionId, source;
      if (markerMatch) {
        questionId = markerMatch[1];
        source = "codex-import-marker";
      } else {
        const sortedOpts = [...options].sort().join("|");
        const h = crypto.createHash("sha1").update("codex::" + summary + "::" + sortedOpts).digest("hex").slice(0, 10);
        questionId = "hook-" + h;
        source = "codex-import-pattern";
      }
      briefs.push({
        skill: "codex",
        question_id: questionId,
        question_summary: summary,
        options_count: options.length || 1,
        user_choice: userChoice.slice(0, 64),
        ...(recommended ? { recommended: recommended.slice(0, 64) } : {}),
        source,
        session_id: sessionId,
        // Carry the timestamp of the event when present; otherwise the key is
        // omitted (JSON.stringify drops undefined values).
        ts: e.timestamp || undefined,
      });
    }
    let imported = 0;
    for (const b of briefs) {
      const res = spawnSync(qlogBin, [JSON.stringify(b)], {
        encoding: "utf-8",
        stdio: ["ignore", "pipe", "pipe"],
        // Run from the originating cwd so gstack-slug buckets events into the
        // right project. Falls back to the importer cwd if the session cwd
        // no longer exists.
        cwd: cwd && fs.existsSync(cwd) ? cwd : undefined,
        timeout: 5000,
      });
      if (res.status === 0) imported++;
    }
    console.log(imported + " 0");
  '); then
    # One unreadable session must not abort the rest of the import.
    echo "WARN: failed to process $SESSION_FILE; skipping" >&2
    continue
  fi
  # The count line is the last stdout line; its first field is the number of
  # events imported from this session.
  IMP=$(printf '%s\n' "$COUNT_LINE" | tail -n 1 | awk '{print $1}')
  case "$IMP" in
    ''|*[!0-9]*)
      echo "WARN: unexpected importer output for $SESSION_FILE; skipping" >&2
      continue
      ;;
  esac
  IMPORTED=$((IMPORTED + IMP))
done
echo "IMPORTED: $IMPORTED events from ${#SESSION_FILES[@]} session(s)"

View File

@ -8,13 +8,11 @@
# gstack-config defaults — show just the defaults table
#
# Env overrides (for testing):
# GSTACK_STATE_ROOT — override ~/.gstack state directory (highest priority,
# matches D16 cathedral isolation convention)
# GSTACK_HOME — override ~/.gstack state directory (aligns with writer scripts)
# GSTACK_STATE_DIR — legacy alias for GSTACK_HOME (kept for backwards compat)
set -euo pipefail
STATE_DIR="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-${GSTACK_STATE_DIR:-$HOME/.gstack}}}"
STATE_DIR="${GSTACK_HOME:-${GSTACK_STATE_DIR:-$HOME/.gstack}}"
CONFIG_FILE="$STATE_DIR/config.yaml"
# Annotated header for new config files. Written once on first `set`.
@ -75,16 +73,6 @@ CONFIG_HEADER='# gstack configuration — edit freely, changes take effect on ne
# # Set to true once the privacy gate has asked the user.
# # Flip back to false to be re-prompted.
#
# ─── Plan-tune hooks ─────────────────────────────────────────────────
# plan_tune_hooks: prompt # Controls whether ./setup installs the plan-tune
# # Claude Code hooks (PostToolUse capture +
# # PreToolUse preference enforcement).
# # prompt — ask on a real TTY, skip otherwise (default)
# # yes — install non-interactively
# # no — skip non-interactively
# # Override per-run: ./setup --plan-tune-hooks /
# # --no-plan-tune-hooks, or env GSTACK_PLAN_TUNE_HOOKS.
#
# ─── Advanced ────────────────────────────────────────────────────────
# codex_reviews: enabled # disabled = skip Codex adversarial reviews in /ship
# gstack_contributor: false # true = file field reports when gstack misbehaves
@ -112,7 +100,6 @@ lookup_default() {
skill_prefix) echo "false" ;;
checkpoint_mode) echo "explicit" ;;
checkpoint_push) echo "false" ;;
explain_level) echo "default" ;;
codex_reviews) echo "enabled" ;;
gstack_contributor) echo "false" ;;
skip_eng_review) echo "false" ;;
@ -120,145 +107,19 @@ lookup_default() {
cross_project_learnings) echo "" ;; # intentionally empty → unset triggers first-time prompt
artifacts_sync_mode) echo "off" ;;
artifacts_sync_mode_prompted) echo "false" ;;
plan_tune_hooks) echo "prompt" ;; # prompt | yes | no — controls ./setup plan-tune hook install
redact_repo_visibility) echo "" ;; # empty → fall through to gh/glab detection
redact_prepush_hook) echo "false" ;;
# Brain-aware planning (v1.48 / T5+T10+T16). Defaults documented inline:
# brain_trust_policy@<hash> — unset on fresh install; setup-gbrain
# writes 'personal' for local engines,
# asks the user for remote-ambiguous.
# salience_allowlist — empty falls through to
# SALIENCE_DEFAULT_ALLOWLIST (D9).
# user_slug_at_<hash> — empty triggers resolve-user-slug
# fallback chain (D4 A3) on first call.
brain_trust_policy*) echo "unset" ;;
salience_allowlist) echo "" ;;
user_slug_at_*) echo "" ;;
*) echo "" ;;
esac
}
# ──────────────────────────────────────────────────────────────────────
# Brain-integration helpers (T5+T10+T16)
# ──────────────────────────────────────────────────────────────────────
# Compute sha8 of a string. Used for endpoint hashing.
sha8_of() {
printf '%s' "$1" | shasum -a 256 | cut -c1-8
}
# Print the hash identifying the active brain endpoint.
#
# Looks up the gbrain MCP server URL in ~/.claude.json (either
# .mcpServers.gbrain.url or .mcpServers.gbrain.transport.url) and prints its
# sha8. Prints the literal 'local' when the file is missing, jq is not
# installed, or no URL is configured.
endpoint_hash() {
  _claude_json="$HOME/.claude.json"
  # Without the config file or jq there is nothing to inspect.
  if [ ! -f "$_claude_json" ] || ! command -v jq >/dev/null 2>&1; then
    printf '%s' "local"
    return
  fi
  _url=$(jq -r '.mcpServers.gbrain.url // .mcpServers.gbrain.transport.url // empty' "$_claude_json" 2>/dev/null)
  case "$_url" in
    ''|null)
      # No usable URL — fall through to the 'local' marker below.
      ;;
    *)
      sha8_of "$_url"
      return 0
      ;;
  esac
  printf '%s' "local"
}
# Print the endpoint hash to namespace config keys with, escalating from sha8
# to sha16 when the config already namespaces this endpoint by its sha16.
#
# Steps:
#   1. Take the active hash from endpoint_hash; 'local' is returned as-is.
#   2. Look for any recorded key starting with that hash. Keys are written as
#      brain_trust_policy@<hash> and user_slug_at_<hash> (note the different
#      separators — the slug key uses '_', not '@').
#   3. If one exists and a key namespaced by the sha16 of the current URL is
#      also recorded, print the sha16; otherwise print the sha8.
endpoint_hash_with_collision_check() {
  _active=$(endpoint_hash)
  if [ "$_active" = "local" ]; then
    printf '%s' "$_active"
    return 0
  fi
  # We can only detect escalation when the config has this endpoint recorded.
  _matching=$(grep -E "^(brain_trust_policy@|user_slug_at_)${_active}" "$CONFIG_FILE" 2>/dev/null | head -1 || true)
  _claude_json="$HOME/.claude.json"
  if [ -n "$_matching" ] && [ -f "$_claude_json" ] && command -v jq >/dev/null 2>&1; then
    # `|| true`: a jq failure here must not abort the caller under `set -e`;
    # an empty URL simply means "no escalation".
    _url=$(jq -r '.mcpServers.gbrain.url // .mcpServers.gbrain.transport.url // empty' "$_claude_json" 2>/dev/null || true)
    if [ -n "$_url" ] && [ "$_url" != "null" ]; then
      _sha16=$(printf '%s' "$_url" | shasum -a 256 | cut -c1-16)
      _stored16=$(grep -E "^(brain_trust_policy@|user_slug_at_)${_sha16}" "$CONFIG_FILE" 2>/dev/null | head -1 || true)
      if [ -n "$_stored16" ]; then
        printf '%s' "$_sha16"
        return 0
      fi
    fi
  fi
  printf '%s' "$_active"
}
# Resolve (and remember) the user-slug for the active brain endpoint.
#
# A slug already stored under user_slug_at_<endpoint-hash> in the config file
# wins. Otherwise the first source that yields a non-empty value is used
# (D4 A3 chain):
#   1. client_name / token_name reported by `gbrain whoami --json`
#   2. $USER
#   3. "email-" + sha8 of `git config user.email`
#   4. "anonymous-" + sha8 of the hostname
# Sources 1 and 2 are lowercased, spaces become '-', and everything outside
# [alnum-] is dropped. The result is appended to the config file and printed
# without a trailing newline.
resolve_user_slug() {
  _hash=$(endpoint_hash_with_collision_check)

  # Fast path: a slug was persisted by an earlier call.
  _stored=$(grep -E "^user_slug_at_${_hash}:" "$CONFIG_FILE" 2>/dev/null | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
  if [ -n "$_stored" ]; then
    printf '%s' "$_stored"
    return 0
  fi

  _slug=""

  # Source 1: identity reported by the gbrain CLI (needs jq to read it).
  if command -v gbrain >/dev/null 2>&1; then
    _whoami=$(gbrain whoami --json 2>/dev/null || true)
    if [ -n "$_whoami" ] && command -v jq >/dev/null 2>&1; then
      _client_name=$(printf '%s' "$_whoami" | jq -r '.client_name // .token_name // empty' 2>/dev/null || true)
      if [ -n "$_client_name" ] && [ "$_client_name" != "null" ]; then
        _slug=$(printf '%s' "$_client_name" | tr '[:upper:] ' '[:lower:]-' | tr -dc '[:alnum:]-')
      fi
    fi
  fi

  # Source 2: the login name.
  if [ -z "$_slug" ] && [ -n "${USER:-}" ]; then
    _slug=$(printf '%s' "$USER" | tr '[:upper:] ' '[:lower:]-' | tr -dc '[:alnum:]-')
  fi

  # Source 3: a hash of the git identity, so the raw email is never stored.
  if [ -z "$_slug" ]; then
    _email=$(git config user.email 2>/dev/null || true)
    if [ -n "$_email" ]; then
      _slug="email-$(sha8_of "$_email")"
    fi
  fi

  # Source 4: last resort, derived from the hostname.
  if [ -z "$_slug" ]; then
    _slug="anonymous-$(sha8_of "$(hostname 2>/dev/null || echo unknown)")"
  fi

  # Persist by writing the file directly rather than going through
  # `gstack-config set`, which would recurse into this script.
  mkdir -p "$STATE_DIR"
  [ -f "$CONFIG_FILE" ] || printf '%s' "$CONFIG_HEADER" > "$CONFIG_FILE"
  grep -qE "^user_slug_at_${_hash}:" "$CONFIG_FILE" 2>/dev/null \
    || echo "user_slug_at_${_hash}: ${_slug}" >> "$CONFIG_FILE"

  printf '%s' "$_slug"
}
case "${1:-}" in
get)
KEY="${2:?Usage: gstack-config get <key>}"
# Validate key (alphanumeric + underscore + optional @<hash> suffix for
# endpoint-namespaced keys introduced by the brain-aware planning layer)
if ! printf '%s' "$KEY" | grep -qE '^[a-zA-Z0-9_]+(@[a-f0-9]+)?$'; then
echo "Error: key must contain only alphanumeric characters, underscores, and an optional @<hex-hash> suffix" >&2
# Validate key (alphanumeric + underscore only)
if ! printf '%s' "$KEY" | grep -qE '^[a-zA-Z0-9_]+$'; then
echo "Error: key must contain only alphanumeric characters and underscores" >&2
exit 1
fi
# Use literal match for keys containing @ (sha hashes), regex otherwise
VALUE=$(grep -F "${KEY}:" "$CONFIG_FILE" 2>/dev/null | grep -E "^${KEY%@*}(@[a-f0-9]+)?:" | grep -F "${KEY}:" | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
VALUE=$(grep -E "^${KEY}:" "$CONFIG_FILE" 2>/dev/null | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
if [ -z "$VALUE" ]; then
VALUE=$(lookup_default "$KEY")
fi
@ -267,17 +128,11 @@ case "${1:-}" in
set)
KEY="${2:?Usage: gstack-config set <key> <value>}"
VALUE="${3:?Usage: gstack-config set <key> <value>}"
# Validate key (alphanumeric + underscore + optional @<hash> suffix)
if ! printf '%s' "$KEY" | grep -qE '^[a-zA-Z0-9_]+(@[a-f0-9]+)?$'; then
echo "Error: key must contain only alphanumeric characters, underscores, and an optional @<hex-hash> suffix" >&2
# Validate key (alphanumeric + underscore only)
if ! printf '%s' "$KEY" | grep -qE '^[a-zA-Z0-9_]+$'; then
echo "Error: key must contain only alphanumeric characters and underscores" >&2
exit 1
fi
# Validate brain_trust_policy value domain (D4 / D11)
if printf '%s' "$KEY" | grep -qE '^brain_trust_policy(@|$)' && \
[ "$VALUE" != "personal" ] && [ "$VALUE" != "shared" ] && [ "$VALUE" != "unset" ]; then
echo "Warning: brain_trust_policy '$VALUE' not recognized. Valid values: personal, shared, unset. Using unset." >&2
VALUE="unset"
fi
# V1: whitelist values for keys with closed value domains. Unknown values warn + default.
if [ "$KEY" = "explain_level" ] && [ "$VALUE" != "default" ] && [ "$VALUE" != "terse" ]; then
echo "Warning: explain_level '$VALUE' not recognized. Valid values: default, terse. Using default." >&2
@ -287,21 +142,6 @@ case "${1:-}" in
echo "Warning: artifacts_sync_mode '$VALUE' not recognized. Valid values: off, artifacts-only, full. Using off." >&2
VALUE="off"
fi
# redact_repo_visibility: a LOCAL override for repos gh/glab can't read (e.g.
# self-hosted GitLab). It lives in ~/.gstack/config.yaml (never committed), so
# it can't be used to weaken the gate repo-wide for other contributors.
if [ "$KEY" = "redact_repo_visibility" ] && [ "$VALUE" != "public" ] && [ "$VALUE" != "private" ] && [ "$VALUE" != "unknown" ]; then
echo "Warning: redact_repo_visibility '$VALUE' not recognized. Valid values: public, private, unknown. Using unknown." >&2
VALUE="unknown"
fi
if [ "$KEY" = "redact_prepush_hook" ] && [ "$VALUE" != "true" ] && [ "$VALUE" != "false" ]; then
echo "Warning: redact_prepush_hook '$VALUE' not recognized. Valid values: true, false. Using false." >&2
VALUE="false"
fi
if [ "$KEY" = "plan_tune_hooks" ] && [ "$VALUE" != "prompt" ] && [ "$VALUE" != "yes" ] && [ "$VALUE" != "no" ]; then
echo "Warning: plan_tune_hooks '$VALUE' not recognized. Valid values: prompt, yes, no. Using prompt." >&2
VALUE="prompt"
fi
mkdir -p "$STATE_DIR"
# Write annotated header on first creation
if [ ! -f "$CONFIG_FILE" ]; then
@ -329,9 +169,9 @@ case "${1:-}" in
echo ""
echo "# ─── Active values (including defaults for unset keys) ───"
for KEY in proactive routing_declined telemetry auto_upgrade update_check \
skill_prefix checkpoint_mode checkpoint_push explain_level \
codex_reviews gstack_contributor skip_eng_review workspace_root \
artifacts_sync_mode artifacts_sync_mode_prompted plan_tune_hooks; do
skill_prefix checkpoint_mode checkpoint_push codex_reviews \
gstack_contributor skip_eng_review workspace_root \
artifacts_sync_mode artifacts_sync_mode_prompted; do
VALUE=$(grep -E "^${KEY}:" "$CONFIG_FILE" 2>/dev/null | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
SOURCE="default"
if [ -n "$VALUE" ]; then
@ -345,68 +185,14 @@ case "${1:-}" in
defaults)
echo "# gstack-config defaults"
for KEY in proactive routing_declined telemetry auto_upgrade update_check \
skill_prefix checkpoint_mode checkpoint_push explain_level \
codex_reviews gstack_contributor skip_eng_review workspace_root \
artifacts_sync_mode artifacts_sync_mode_prompted plan_tune_hooks; do
skill_prefix checkpoint_mode checkpoint_push codex_reviews \
gstack_contributor skip_eng_review workspace_root \
artifacts_sync_mode artifacts_sync_mode_prompted; do
printf ' %-24s %s\n' "$KEY:" "$(lookup_default "$KEY")"
done
;;
endpoint-hash)
# Brain integration helper (T10): print active brain endpoint sha8
endpoint_hash_with_collision_check
;;
resolve-user-slug)
# Brain integration helper (T16 / D4 A3): resolve + persist user-slug
resolve_user_slug
;;
gbrain-refresh)
    # Brain integration helper: re-detect gbrain installation state and
    # persist to ~/.gstack/gbrain-detection.json. gen-skill-docs reads this
    # file (when invoked with --respect-detection) to decide whether to
    # render GBRAIN_CONTEXT_LOAD and GBRAIN_SAVE_RESULTS blocks in
    # generated SKILL.md files.
    #
    # Run this after installing or uninstalling gbrain so your locally
    # generated SKILL.md files match your installation state.
    SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
    DETECT_BIN="$SCRIPT_DIR/gstack-gbrain-detect"
    DETECTION_FILE="$STATE_DIR/gbrain-detection.json"
    mkdir -p "$STATE_DIR"
    if [ ! -x "$DETECT_BIN" ]; then
      echo "gstack-gbrain-detect not found at $DETECT_BIN" >&2
      exit 1
    fi
    # Write to a temp file first and rename, so readers never see a partial
    # file. A failing detector is recorded as "no-cli" rather than aborting.
    if ! "$DETECT_BIN" > "$DETECTION_FILE.tmp" 2>/dev/null; then
      printf '{"gbrain_on_path":false,"gbrain_local_status":"no-cli"}\n' > "$DETECTION_FILE.tmp"
    fi
    mv "$DETECTION_FILE.tmp" "$DETECTION_FILE"
    # Summarize for the user. Use python (already required elsewhere) to
    # parse the JSON portably; fall back to grep if python is unavailable.
    PYTHON_CMD=$(command -v python3 || command -v python || true)
    if [ -n "$PYTHON_CMD" ]; then
      # The path is passed as argv, never spliced into the Python source, so
      # a quote or backslash in the state directory cannot break the script.
      STATUS=$("$PYTHON_CMD" -c "import json,sys; d=json.load(open(sys.argv[1])); print(d.get('gbrain_local_status','unknown'))" "$DETECTION_FILE" 2>/dev/null || echo unknown)
      VERSION=$("$PYTHON_CMD" -c "import json,sys; d=json.load(open(sys.argv[1])); print(d.get('gbrain_version') or 'unknown')" "$DETECTION_FILE" 2>/dev/null || echo unknown)
    else
      # `|| true`: under pipefail a grep with no match would otherwise abort
      # the script before the "unknown" defaults below are applied.
      STATUS=$(grep -o '"gbrain_local_status":[[:space:]]*"[^"]*"' "$DETECTION_FILE" | sed 's/.*"\([^"]*\)"$/\1/' || true)
      VERSION=$(grep -o '"gbrain_version":[[:space:]]*"[^"]*"' "$DETECTION_FILE" | sed 's/.*"\([^"]*\)"$/\1/' || true)
      [ -z "$STATUS" ] && STATUS=unknown
      [ -z "$VERSION" ] && VERSION=unknown
    fi
    case "$STATUS" in
      ok)
        echo "Detected gbrain v$VERSION → brain-aware blocks will render in planning-skill SKILL.md files."
        echo "Run 'bun run gen:skill-docs' in the gstack repo (or re-run ./setup) to regenerate now."
        ;;
      *)
        echo "gbrain not detected (local-status: $STATUS) → brain-aware blocks will be suppressed in planning-skill SKILL.md files."
        echo "Install gbrain (see /setup-gbrain) and re-run 'gstack-config gbrain-refresh' once it's configured."
        ;;
    esac
    ;;
*)
echo "Usage: gstack-config {get|set|list|defaults|endpoint-hash|resolve-user-slug|gbrain-refresh} [key] [value]"
echo "Usage: gstack-config {get|set|list|defaults} [key] [value]"
exit 1
;;
esac

View File

@ -17,9 +17,6 @@
# --check-mismatch detect meaningful gaps between declared and observed.
# --migrate migrate builder-profile.jsonl → developer-profile.json.
# Idempotent; archives the source file on success.
# --log-session append a session entry (from /office-hours) to
# sessions[] and update aggregates. Required fields:
# date, mode. Silent skip on invalid input.
#
# Profile file: ~/.gstack/developer-profile.json (unified schema — see
# docs/designs/PLAN_TUNING_V0.md). Event file: ~/.gstack/projects/{SLUG}/
@ -28,8 +25,7 @@ set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
# GSTACK_STATE_ROOT takes precedence over GSTACK_HOME (test isolation per D16).
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
GSTACK_HOME="${GSTACK_HOME:-$HOME/.gstack}"
PROFILE_FILE="$GSTACK_HOME/developer-profile.json"
LEGACY_FILE="$GSTACK_HOME/builder-profile.jsonl"
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
@ -158,65 +154,6 @@ ensure_profile() {
EOF
}
# -----------------------------------------------------------------------
# Record session: append a session entry from /office-hours to sessions[]
# and update aggregates (signals_accumulated, resources_shown, topics).
# Fix for #1671: the writer side of the v1.0.0.0 migration. Reader and
# writer now share the same file.
# Silent skip on invalid input (matches gstack-timeline-log:22-26 pattern).
# -----------------------------------------------------------------------
# Append one session entry to sessions[] in the developer profile and fold its
# signals / resources_shown / topics into the profile-level aggregates.
#
# Arguments:
#   $1  JSON object describing the session; `date` and `mode` are required.
# Behavior:
#   - Empty, unparseable, or incomplete input is skipped silently (returns 0).
#   - The profile is rewritten via a temp file in the same directory followed
#     by `mv`, so readers never observe a half-written file.
#   - gstack-brain-enqueue is started in the background afterwards.
do_log_session() {
  local INPUT="${1:-}"
  if [ -z "$INPUT" ]; then
    return 0
  fi

  # Validate: input must be parseable JSON with required fields (date, mode).
  if ! printf '%s' "$INPUT" | bun -e "
    const j = JSON.parse(await Bun.stdin.text());
    if (!j.date || !j.mode) process.exit(1);
  " 2>/dev/null; then
    return 0
  fi

  ensure_profile

  # The X run must be the LAST part of the template: BSD/macOS mktemp only
  # substitutes trailing Xs, so a suffix after them yields a fixed file name.
  local TMPOUT
  TMPOUT=$(mktemp "$GSTACK_HOME/developer-profile.json.tmp.XXXXXX")
  trap 'rm -f "$TMPOUT"' EXIT

  PROFILE_FILE_PATH="$PROFILE_FILE" RECORD_INPUT="$INPUT" TMPOUT_PATH="$TMPOUT" bun -e "
    const fs = require('fs');
    // Treat anything that is not an array as empty, so a stray string is not
    // iterated character by character.
    const list = (v) => (Array.isArray(v) ? v : []);

    const entry = JSON.parse(process.env.RECORD_INPUT);
    if (!entry.ts) entry.ts = new Date().toISOString();

    const profile = JSON.parse(fs.readFileSync(process.env.PROFILE_FILE_PATH, 'utf-8'));
    profile.sessions = profile.sessions || [];
    profile.sessions.push(entry);

    // signals_accumulated is a per-signal occurrence counter.
    profile.signals_accumulated = profile.signals_accumulated || {};
    for (const s of list(entry.signals)) {
      profile.signals_accumulated[s] = (profile.signals_accumulated[s] || 0) + 1;
    }

    // resources_shown and topics are de-duplicated unions across sessions.
    const resSet = new Set(list(profile.resources_shown));
    for (const r of list(entry.resources_shown)) resSet.add(r);
    profile.resources_shown = Array.from(resSet);

    const topicSet = new Set(list(profile.topics));
    for (const t of list(entry.topics)) topicSet.add(t);
    profile.topics = Array.from(topicSet);

    fs.writeFileSync(process.env.TMPOUT_PATH, JSON.stringify(profile, null, 2));
  "

  mv "$TMPOUT" "$PROFILE_FILE"
  trap - EXIT
  "$SCRIPT_DIR/gstack-brain-enqueue" "developer-profile.json" 2>/dev/null &
}
# -----------------------------------------------------------------------
# Read: emit legacy KEY: VALUE output for /office-hours compat.
# -----------------------------------------------------------------------
@ -231,19 +168,14 @@ do_read() {
else if (count >= 4) tier = 'regular';
else if (count >= 1) tier = 'welcome_back';
// LAST_* / CROSS_PROJECT must reflect real sessions, not resource-tracking
// events (the Phase 6 auto-append). Without this filter, a session's
// resources entry written immediately after the real session would clobber
// LAST_PROJECT/LAST_ASSIGNMENT/LAST_DESIGN_TITLE.
const realSessions = sessions.filter(e => e.mode !== 'resources');
const last = realSessions[realSessions.length - 1] || {};
const prev = realSessions[realSessions.length - 2] || {};
const last = sessions[count - 1] || {};
const prev = sessions[count - 2] || {};
const crossProject = prev.project_slug && last.project_slug
? prev.project_slug !== last.project_slug
: false;
const designs = realSessions.map(e => e.design_doc || '').filter(Boolean);
const designTitles = realSessions
const designs = sessions.map(e => e.design_doc || '').filter(Boolean);
const designTitles = sessions
.map(e => (e.design_doc ? (e.project_slug || 'unknown') : ''))
.filter(Boolean);
@ -509,7 +441,6 @@ case "$CMD" in
--vibe) do_vibe ;;
--check-mismatch) do_check_mismatch ;;
--migrate) do_migrate ;;
--log-session) do_log_session "$@" ;;
--help|-h) sed -n '1,/^set -euo/p' "$0" | sed 's|^# \?||' ;;
*)
echo "gstack-developer-profile: unknown subcommand '$CMD'" >&2

View File

@ -57,7 +57,7 @@ while IFS= read -r f; do
*.md) DOCS=true ;;
# Config
package.json|package-lock.json|yarn.lock|bun.lock|bun.lockb) CONFIG=true ;;
package.json|package-lock.json|yarn.lock|bun.lockb) CONFIG=true ;;
Gemfile|Gemfile.lock) CONFIG=true ;;
*.yml|*.yaml) CONFIG=true ;;
.github/*) CONFIG=true ;;

View File

@ -1,181 +0,0 @@
#!/usr/bin/env bash
# gstack-distill-apply — apply a single distillation proposal after user Y.
#
# Plan-tune cathedral T11. Reads distillation-proposals.json, applies the
# Nth proposal to the right surface:
#
# preference → gstack-question-preference --write
# declared-nudge → atomic update to ~/.gstack/developer-profile.json declared
# memory-nugget → append to ~/.gstack/free-text-memory.json (local fallback)
#
# Always confirm before calling this from the skill — the bin assumes the user
# already approved (Codex #15 trust boundary). The skill template (/plan-tune
# distill review section) handles the confirm UX.
#
# gbrain integration: when gbrain is configured, the skill template ALSO
# invokes mcp__gbrain__put_page / extract_facts / add_tag in the same turn
# (those are MCP tools, not CLI-callable). Pass --gbrain-published true to
# mark the proposal as mirrored to gbrain. The local file always gets the
# write so it's the durable source-of-truth even on machines without gbrain.
#
# Usage:
# gstack-distill-apply --proposal <N> # apply Nth proposal
# gstack-distill-apply --proposal <N> --gbrain-published true
# gstack-distill-apply --list # show pending proposals
set -euo pipefail
# NOTE: `set -euo pipefail` must stay the first code line — `--help` prints the
# file from line 1 through that line.

# Paths. The state root honors GSTACK_STATE_ROOT, then GSTACK_HOME, then
# ~/.gstack. SLUG comes from gstack-slug and falls back to "unknown".
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
SLUG="${SLUG:-unknown}"
PROJECT_DIR="$GSTACK_HOME/projects/$SLUG"
PROPOSAL_FILE="$PROJECT_DIR/distillation-proposals.json"
MEMORY_FILE="$GSTACK_HOME/free-text-memory.json"
PROFILE_FILE="$GSTACK_HOME/developer-profile.json"

# Parsed options.
ACTION="apply"
PROPOSAL_IDX=""
GBRAIN_PUBLISHED="false"
while [ $# -gt 0 ]; do
  case "$1" in
    --proposal)
      # Check for the value explicitly: under `set -u` a bare "$2" would die
      # with an "unbound variable" message instead of a usable error.
      if [ $# -lt 2 ]; then
        echo "--proposal requires an index" >&2
        exit 1
      fi
      PROPOSAL_IDX="$2"
      # Reject values like "1abc", which parseInt would silently read as 1.
      case "$PROPOSAL_IDX" in
        *[!0-9]*)
          echo "--proposal must be a non-negative integer, got: $PROPOSAL_IDX" >&2
          exit 1
          ;;
      esac
      shift 2
      ;;
    --gbrain-published)
      if [ $# -lt 2 ]; then
        echo "--gbrain-published requires a value (true|false)" >&2
        exit 1
      fi
      GBRAIN_PUBLISHED="$2"
      shift 2
      ;;
    --list)
      ACTION="list"
      shift
      ;;
    --help|-h)
      sed -n '1,/^set -euo/p' "$0" | sed 's|^# \?||'
      exit 0
      ;;
    *)
      echo "unknown arg: $1" >&2
      exit 1
      ;;
  esac
done
# Nothing to list or apply until the distiller has produced a proposal file.
if [ ! -f "$PROPOSAL_FILE" ]; then
  echo "NO_PROPOSALS: $PROPOSAL_FILE missing — run gstack-distill-free-text first"
  exit 0
fi

# --list: print every proposal with its index. Indexes are positions in the
# proposals array (the same numbers --proposal takes), so applied proposals
# are kept in the listing and flagged instead of being filtered out.
if [ "$ACTION" = "list" ]; then
  PROPOSAL_FILE_PATH="$PROPOSAL_FILE" bun -e '
    const fs = require("fs");
    const p = JSON.parse(fs.readFileSync(process.env.PROPOSAL_FILE_PATH, "utf-8"));
    const proposals = p.proposals || [];
    if (proposals.length === 0) { console.log("(no proposals)"); process.exit(0); }
    console.log("GENERATED: " + p.generated_at);
    console.log("SOURCE_EVENTS: " + (p.source_event_count || 0));
    proposals.forEach((pr, i) => {
      // applied_at is stamped on a proposal once it has been applied.
      const applied = pr.applied_at ? " [applied " + pr.applied_at + "]" : "";
      console.log("");
      console.log("[" + i + "] " + (pr.kind || "?") + " (confidence: " + (pr.confidence || "?") + ")" + applied);
      if (pr.rationale) console.log("    rationale: " + pr.rationale);
      if (pr.kind === "preference") {
        console.log("    question_id: " + pr.question_id);
        console.log("    preference: " + pr.preference);
      } else if (pr.kind === "declared-nudge") {
        console.log("    dimension: " + pr.dimension);
        console.log("    direction: " + pr.direction + " (" + (pr.magnitude || "?") + ")");
      } else if (pr.kind === "memory-nugget") {
        console.log("    nugget: " + pr.nugget);
        console.log("    signal_keys: " + JSON.stringify(pr.applies_to_signal_keys || []));
      }
      if (pr.source_quotes && pr.source_quotes.length) {
        console.log("    quotes:");
        pr.source_quotes.forEach((q) => console.log("      - \"" + q + "\""));
      }
    });
  '
  exit 0
fi
if [ -z "$PROPOSAL_IDX" ]; then
  echo "--proposal <N> required" >&2
  exit 1
fi

# Apply via bun. Each kind has its own surface:
#   memory-nugget   → appended to the local memory file
#   preference      → written through gstack-question-preference
#   declared-nudge  → nudges one value under `declared` in the profile
# A proposal is stamped with applied_at only after its surface was updated.
# Unknown kinds are rejected, and an already-applied proposal is never applied
# a second time (that would duplicate nuggets and double-count nudges).
mkdir -p "$PROJECT_DIR"
PROPOSAL_IDX="$PROPOSAL_IDX" \
PROPOSAL_FILE_PATH="$PROPOSAL_FILE" \
MEMORY_FILE_PATH="$MEMORY_FILE" \
PROFILE_FILE_PATH="$PROFILE_FILE" \
PREF_BIN="$SCRIPT_DIR/gstack-question-preference" \
GBRAIN_PUBLISHED="$GBRAIN_PUBLISHED" \
bun -e '
  const fs = require("fs");
  const { spawnSync } = require("child_process");

  // Write JSON through a sibling temp file + rename so readers never see a
  // partially written file.
  const writeJsonAtomic = (file, value) => {
    const tmp = file + ".tmp";
    fs.writeFileSync(tmp, JSON.stringify(value, null, 2));
    fs.renameSync(tmp, file);
  };
  // Read a JSON object; a missing, unparseable, or non-object file yields the
  // fallback instead of throwing.
  const readJsonObject = (file, fallback) => {
    try {
      const v = JSON.parse(fs.readFileSync(file, "utf-8"));
      return v && typeof v === "object" && !Array.isArray(v) ? v : fallback;
    } catch {
      return fallback;
    }
  };

  const proposalPath = process.env.PROPOSAL_FILE_PATH;
  const gbrainPublished = process.env.GBRAIN_PUBLISHED === "true";
  const idx = parseInt(process.env.PROPOSAL_IDX, 10);
  const p = JSON.parse(fs.readFileSync(proposalPath, "utf-8"));
  const proposals = p.proposals || [];
  if (!Number.isInteger(idx) || idx < 0 || idx >= proposals.length) {
    process.stderr.write("invalid --proposal index " + idx + " (have " + proposals.length + ")\n");
    process.exit(1);
  }
  const pr = proposals[idx];
  const stamp = new Date().toISOString();

  // Already applied: never touch the target surface again. The only thing a
  // re-run may do is record that the proposal has since been mirrored.
  if (pr.applied_at) {
    if (gbrainPublished && !pr.gbrain_published) {
      pr.gbrain_published = true;
      writeJsonAtomic(proposalPath, p);
      console.log("UPDATED: proposal " + idx + " marked gbrain_published");
    } else {
      console.log("ALREADY_APPLIED: proposal " + idx + " was applied at " + pr.applied_at);
    }
    process.exit(0);
  }

  if (pr.kind === "memory-nugget") {
    // Memory-nugget: always write to local file (durable source-of-truth even
    // when gbrain is configured — gbrain is mirror, file is canon for the
    // PreToolUse hook injection path in Layer 8).
    const memPath = process.env.MEMORY_FILE_PATH;
    const mem = readJsonObject(memPath, { nuggets: [] });
    if (!Array.isArray(mem.nuggets)) mem.nuggets = [];
    mem.nuggets.push({
      nugget: pr.nugget,
      applies_to_signal_keys: pr.applies_to_signal_keys || [],
      applied_at: stamp,
      gbrain_published: gbrainPublished,
      source_quotes: pr.source_quotes || [],
    });
    writeJsonAtomic(memPath, mem);
    console.log("APPLIED: memory-nugget appended to " + memPath);
  } else if (pr.kind === "preference") {
    // Preference: route through gstack-question-preference for the user-origin
    // gate + event audit trail. source=plan-tune is the allowed value since
    // the user opt-in came from inside /plan-tune.
    const res = spawnSync(process.env.PREF_BIN, [
      "--write",
      JSON.stringify({
        question_id: pr.question_id,
        preference: pr.preference,
        source: "plan-tune",
        free_text: (pr.source_quotes || []).join(" | ").slice(0, 300),
      }),
    ], { encoding: "utf-8", stdio: ["ignore", "pipe", "pipe"], timeout: 5000 });
    if (res.status !== 0) {
      process.stderr.write("preference apply failed: " + (res.stderr || res.stdout) + "\n");
      process.exit(1);
    }
    console.log("APPLIED: preference " + pr.question_id + " → " + pr.preference);
  } else if (pr.kind === "declared-nudge") {
    // Declared-nudge: atomic update to developer-profile.json declared.
    // Magnitude tiers: small=0.05, medium=0.10, large=0.15. Any direction
    // other than "down" nudges upward. The result is clamped to [0, 1].
    if (typeof pr.dimension !== "string" || pr.dimension === "") {
      process.stderr.write("declared-nudge proposal " + idx + " has no dimension\n");
      process.exit(1);
    }
    const mag = { small: 0.05, medium: 0.10, large: 0.15 }[pr.magnitude || "small"] || 0.05;
    const delta = pr.direction === "down" ? -mag : mag;
    const profilePath = process.env.PROFILE_FILE_PATH;
    const profile = readJsonObject(profilePath, {});
    profile.declared = profile.declared || {};
    // A dimension with no numeric value yet starts from the midpoint.
    const cur = typeof profile.declared[pr.dimension] === "number" ? profile.declared[pr.dimension] : 0.5;
    const next = Math.max(0, Math.min(1, cur + delta));
    profile.declared[pr.dimension] = +next.toFixed(3);
    profile.declared_at = stamp;
    writeJsonAtomic(profilePath, profile);
    console.log("APPLIED: declared." + pr.dimension + " " + cur + " → " + profile.declared[pr.dimension]);
  } else {
    // Do not stamp a proposal as applied when nothing was done with it.
    process.stderr.write("unknown proposal kind: " + pr.kind + "\n");
    process.exit(1);
  }

  // Mark the proposal as applied so /plan-tune list shows it consumed.
  pr.applied_at = stamp;
  pr.gbrain_published = gbrainPublished;
  writeJsonAtomic(proposalPath, p);
'

View File

@ -1,272 +0,0 @@
#!/usr/bin/env bash
# gstack-distill-free-text — Layer 8 "dream cycle" batch distiller.
#
# Reads auq-other free-text events from this project's question-log.jsonl,
# sends them to Claude via the Anthropic SDK, and writes structured proposals
# the user can review via /plan-tune distill. Proposals require explicit
# user Y before applying — never autonomous (Codex #15 trust boundary).
#
# Usage:
# gstack-distill-free-text # sync, prompts at end
# gstack-distill-free-text --background # spawn detached; results
# # surface on next /plan-tune
# gstack-distill-free-text --dry-run # show prompt, no API call
# gstack-distill-free-text --status # show last-run stats
#
# No rate cap — the natural rate of free-text events (rare; user has to type
# "Other" then content) bounds this loop already. Each Haiku call is ~$0.01,
# so even a runaway at one-per-minute would be ~$14/day worst case. The
# cumulative cost log at $GSTACK_STATE_ROOT/distill-cost.jsonl gives full
# auditability via --status when you want it.
# Per D6: Anthropic SDK direct call, fail-loud on missing ANTHROPIC_API_KEY.
set -euo pipefail
# NOTE: `set -euo pipefail` must stay the first code line — `--help` prints the
# file from line 1 through that line.

# Script location and state root (GSTACK_STATE_ROOT, then GSTACK_HOME, then
# ~/.gstack).
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"

# Per-project paths, keyed by the slug gstack-slug reports ("unknown" if it
# reports none).
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
SLUG="${SLUG:-unknown}"
PROJECT_DIR="$GSTACK_HOME/projects/$SLUG"
LOG_FILE="$PROJECT_DIR/question-log.jsonl"
PROPOSAL_FILE="$PROJECT_DIR/distillation-proposals.json"
COST_LOG="$GSTACK_HOME/distill-cost.jsonl"
mkdir -p "$PROJECT_DIR"

# Run mode, selected by the first argument; no argument means a synchronous
# run.
MODE="sync"
case "${1:-}" in
  '')
    ;;
  --background|--dry-run|--status)
    # The mode name is the flag without its leading dashes.
    MODE="${1#--}"
    ;;
  -h|--help)
    sed -n '1,/^set -euo/p' "$0" | sed 's|^# \?||'
    exit 0
    ;;
  *)
    echo "unknown arg: $1" >&2
    exit 1
    ;;
esac
# --- Status subcommand --------------------------------------------------
# Summarizes the cost log entries recorded for this project slug: number of
# runs, runs and estimated spend for the current UTC day, estimated total
# spend, and the most recent run. Malformed lines in the cost log are skipped
# so a single bad line cannot break the report.
if [ "$MODE" = "status" ]; then
  COST_LOG_PATH="$COST_LOG" SLUG_PATH="$SLUG" bun -e '
    const fs = require("fs");
    const slug = process.env.SLUG_PATH;
    const path = process.env.COST_LOG_PATH;
    if (!fs.existsSync(path)) { console.log("no distill runs yet"); process.exit(0); }
    const lines = fs.readFileSync(path, "utf-8").trim().split("\n").filter(Boolean);
    const mine = [];
    for (const l of lines) {
      try {
        const e = JSON.parse(l);
        if (e && e.slug === slug) mine.push(e);
      } catch {}
    }
    if (mine.length === 0) { console.log("no distill runs yet for slug=" + slug); process.exit(0); }
    const totalUsd = mine.reduce((a, e) => a + (e.cost_usd_est || 0), 0);
    // toISOString is UTC, so "today" means the current UTC date.
    const todayIso = new Date().toISOString().slice(0, 10);
    const today = mine.filter((e) => String(e.ts || "").startsWith(todayIso));
    const todayUsd = today.reduce((a, e) => a + (e.cost_usd_est || 0), 0);
    console.log("RUNS: " + mine.length);
    console.log("TODAY: " + today.length + " run(s), $" + todayUsd.toFixed(4));
    console.log("ESTIMATED_TOTAL_USD: $" + totalUsd.toFixed(4));
    const last = mine[mine.length - 1];
    console.log("LAST_RUN: " + (last.ts || "?") + " | " + (last.proposals_count || 0) + " proposals");
  '
  exit 0
fi
# --- Background mode: detach + invoke self synchronously ---------------
# Re-run this script with no arguments (plain sync mode) under nohup, discard
# its output, and report the child pid.
if [[ "$MODE" == "background" ]]; then
  nohup "$0" >/dev/null 2>&1 &
  printf 'DISTILL_SPAWNED: pid=%s\n' "$!"
  exit 0
fi

# No rate cap. Natural input rate (free-text events are rare) + Haiku price
# (~$0.01/run) keep this bounded. Use --status to audit spend.

# --- Gather unprocessed auq-other events from this project -------------
if [[ ! -f "$LOG_FILE" ]]; then
  printf 'NO_LOG: no question-log.jsonl in %s\n' "$PROJECT_DIR"
  exit 0
fi

# Select log entries that came from the "Other" option, carry free text, and
# have not been distilled yet. Unparseable lines are skipped silently.
EVENTS_JSON=$(LOG_FILE_PATH="$LOG_FILE" bun -e '
  const fs = require("fs");
  const rawLines = fs.readFileSync(process.env.LOG_FILE_PATH, "utf-8").trim().split("\n").filter(Boolean);
  const pending = rawLines.flatMap((rawLine) => {
    try {
      const entry = JSON.parse(rawLine);
      const wanted = entry.source === "auq-other" && !entry.distilled_at && entry.free_text;
      if (!wanted) return [];
      return [{
        ts: entry.ts,
        question_id: entry.question_id,
        question_summary: entry.question_summary,
        free_text: entry.free_text,
        session_id: entry.session_id,
      }];
    } catch {
      return [];
    }
  });
  process.stdout.write(JSON.stringify(pending));
')

# Count the selected events; nothing to do when the array is empty.
EVENT_COUNT=$(printf '%s' "$EVENTS_JSON" | bun -e 'const pending = JSON.parse(await Bun.stdin.text()); console.log(pending.length);')
if [ "$EVENT_COUNT" -eq 0 ]; then
  printf 'NO_FREE_TEXT: nothing to distill\n'
  exit 0
fi
# --- Build distill prompt ---------------------------------------------
# Heredoc into temp file (avoids $(cat <<'PROMPT'...) which choked the
# bash parser on apostrophes elsewhere in the script).
DISTILL_PROMPT_FILE=$(mktemp)
# Remove the temp prompt file on every exit path.
trap 'rm -f "$DISTILL_PROMPT_FILE"' EXIT
# The quoted delimiter ('PROMPT') disables variable and command expansion, so
# the text below is written to the file verbatim.
cat > "$DISTILL_PROMPT_FILE" <<'PROMPT'
You are gstack dream-cycle distiller. Below are free-text responses the
user typed into AskUserQuestion prompts (option "Other") across recent gstack
sessions. For each response, extract structured signal that should update the
user plan-tune profile or preferences.
Return strict JSON with this shape:
{
"proposals": [
{
"kind": "preference" | "declared-nudge" | "memory-nugget",
"confidence": 0.0-1.0,
"source_quotes": ["<verbatim quote 1>", "<verbatim quote 2>"],
"question_id": "<id>",
"preference": "never-ask" | "always-ask" | "ask-only-for-one-way",
"dimension": "scope_appetite | risk_tolerance | detail_preference | autonomy | architecture_care",
"direction": "up | down",
"magnitude": "small | medium | large",
"rationale": "<one sentence>",
"nugget": "<one-line memory>",
"applies_to_signal_keys": ["scope-appetite", "..."]
}
]
}
Rules:
- Reject any proposal where confidence < 0.7.
- Quote VERBATIM from the user free_text. Never paraphrase a source quote.
- A single user response may produce multiple proposals.
- If nothing meaningful to extract, return {"proposals": []}.
- No commentary outside the JSON.
PROMPT
# Load the prompt into a variable (command substitution strips the trailing
# newline) so it can be echoed in dry-run mode and passed to the SDK call.
DISTILL_PROMPT=$(cat "$DISTILL_PROMPT_FILE")
# --- Dry-run: emit prompt + events, exit ------------------------------
# Show exactly what would be sent (prompt, then pretty-printed events) without
# making an API call.
if [[ "$MODE" == "dry-run" ]]; then
  printf '=== DISTILL PROMPT ===\n%s\n\n=== EVENTS (%s) ===\n' "$DISTILL_PROMPT" "$EVENT_COUNT"
  printf '%s\n' "$EVENTS_JSON" | bun -e 'const events = JSON.parse(await Bun.stdin.text()); console.log(JSON.stringify(events, null, 2));'
  exit 0
fi

# --- SDK call: fail-loud on missing key -------------------------------
# Everything past this point needs the API key; explain and exit 1 without it.
if [[ -z "${ANTHROPIC_API_KEY:-}" ]]; then
  cat >&2 <<'EOF'
gstack-distill-free-text: ANTHROPIC_API_KEY not set.
Dream-cycle distillation needs an API key for the SDK call. Set
ANTHROPIC_API_KEY in your environment, or run with --dry-run to see
what would be sent without actually calling.
Note: this is a separate billing/auth surface from your interactive
Claude Code session (per Codex correction in D6).
EOF
  exit 1
fi
# Run the SDK call in bun. Emits one JSON object on stdout:
# {proposals_count, rejected_low_confidence, input_tokens, output_tokens, cost_usd_est}.
# Side effects: overwrites the proposals file and stamps distilled_at on the
# source events in question-log.jsonl. Exits 1 (before touching either file)
# when the model reply is not valid JSON.
# NOTE: the JS below lives inside a single-quoted shell string, so it must not
# contain apostrophes.
RESULT=$(EVENTS_JSON="$EVENTS_JSON" DISTILL_PROMPT="$DISTILL_PROMPT" \
  PROPOSAL_FILE_PATH="$PROPOSAL_FILE" LOG_FILE_PATH="$LOG_FILE" \
  ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \
  bun --cwd "$ROOT_DIR" -e '
  const fs = require("fs");
  const Anthropic = require("@anthropic-ai/sdk").default;
  const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
  const events = JSON.parse(process.env.EVENTS_JSON);
  const prompt = process.env.DISTILL_PROMPT + "\n\nFREE-TEXT RESPONSES (JSON array):\n" + JSON.stringify(events, null, 2);

  // Write via temp file + rename so a crash mid-write never leaves a
  // truncated proposals file or question log behind.
  const writeAtomic = (target, data) => {
    const tmp = target + ".tmp." + process.pid;
    fs.writeFileSync(tmp, data);
    fs.renameSync(tmp, target);
  };

  // Pricing (Haiku 4.5 — cheap, fast, sufficient for structured extraction).
  // Per token, USD: input $0.001/1k = 1e-6, output $0.005/1k = 5e-6.
  const INPUT_PER_TOKEN = 1e-6;
  const OUTPUT_PER_TOKEN = 5e-6;

  const resp = await client.messages.create({
    model: "claude-haiku-4-5-20251001",
    max_tokens: 4096,
    messages: [{ role: "user", content: prompt }],
  });
  const text = resp.content.map((b) => (b.type === "text" ? b.text : "")).join("");

  // Strip optional fenced code blocks the model may wrap JSON in.
  const stripped = text.replace(/^```(?:json)?\s*/i, "").replace(/```\s*$/i, "").trim();
  let parsed;
  try { parsed = JSON.parse(stripped); } catch (e) {
    process.stderr.write("DISTILL: model returned non-JSON: " + text.slice(0, 200) + "\n");
    process.exit(1);
  }

  // Valid JSON can still be null or a non-object; treat that as no proposals
  // rather than throwing on the property access.
  const proposals = parsed && Array.isArray(parsed.proposals) ? parsed.proposals : [];
  // Keep only proposals with confidence >= 0.7 (model is told this rule;
  // double-check in case it slipped). Null entries are dropped as well.
  const filtered = proposals.filter((p) => p && typeof p.confidence === "number" && p.confidence >= 0.7);

  // Write proposals file (overwrite — only the latest run is reviewable).
  writeAtomic(process.env.PROPOSAL_FILE_PATH, JSON.stringify({
    generated_at: new Date().toISOString(),
    source_event_count: events.length,
    proposals: filtered,
  }, null, 2));

  // Mark source events as distilled_at so they do not re-propose. The log is
  // re-read here (not reused from the gather step) so lines appended in the
  // meantime are preserved. Match by ts + question_id, restricted to the same
  // criteria used when gathering, so unrelated entries that share a key and
  // already-stamped entries are left untouched.
  const logPath = process.env.LOG_FILE_PATH;
  const distilledAt = new Date().toISOString();
  const matchKeys = new Set(events.map((e) => (e.ts || "") + "::" + (e.question_id || "")));
  const lines = fs.readFileSync(logPath, "utf-8").split("\n");
  const out = [];
  for (const ln of lines) {
    if (!ln.trim()) { out.push(ln); continue; }
    try {
      const e = JSON.parse(ln);
      const key = (e.ts || "") + "::" + (e.question_id || "");
      if (e.source === "auq-other" && !e.distilled_at && matchKeys.has(key)) {
        e.distilled_at = distilledAt;
        out.push(JSON.stringify(e));
      } else {
        out.push(ln);
      }
    } catch { out.push(ln); } // keep unparseable lines byte-for-byte
  }
  writeAtomic(logPath, out.join("\n"));

  // Cost estimate from usage tokens.
  const usage = resp.usage || {};
  const inTok = usage.input_tokens || 0;
  const outTok = usage.output_tokens || 0;
  const cost = inTok * INPUT_PER_TOKEN + outTok * OUTPUT_PER_TOKEN;
  process.stdout.write(JSON.stringify({
    proposals_count: filtered.length,
    rejected_low_confidence: proposals.length - filtered.length,
    input_tokens: inTok,
    output_tokens: outTok,
    cost_usd_est: cost,
  }));
')
# Append cost log line: splice ts + slug in front of the fields of the
# single-line JSON object in $RESULT by peeling off its outer braces.
TS="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
RESULT_FIELDS="${RESULT#\{}"
RESULT_FIELDS="${RESULT_FIELDS%\}}"
printf '{"ts":"%s","slug":"%s",%s}\n' "$TS" "$SLUG" "$RESULT_FIELDS" >> "$COST_LOG"

# Human-readable completion summary on stdout.
printf 'DISTILL_COMPLETE:\n proposals_file: %s\n %s\n' "$PROPOSAL_FILE" "$RESULT"

View File

@ -18,8 +18,7 @@
* "gstack_brain_sync_mode": "off"|"artifacts-only"|"full",
* "gstack_brain_git": true|false,
* "gstack_artifacts_remote": "https://..." | "",
* "gbrain_local_status": "ok"|"no-cli"|"missing-config"|"broken-config"|"broken-db",
* "gbrain_pooler_mode": "transaction"|"session"|null
* "gbrain_local_status": "ok"|"no-cli"|"missing-config"|"broken-config"|"broken-db"
* }
*
* Backward compatibility (per plan codex #5): the 9 pre-existing fields stay
@ -43,7 +42,6 @@ import {
resolveGbrainBin,
readGbrainVersion,
} from "../lib/gbrain-local-status";
import { isTransactionModePooler } from "../lib/gbrain-exec";
const STATE_DIR = process.env.GSTACK_HOME || join(userHome(), ".gstack");
const SCRIPT_DIR = __dirname;
@ -100,17 +98,6 @@ function detectConfig(): { exists: boolean; engine: "pglite" | "postgres" | null
return { exists: true, engine: null };
}
// --- pooler mode detection (#1435) ---
//
// Reads database_url from the gbrain config file (GBRAIN_CONFIG) and reports
// whether it targets a transaction-mode pooler, as decided by
// isTransactionModePooler. Surfaced so /sync-gbrain and /setup-gbrain can
// advise users when search may require GBRAIN_PREPARE.
//
// Returns null when the config is unreadable or has no usable database_url
// string; otherwise "transaction" or "session". The return type lists exactly
// the values this function can produce, matching the documented
// gbrain_pooler_mode field.
function detectPoolerMode(): "transaction" | "session" | null {
  const parsed = tryReadJSON(GBRAIN_CONFIG) as { database_url?: unknown } | null;
  const databaseUrl = parsed?.database_url;
  // Config is untrusted JSON: only a non-empty string is a usable URL.
  if (typeof databaseUrl !== "string" || databaseUrl === "") return null;
  return isTransactionModePooler(databaseUrl) ? "transaction" : "session";
}
// --- gbrain doctor health (any nonzero exit or non-"ok"/"warnings" status → false) ---
//
// Uses --fast to avoid hanging on a dead DB. Per the local-status classifier
@ -228,7 +215,6 @@ function main(): void {
gstack_brain_git: detectBrainGit(),
gstack_artifacts_remote: detectArtifactsRemote(),
gbrain_local_status: localEngineStatus({ noCache }),
gbrain_pooler_mode: detectPoolerMode(),
};
process.stdout.write(JSON.stringify(out, null, 2) + "\n");

View File

@ -19,14 +19,9 @@
# - git
# - network reachability to https://github.com
#
# gbrain installs at the latest default-branch HEAD by default — the hard pin
# was removed in #1744 (it had drifted ~23 versions behind). Pass
# --pinned-commit <sha> to install a specific commit for reproducibility. A
# minimum-version floor (MIN_GBRAIN_VERSION) hard-fails the install when the
# resulting gbrain is too old for gstack's sync integration, and a fast
# `gbrain doctor` self-test hard-fails a broken install when gbrain is already
# configured. This keeps the version gate that the pin used to provide without
# freezing users 23 releases behind.
# The pinned commit is declared here rather than resolved dynamically so
# upgrades are explicit and reviewable. Update PINNED_COMMIT when gstack
# verifies compatibility with a new gbrain release.
#
# Env:
# GBRAIN_INSTALL_DIR — override default install path (~/gbrain)
@ -38,14 +33,8 @@
set -euo pipefail
# --- defaults ---
# No version pin by default — install the latest default-branch HEAD (#1744).
# --pinned-commit <sha> overrides for reproducibility.
PINNED_COMMIT=""
PINNED_TAG=""
# Minimum gbrain version gstack's integration is known to work with. The
# `sources list --json` wrapped-object shape + federated sources landed by 0.20;
# older predates the surface gstack drives. Hard-fail below this floor (#1744).
MIN_GBRAIN_VERSION="0.20.0"
PINNED_COMMIT="08b3698e90532b7b66c445e6b1d8cdfe71822802" # gbrain v0.18.2
PINNED_TAG="v0.18.2"
GBRAIN_REPO_URL="https://github.com/garrytan/gbrain.git"
DEFAULT_INSTALL_DIR="${GBRAIN_INSTALL_DIR:-$HOME/gbrain}"
INSTALL_DIR="$DEFAULT_INSTALL_DIR"
@ -124,7 +113,7 @@ elif [ -n "$DETECTED_CLONE" ]; then
else
# Fresh clone path.
if $DRY_RUN; then
log "DRY RUN: would clone $GBRAIN_REPO_URL ${PINNED_COMMIT:+@ $PINNED_COMMIT }→ $INSTALL_DIR (latest HEAD unless --pinned-commit)"
log "DRY RUN: would clone $GBRAIN_REPO_URL @ $PINNED_COMMIT → $INSTALL_DIR"
exit 0
fi
if [ -d "$INSTALL_DIR" ]; then
@ -132,12 +121,8 @@ else
fi
log "cloning $GBRAIN_REPO_URL → $INSTALL_DIR"
git clone --quiet "$GBRAIN_REPO_URL" "$INSTALL_DIR"
if [ -n "$PINNED_COMMIT" ]; then
( cd "$INSTALL_DIR" && git checkout --quiet "$PINNED_COMMIT" )
log "checked out pinned commit $PINNED_COMMIT${PINNED_TAG:+ ($PINNED_TAG)}"
else
log "installed latest gbrain (default-branch HEAD)"
fi
( cd "$INSTALL_DIR" && git checkout --quiet "$PINNED_COMMIT" )
log "pinned to $PINNED_COMMIT${PINNED_TAG:+ ($PINNED_TAG)}"
fi
if $DRY_RUN; then
@ -210,44 +195,6 @@ fi
log "installed gbrain $actual_version from $INSTALL_DIR"
# --- minimum-version floor (#1744) ---
# Unpinning means new installs track gbrain HEAD. Hard-fail if the resulting
# version is below the floor gstack's sync integration needs — same exit-3 posture
# as the PATH-shadow / version-mismatch failures above. A warning here is exactly
# how the data-loss class slipped through, so this gate fails closed.
version_lt() {
  # Succeeds (exit 0) only when version $1 sorts strictly before version $2
  # under `sort -V`; identical version strings are never "less than".
  if [ "$1" = "$2" ]; then
    return 1
  fi
  test "$(printf '%s\n' "$1" "$2" | sort -V | head -n 1)" = "$1"
}
if version_lt "$actual_norm" "$MIN_GBRAIN_VERSION"; then
echo "" >&2
echo "gstack-gbrain-install: gbrain $actual_version is below the minimum gstack-tested version ($MIN_GBRAIN_VERSION)." >&2
echo " gstack's sync integration needs the v0.20+ source/list surface." >&2
echo " Fix: update the gbrain clone at $INSTALL_DIR to a newer release (git pull), then" >&2
echo " re-run /setup-gbrain. Or pass --pinned-commit <sha> to install a specific newer commit." >&2
echo "" >&2
exit 3
fi
# --- functional self-test when gbrain is already configured (#1744) ---
# When a brain config exists (re-install / detected clone), run a fast doctor as
# a hard gate so a broken gbrain is caught at setup, not at data-loss time.
# Pre-init installs skip this (config not written yet); the full
# `/sync-gbrain --dry-run` self-test runs from /setup-gbrain after `gbrain init`.
_GBRAIN_HOME_CHECK="${GBRAIN_HOME:-$HOME/.gbrain}"
if [ -f "$_GBRAIN_HOME_CHECK/config.json" ]; then
if ! gbrain doctor --fast >/dev/null 2>&1; then
echo "" >&2
echo "gstack-gbrain-install: gbrain $actual_version installed but 'gbrain doctor --fast' failed." >&2
echo " Refusing to leave a broken gbrain in place. Run 'gbrain doctor' to see what's wrong," >&2
echo " fix it, then re-run /setup-gbrain." >&2
echo "" >&2
exit 3
fi
log "gbrain doctor --fast passed"
fi
# v1.40.0.0 post-install validation (T6 / codex review #19): --ignore-scripts
# may skip artifacts gbrain needs at runtime, especially on Windows
# MSYS/MINGW where we DID pass --ignore-scripts. `gbrain --version` above
@ -270,13 +217,4 @@ if ! gbrain sources --help >/dev/null 2>&1; then
fi
echo ""
if [ -n "${VOYAGE_API_KEY:-}" ]; then
echo "Next: gbrain init --pglite --embedding-model voyage:voyage-code-3 --embedding-dimensions 1024"
echo " (or run /setup-gbrain for the full setup flow)"
else
echo "Next: gbrain init --pglite (or run /setup-gbrain for the full setup flow)"
echo ""
echo "Tip: set VOYAGE_API_KEY before init to use voyage-code-3 (best embedding"
echo "model for code retrieval on Voyage). Without it, gbrain falls back to its"
echo "auto-selected provider (OpenAI when OPENAI_API_KEY is set, etc.)."
fi
echo "Next: gbrain init --pglite (or run /setup-gbrain for the full setup flow)"

View File

@ -27,22 +27,8 @@
# restore), D16 (pooler URL paste hygiene with redacted preview).
# _gstack_gbrain_validate_varname <name> — returns 0 if usable, 2 otherwise.
# `local LC_ALL=C` is load-bearing twice over:
# 1. In many macOS shells the default locale (e.g. en_US.UTF-8) makes `case`
# glob brackets like `[A-Z]` match lowercase letters too. Without the
# LC_ALL=C pin, names like `lower-case` pass validation and then trip
# `printf -v "$varname"` and `export "$varname"` with "not a valid
# identifier" errors the caller can't easily distinguish from other
# failures.
# 2. `local` is required because this file is documented as a sourced helper
# (see header), so a bare `LC_ALL=C` would mutate the caller's locale for
# the rest of the process — silently affecting downstream `sort`, `tr`,
# and any locale-aware glob in the same shell.
# Together they give ASCII-only bracket semantics on both macOS and Linux
# (matching the documented `[A-Z_][A-Z0-9_]*` contract) without leaking.
_gstack_gbrain_validate_varname() {
local name="$1"
local LC_ALL=C
case "$name" in
[A-Z_][A-Z0-9_]*) return 0 ;;
*) return 2 ;;

View File

@ -339,7 +339,7 @@ cmd_pooler_url() {
# Prefer the singular Session Pooler config when Supabase returns an
# array (response shape can vary by project state). Fall back to the
# first PRIMARY entry if no "session" pool_mode is present.
local db_user db_host db_port db_name pool_mode
local db_user db_host db_port db_name
local first_or_session
if printf '%s' "$resp" | jq -e 'type == "array"' >/dev/null 2>&1; then
first_or_session=$(printf '%s' "$resp" | jq '[.[] | select(.pool_mode == "session")][0] // .[0]')
@ -351,27 +351,11 @@ cmd_pooler_url() {
db_host=$(printf '%s' "$first_or_session" | jq -r '.db_host // empty')
db_port=$(printf '%s' "$first_or_session" | jq -r '.db_port // empty')
db_name=$(printf '%s' "$first_or_session" | jq -r '.db_name // empty')
pool_mode=$(printf '%s' "$first_or_session" | jq -r '.pool_mode // empty')
if [ -z "$db_user" ] || [ -z "$db_host" ] || [ -z "$db_port" ] || [ -z "$db_name" ]; then
die "pooler-url: missing pooler config fields (db_user/db_host/db_port/db_name); re-poll or check project state"
fi
# Issue #1301: New Supabase projects' Management API returns a single
# transaction-mode pooler at port 6543, but the shared pooler tenant
# for fresh projects only listens on the session port 5432. Trusting
# db_port verbatim makes `gbrain init` hang to TCP timeout (transaction
# port unreachable) before falling into "tenant not found"-style errors
# that look like auth bugs. Rewrite transaction/6543 -> session/5432.
# Override with GSTACK_SUPABASE_TRUST_API_PORT=1 if a future API version
# starts returning a working transaction port and this rewrite is wrong.
if [ "${GSTACK_SUPABASE_TRUST_API_PORT:-0}" != "1" ] \
&& [ "$pool_mode" = "transaction" ] && [ "$db_port" = "6543" ]; then
echo "pooler-url: API returned transaction pooler (port 6543); shared pooler for new projects listens on session port 5432 — rewriting (set GSTACK_SUPABASE_TRUST_API_PORT=1 to disable)" >&2
db_port=5432
pool_mode="session"
fi
local url="postgresql://${db_user}:${DB_PASS}@${db_host}:${db_port}/${db_name}"
if $json_mode; then

View File

@ -37,10 +37,9 @@ import { createHash } from "crypto";
import "../lib/conductor-env-shim";
import { detectEngineTier, withErrorContext, canonicalizeRemote } from "../lib/gstack-memory-helpers";
import { ensureSourceRegistered, sourcePageCount, parseSourcesList } from "../lib/gbrain-sources";
import { detectAutopilot, decideSourceRemove, decideCodeSync } from "../lib/gbrain-guards";
import { ensureSourceRegistered, sourcePageCount } from "../lib/gbrain-sources";
import { localEngineStatus, type LocalEngineStatus } from "../lib/gbrain-local-status";
import { buildGbrainEnv, spawnGbrain, execGbrainJson, NEEDS_SHELL_ON_WINDOWS } from "../lib/gbrain-exec";
import { buildGbrainEnv, spawnGbrain, execGbrainJson } from "../lib/gbrain-exec";
// ── Types ──────────────────────────────────────────────────────────────────
@ -53,8 +52,6 @@ interface CliArgs {
noMemory: boolean;
noBrainSync: boolean;
codeOnly: boolean;
/** #1734: opt-in to sync a URL-managed source whose code walk may auto-reclone. */
allowReclone: boolean;
}
interface CodeStageDetail {
@ -62,7 +59,7 @@ interface CodeStageDetail {
source_path?: string;
page_count?: number | null;
last_imported?: string;
status?: "ok" | "skipped" | "failed" | "refused-autopilot" | "refused-reclone";
status?: "ok" | "skipped" | "failed";
}
interface StageResult {
@ -83,115 +80,6 @@ const STATE_PATH = join(GSTACK_HOME, ".gbrain-sync-state.json");
const LOCK_PATH = join(GSTACK_HOME, ".sync-gbrain.lock");
const STALE_LOCK_MS = 5 * 60 * 1000;
// Default 35-minute timeout for code-walk + memory-ingest stages. Override via
// GSTACK_SYNC_CODE_TIMEOUT_MS / GSTACK_SYNC_MEMORY_TIMEOUT_MS. Bounds-checked
// in resolveStageTimeoutMs below so wildly-low values don't make resume
// useless and wildly-high values don't mask config typos. See #1611.
const DEFAULT_STAGE_TIMEOUT_MS = 35 * 60 * 1000; // 2_100_000ms = 35min
const MIN_STAGE_TIMEOUT_MS = 60_000; // 1 minute floor
const MAX_STAGE_TIMEOUT_MS = 86_400_000; // 24 hour ceiling

/**
 * Parse a stage-timeout env value with bounds validation. Exported for the
 * regression test.
 *
 * The value must be a plain decimal integer (surrounding whitespace and a
 * leading "+" are tolerated). Anything else — including values with trailing
 * text such as "120000ms", "1e6" or "60000.5", which `Number.parseInt` would
 * silently truncate — is rejected as malformed rather than partially parsed.
 *
 * @param envValue Raw environment value; `undefined` or `""` means "not set".
 * @param envName  Variable name, used only in warning messages.
 * @returns The parsed value when it lies within
 *   [MIN_STAGE_TIMEOUT_MS, MAX_STAGE_TIMEOUT_MS]; otherwise
 *   DEFAULT_STAGE_TIMEOUT_MS. A warning is printed via `console.warn` whenever
 *   a set value is rejected; an unset value falls back silently.
 */
export function resolveStageTimeoutMs(
  envValue: string | undefined,
  envName: string,
): number {
  if (envValue === undefined || envValue === "") return DEFAULT_STAGE_TIMEOUT_MS;

  const trimmed = envValue.trim();
  // Whole-string match: digits only, so typos are surfaced instead of truncated.
  const n = /^\+?\d+$/.test(trimmed) ? Number(trimmed) : Number.NaN;

  // isFinite is false for NaN and for digit strings too long to represent.
  if (!Number.isFinite(n) || n <= 0) {
    console.warn(
      `[sync] ${envName}="${envValue}" is not a positive integer; falling back to ${DEFAULT_STAGE_TIMEOUT_MS}ms`,
    );
    return DEFAULT_STAGE_TIMEOUT_MS;
  }
  if (n < MIN_STAGE_TIMEOUT_MS) {
    console.warn(
      `[sync] ${envName}=${n} is below the ${MIN_STAGE_TIMEOUT_MS}ms (1min) floor; falling back to ${DEFAULT_STAGE_TIMEOUT_MS}ms`,
    );
    return DEFAULT_STAGE_TIMEOUT_MS;
  }
  if (n > MAX_STAGE_TIMEOUT_MS) {
    console.warn(
      `[sync] ${envName}=${n} is above the ${MAX_STAGE_TIMEOUT_MS}ms (24h) ceiling; falling back to ${DEFAULT_STAGE_TIMEOUT_MS}ms`,
    );
    return DEFAULT_STAGE_TIMEOUT_MS;
  }
  return n;
}
/**
 * gbrain writes ~/.gbrain/import-checkpoint.json on every import run. If a
 * previous /sync-gbrain hit the timeout (SIGTERM = exit 143), the checkpoint
 * + its staging dir survive on disk. Detect both and let gbrain resume from
 * processedIndex+1 on the next run. If the staging dir is missing or
 * unreadable, fall through to a fresh restage with a one-line warning so the
 * user sees we noticed. See #1611 + plan D1/C1.
 */
interface GbrainCheckpoint {
  dir?: string;
  totalFiles?: number;
  processedIndex?: number;
  completedFiles?: number;
  timestamp?: string;
}

/**
 * Read and validate gbrain's import checkpoint file.
 *
 * The file content is untrusted JSON, so only the known fields whose values
 * have the declared type are copied into the result; mis-typed fields are
 * dropped rather than passed through under a type assertion.
 *
 * @returns The checkpoint, or `null` when the file is missing, unreadable,
 *   not valid JSON, or not a plain JSON object (arrays included).
 */
export function readGbrainCheckpoint(): GbrainCheckpoint | null {
  // Read HOME from env so tests can redirect via process.env.HOME = ...
  // (Node/Bun's os.homedir() caches at process start and ignores later
  // mutations.)
  const home = process.env.HOME || homedir();
  const cpPath = join(home, ".gbrain", "import-checkpoint.json");
  if (!existsSync(cpPath)) return null;

  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(cpPath, "utf-8"));
  } catch {
    // Unreadable file or corrupt JSON — treat as no checkpoint and fall
    // through to fresh restage.
    return null;
  }
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) return null;

  const raw = parsed as Record<string, unknown>;
  const isCount = (v: unknown): v is number => typeof v === "number" && Number.isFinite(v);

  const checkpoint: GbrainCheckpoint = {};
  if (typeof raw.dir === "string") checkpoint.dir = raw.dir;
  if (isCount(raw.totalFiles)) checkpoint.totalFiles = raw.totalFiles;
  if (isCount(raw.processedIndex)) checkpoint.processedIndex = raw.processedIndex;
  if (isCount(raw.completedFiles)) checkpoint.completedFiles = raw.completedFiles;
  if (typeof raw.timestamp === "string") checkpoint.timestamp = raw.timestamp;
  return checkpoint;
}
export type ResumeVerdict =
  | { kind: "no-checkpoint" }
  | { kind: "resume"; stagingDir: string; processedIndex: number; totalFiles: number }
  | { kind: "stale-staging-missing"; stagingDir: string };

/**
 * Choose between resuming from gbrain's checkpoint and restaging from scratch
 * for the next memory-ingest run.
 *
 *   - no checkpoint (or no `dir` recorded)         → "no-checkpoint"
 *   - checkpoint + staging path is a directory     → "resume"
 *   - checkpoint + staging path missing/not a dir  → "stale-staging-missing"
 *
 * Only the existence and directory-ness of the staging path are checked here;
 * its contents are not inspected. Missing counters in the checkpoint default
 * to 0 in the "resume" verdict.
 */
export function decideResume(): ResumeVerdict {
  const checkpoint = readGbrainCheckpoint();
  if (checkpoint === null || !checkpoint.dir) {
    return { kind: "no-checkpoint" };
  }

  const stagingDir = checkpoint.dir;
  const stale: ResumeVerdict = { kind: "stale-staging-missing", stagingDir };

  if (!existsSync(stagingDir)) return stale;

  // The path exists; it must also be a directory. A stat failure (for example
  // the path vanishing between the two checks) counts as stale too.
  let stagingIsDirectory: boolean;
  try {
    stagingIsDirectory = statSync(stagingDir).isDirectory();
  } catch {
    return stale;
  }
  if (!stagingIsDirectory) return stale;

  const processedIndex = checkpoint.processedIndex ?? 0;
  const totalFiles = checkpoint.totalFiles ?? 0;
  return { kind: "resume", stagingDir, processedIndex, totalFiles };
}
// ── CLI ────────────────────────────────────────────────────────────────────
function printUsage(): void {
@ -208,8 +96,6 @@ Options:
--no-memory Skip the gstack-memory-ingest stage (transcripts + artifacts).
--no-brain-sync Skip the gstack-brain-sync git pipeline stage.
--code-only Only run the code-import stage (alias for --no-memory --no-brain-sync).
--allow-reclone Permit the code walk for URL-managed sources (remote_url set)
even though gbrain may auto-reclone the working tree (#1734).
--help This text.
Stages run in order: code memory ingest curated git push.
@ -225,7 +111,6 @@ function parseArgs(): CliArgs {
let noMemory = false;
let noBrainSync = false;
let codeOnly = false;
let allowReclone = false;
for (let i = 0; i < args.length; i++) {
const a = args[i];
@ -237,7 +122,6 @@ function parseArgs(): CliArgs {
case "--no-code": noCode = true; break;
case "--no-memory": noMemory = true; break;
case "--no-brain-sync": noBrainSync = true; break;
case "--allow-reclone": allowReclone = true; break;
case "--code-only":
codeOnly = true;
noMemory = true;
@ -254,7 +138,7 @@ function parseArgs(): CliArgs {
}
}
return { mode, quiet, noCode, noMemory, noBrainSync, codeOnly, allowReclone };
return { mode, quiet, noCode, noMemory, noBrainSync, codeOnly };
}
// ── Helpers ────────────────────────────────────────────────────────────────
@ -403,18 +287,14 @@ function gbrainSupportsSourcesRename(env?: NodeJS.ProcessEnv): boolean {
* `env` is the environment passed to the spawned `gbrain` process; defaults
* to `process.env`. Tests inject a PATH that points at a gbrain shim so the
* helper can be exercised without a real gbrain CLI.
*
* Shape note: `gbrain sources list --json` returns `{sources: [...]}` (v0.20+);
* older versions returned a flat array. Accept both for forward/backward compat
* (mirrors `probeSource`/`sourcePageCount` in lib/gbrain-sources.ts).
*/
export function sourceLocalPath(sourceId: string, env?: NodeJS.ProcessEnv): string | null {
const raw = execGbrainJson<unknown>(
const list = execGbrainJson<Array<{ id: string; local_path?: string }>>(
["sources", "list", "--json"],
{ baseEnv: env },
);
if (!raw) return null;
const found = parseSourcesList(raw).find((s) => s.id === sourceId);
if (!list) return null;
const found = list.find((s) => s.id === sourceId);
return found?.local_path ?? null;
}
@ -473,50 +353,20 @@ export function planHostnameFoldMigration(
return { kind: "pending-cleanup", oldId: legacyPathHashId };
}
export interface GuardedRemoveResult {
  /** True when `gbrain sources remove` ran and exited 0. */
  removed: boolean;
  /** True when a guard refused the remove (autopilot active or unsafe source). */
  skipped: boolean;
  /** Refusal explanation, or the reason reported by the remove decision. */
  reason: string;
}

/**
 * #1734: guarded wrapper around
 * `gbrain sources remove <id> --confirm-destructive`.
 *
 * The guards run immediately before the destructive call (E8: as late as
 * possible) to keep the autopilot window as small as it can be without a
 * gbrain-side lease. The remove is refused when `detectAutopilot` reports
 * autopilot as active, or when `decideSourceRemove` disallows it. This helper
 * never throws on a refusal — it reports one via `skipped`, and the caller
 * decides whether that is fatal (it never is today; removes are best-effort
 * cleanup).
 *
 * @param sourceId gbrain source id to remove.
 * @param env      Environment forwarded to the guards and the gbrain process.
 */
export function safeSourcesRemove(sourceId: string, env?: NodeJS.ProcessEnv): GuardedRemoveResult {
  // Guard 1: never race a running autopilot.
  const autopilot = detectAutopilot(env);
  if (autopilot.active) {
    const reason =
      `autopilot active (${autopilot.signal}); refusing destructive remove of ${sourceId}. Stop autopilot, then re-run /sync-gbrain.`;
    return { removed: false, skipped: true, reason };
  }

  // Guard 2: source-level safety decision.
  const decision = decideSourceRemove(sourceId, env);
  if (!decision.allow) {
    return { removed: false, skipped: true, reason: decision.reason };
  }

  // Both guards passed: run the remove with any extra flags the decision asks for.
  const removeArgs = ["sources", "remove", sourceId, "--confirm-destructive", ...decision.extraArgs];
  const outcome = spawnGbrain(removeArgs, { baseEnv: env });
  return { removed: outcome.status === 0, skipped: false, reason: decision.reason };
}
/**
* Remove an orphaned source. Called only after new-source sync verifies pages
* exist, so the old source is provably redundant before deletion. Routed through
* safeSourcesRemove for the #1734 guards.
* exist, so the old source is provably redundant before deletion.
*
* Flag note: existing call sites used `--confirm-destructive` here and
* `--yes` in `lib/gbrain-sources.ts` gbrain 0.35.0.0 accepts neither
* deterministically (the subcommand surface help is generic). We pass
* `--confirm-destructive` to match the existing call site convention; the
* flag-helper centralization in commit 4 (lib/gbrain-exec.ts) will resolve
* the inconsistency across the codebase.
*/
export function removeOrphanedSource(oldId: string, env?: NodeJS.ProcessEnv): boolean {
return safeSourcesRemove(oldId, env).removed;
const r = spawnGbrain(["sources", "remove", oldId, "--confirm-destructive"], { baseEnv: env });
return r.status === 0;
}
/**
@ -695,12 +545,13 @@ async function runCodeImport(args: CliArgs): Promise<StageResult> {
const legacyId = deriveLegacyCodeSourceId(root);
let legacyRemoved = false;
if (legacyId !== sourceId) {
// #1734: route through the data-loss guards (autopilot + source-safety).
const rm = safeSourcesRemove(legacyId, gbrainEnv);
if (rm.skipped && !args.quiet) {
console.error(`[sync:code] legacy-source cleanup skipped: ${rm.reason}`);
}
if (rm.removed) legacyRemoved = true;
const rm = spawnGbrain(["sources", "remove", legacyId, "--confirm-destructive"], {
timeout: 30_000,
baseEnv: gbrainEnv,
});
// Treat absent-source as success (clean state). gbrain emits "not found" on
// missing id; treat any non-zero exit without "not found" as a soft fail.
if (rm.status === 0) legacyRemoved = true;
}
// Step 0b: Hostname-fold migration (#1414).
@ -738,80 +589,28 @@ async function runCodeImport(args: CliArgs): Promise<StageResult> {
};
}
// Step 2: Always run the page-creating file walk first, then (for --full)
// a full re-embed.
//
// `gbrain reindex-code` only RE-EMBEDS pages that already exist; it never
// walks the filesystem. On a freshly-registered source (0 pages) a --full
// run that called reindex-code alone found nothing ("No code pages to
// reindex"), finished in ~1s, and left the code index permanently empty
// while still reporting OK. The page-creating walk is `sync --strategy
// code`, so --full must run it FIRST, then reindex-code, to honor the
// documented "full walk + reindex" contract for both fresh and populated
// sources.
const codeTimeoutMs = resolveStageTimeoutMs(
process.env.GSTACK_SYNC_CODE_TIMEOUT_MS,
"GSTACK_SYNC_CODE_TIMEOUT_MS",
);
// Step 2: Run sync or reindex.
const syncArgs = args.mode === "full"
? ["reindex-code", "--source", sourceId, "--yes"]
: ["sync", "--strategy", "code", "--source", sourceId];
// #1734 guards, checked immediately before the destructive walk (E8):
// - autopilot active → refuse (the race that wiped a working tree).
// - URL-managed source → the walk can auto-reclone (rm-rf); require
// --allow-reclone. Both surface a visible reason and fail the stage so the
// verdict shows ERR rather than silently skipping protection.
const apBeforeWalk = detectAutopilot(gbrainEnv);
if (apBeforeWalk.active) {
return {
name: "code", ran: true, ok: false, duration_ms: Date.now() - t0,
summary: `refused: gbrain autopilot active (${apBeforeWalk.signal}). Stop autopilot, then re-run /sync-gbrain.`,
detail: { source_id: sourceId, source_path: root, status: "refused-autopilot" },
};
}
const reclone = decideCodeSync(sourceId, gbrainEnv, args.allowReclone);
if (!reclone.allow) {
return {
name: "code", ran: true, ok: false, duration_ms: Date.now() - t0,
summary: `refused: ${reclone.reason}`,
detail: { source_id: sourceId, source_path: root, status: "refused-reclone" },
};
}
const walkResult = spawnGbrain(["sync", "--strategy", "code", "--source", sourceId], {
const syncResult = spawnGbrain(syncArgs, {
stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"],
timeout: codeTimeoutMs,
timeout: 35 * 60 * 1000,
baseEnv: gbrainEnv,
});
if (walkResult.status !== 0) {
if (syncResult.status !== 0) {
return {
name: "code",
ran: true,
ok: false,
duration_ms: Date.now() - t0,
summary: `gbrain sync --strategy code --source ${sourceId} exited ${walkResult.status}`,
summary: `gbrain ${syncArgs.join(" ")} exited ${syncResult.status}`,
detail: { source_id: sourceId, source_path: root, status: "failed" },
};
}
if (args.mode === "full") {
const reindexResult = spawnGbrain(["reindex-code", "--source", sourceId, "--yes"], {
stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"],
timeout: codeTimeoutMs,
baseEnv: gbrainEnv,
});
if (reindexResult.status !== 0) {
return {
name: "code",
ran: true,
ok: false,
duration_ms: Date.now() - t0,
summary: `gbrain reindex-code --source ${sourceId} exited ${reindexResult.status}`,
detail: { source_id: sourceId, source_path: root, status: "failed" },
};
}
}
// Step 3: Pin this worktree's CWD to the source via .gbrain-source. Subsequent
// gbrain code-def / code-refs / code-callers calls from anywhere under <root>
// route to this source by default — no --source flag needed.
@ -939,25 +738,6 @@ function runMemoryIngest(args: CliArgs): StageResult {
return skipStageForLocalStatus("memory", localStatus, t0);
}
// Resume detection (#1611 / plan D1 + C1). If a previous run hit the
// timeout and gbrain left ~/.gbrain/import-checkpoint.json plus its staging
// dir on disk, signal the grandchild via env so it skips the prepare phase
// and lets `gbrain import` resume from processedIndex+1 against the same
// staging dir. If the staging dir is gone (disk pressure cleanup, OS
// reboot, user manual cleanup), warn and fall through to a fresh restage.
const resume = decideResume();
const childEnv = buildGbrainEnv({ announce: false });
if (resume.kind === "resume") {
console.error(
`[sync:memory] resuming from gbrain checkpoint (${resume.processedIndex}/${resume.totalFiles} files staged at ${resume.stagingDir})`,
);
childEnv.GSTACK_INGEST_RESUME_DIR = resume.stagingDir;
} else if (resume.kind === "stale-staging-missing") {
console.error(
`[sync:memory] previous checkpoint stale (staging dir ${resume.stagingDir} gone), restaging from scratch`,
);
}
const ingestPath = join(import.meta.dir, "gstack-memory-ingest.ts");
const ingestArgs = ["run", ingestPath];
if (args.mode === "full") ingestArgs.push("--bulk");
@ -968,14 +748,10 @@ function runMemoryIngest(args: CliArgs): StageResult {
// .env.local footgun affects gstack-memory-ingest.ts too, not just the
// direct gbrain spawns in this file). The grandchild calls gbrain import
// internally and must see the DATABASE_URL from gbrain's own config.
const memoryTimeoutMs = resolveStageTimeoutMs(
process.env.GSTACK_SYNC_MEMORY_TIMEOUT_MS,
"GSTACK_SYNC_MEMORY_TIMEOUT_MS",
);
const result = spawnSync("bun", ingestArgs, {
encoding: "utf-8",
timeout: memoryTimeoutMs,
env: childEnv,
timeout: 35 * 60 * 1000,
env: buildGbrainEnv({ announce: false }),
});
// D6: parse [memory-ingest] lines from the child's stderr. ERR-prefixed
@ -1017,17 +793,13 @@ function runBrainSyncPush(args: CliArgs): StageResult {
return { name: "brain-sync", ran: false, ok: true, duration_ms: 0, summary: "skipped (gstack-brain-sync not installed)" };
}
// #1731: gstack-brain-sync is a bash shebang script; Windows can't spawn it
// without a shell, which surfaced as "brain-sync exited undefined".
spawnSync(brainSyncPath, ["--discover-new"], {
stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"],
timeout: 60 * 1000,
shell: NEEDS_SHELL_ON_WINDOWS,
});
const result = spawnSync(brainSyncPath, ["--once"], {
stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"],
timeout: 60 * 1000,
shell: NEEDS_SHELL_ON_WINDOWS,
});
return {

View File

@ -273,23 +273,16 @@ function resolveClaudeCodeCwd(
return null;
}
export function extractCwdFromJsonl(filePath: string): string | null {
// Read a capped prefix so huge JSONL files don't blow up memory. 64KB
// comfortably fits the largest observed session headers; the old 8KB cap
// would sometimes fall inside a single long line and silently drop the
// project (JSON.parse failure on the truncated tail).
const MAX_BYTES = 64 * 1024;
const MAX_LINES = 30;
function extractCwdFromJsonl(filePath: string): string | null {
try {
// Read only the first 8KB to avoid loading huge JSONL files into memory
const fd = openSync(filePath, "r");
const buf = Buffer.alloc(MAX_BYTES);
const bytesRead = readSync(fd, buf, 0, MAX_BYTES, 0);
const buf = Buffer.alloc(8192);
const bytesRead = readSync(fd, buf, 0, 8192, 0);
closeSync(fd);
const text = buf.toString("utf-8", 0, bytesRead);
// Drop the final segment — it may be an incomplete line at the cap boundary.
const parts = text.split("\n");
const completeLines = parts.length > 1 ? parts.slice(0, -1) : parts;
for (const line of completeLines.slice(0, MAX_LINES)) {
const lines = text.split("\n").slice(0, 15);
for (const line of lines) {
if (!line.trim()) continue;
try {
const obj = JSON.parse(line);

View File

@ -1,39 +0,0 @@
#!/usr/bin/env bash
# gstack-ios-qa-daemon — Mac-side daemon that brokers tailnet/loopback traffic
# to a connected iPhone running the in-app StateServer over the CoreDevice USB
# tunnel. Single-instance via flock on ~/.gstack/ios-qa-daemon.pid.
#
# Usage:
# gstack-ios-qa-daemon # loopback-only (local USB)
# gstack-ios-qa-daemon --tailnet # additionally open tailnet listener
#
# Environment:
# GSTACK_IOS_DAEMON_PORT — loopback listener port (default 9099)
# GSTACK_IOS_TARGET_UDID — target iOS device UDID (optional; otherwise
# the first paired connected device is used)
# GSTACK_IOS_TARGET_BUNDLE_ID — bundle ID of the iOS app hosting StateServer
# (default com.gstack.iosqa.fixture)
#
# Readiness protocol: prints `READY: port=<n> pid=<pid>` to stdout once both
# listeners are bound. Spawners read the daemon's stdout with a ~5s timeout to confirm.
#
# Exits cleanly when no active loopback clients are connected AND no remote
# session tokens are outstanding.
set -euo pipefail

# Print a prefixed error to stderr and abort with status 1.
die() {
  echo "gstack-ios-qa-daemon: $1" >&2
  exit 1
}

# Locate the gstack checkout relative to this script (one directory up).
script_dir="$(cd "$(dirname "$0")" && pwd)"
gstack_dir="$(cd "$script_dir/.." && pwd)"
entry="$gstack_dir/ios-qa/daemon/src/index.ts"

# Preconditions: the daemon entry point must exist and bun must be on PATH.
[ -f "$entry" ] || die "missing $entry (gstack install incomplete?)"
command -v bun >/dev/null 2>&1 || die "bun runtime not on PATH — install from https://bun.sh"

# Replace this shell with the daemon, forwarding every CLI argument untouched.
exec bun run "$entry" "$@"

View File

@ -1,28 +0,0 @@
#!/usr/bin/env bash
# gstack-ios-qa-mint — manage the tailnet allowlist for remote iOS QA agents.
#
# This is the owner-grant path: it writes identities into the local allowlist
# so a remote agent on the tailnet can self-service mint a session token via
# POST /auth/mint against the daemon.
#
# Run `gstack-ios-qa-mint --help` for full usage.
#
# Allowlist file: ~/.gstack/ios-qa-allowlist.json (mode 0600).
set -euo pipefail

# Print a prefixed error to stderr and abort with status 1.
die() {
  echo "gstack-ios-qa-mint: $1" >&2
  exit 1
}

# Locate the gstack checkout relative to this script (one directory up).
script_dir="$(cd "$(dirname "$0")" && pwd)"
gstack_dir="$(cd "$script_dir/.." && pwd)"
entry="$gstack_dir/ios-qa/daemon/src/cli-mint.ts"

# Preconditions: the mint CLI entry point must exist and bun must be on PATH.
[ -f "$entry" ] || die "missing $entry (gstack install incomplete?)"
command -v bun >/dev/null 2>&1 || die "bun runtime not on PATH — install from https://bun.sh"

# Replace this shell with the CLI, forwarding every argument untouched.
exec bun run "$entry" "$@"

View File

@ -53,25 +53,18 @@ for path in paths:
continue
if line in seen:
continue
# Prefer ISO ts field for sort; fall back to SHA-256. The line
# content is the final tiebreaker so the order is total: two
# entries sharing a ts must resolve identically regardless of
# which side they arrive on. Without it, equal-ts entries fall
# back to insertion order (base, ours, theirs), and since ours
# and theirs are swapped depending on which machine runs the
# merge, the two sides produce divergent files that never
# converge.
# Prefer ISO ts field for sort; fall back to SHA-256.
sort_key = None
try:
obj = json.loads(line)
ts = obj.get('ts') or obj.get('timestamp')
if isinstance(ts, str):
sort_key = (0, ts, line)
sort_key = (0, ts)
except (json.JSONDecodeError, ValueError, TypeError):
pass
if sort_key is None:
h = hashlib.sha256(line.encode('utf-8')).hexdigest()
sort_key = (1, h, line)
sort_key = (1, h)
seen[line] = sort_key
except FileNotFoundError:
# Absent base / absent ours / absent theirs are all valid.

View File

@ -27,53 +27,35 @@ done
LEARNINGS_FILE="$GSTACK_HOME/projects/$SLUG/learnings.jsonl"
# Collect cross-project JSONL files separately so the trust gate can distinguish
# current-project rows from rows loaded from other projects.
CROSS_FILES=()
# Collect all JSONL files to search
FILES=()
[ -f "$LEARNINGS_FILE" ] && FILES+=("$LEARNINGS_FILE")
if [ "$CROSS_PROJECT" = true ]; then
# Add other projects' learnings (max 5)
while IFS= read -r f; do
CROSS_FILES+=("$f")
[ ${#CROSS_FILES[@]} -ge 5 ] && break
done < <(find "$GSTACK_HOME/projects" -name "learnings.jsonl" -not -path "*/$SLUG/*" 2>/dev/null)
# Add other projects' learnings (first 5 in find's traversal order; not sorted by mtime)
for f in $(find "$GSTACK_HOME/projects" -name "learnings.jsonl" -not -path "*/$SLUG/*" 2>/dev/null | head -5); do
FILES+=("$f")
done
fi
if [ ! -f "$LEARNINGS_FILE" ] && [ ${#CROSS_FILES[@]} -eq 0 ]; then
if [ ${#FILES[@]} -eq 0 ]; then
exit 0
fi
emit_tagged_file() {
local tag="$1"
local file="$2"
local line
while IFS= read -r line || [ -n "$line" ]; do
[ -n "$line" ] && printf '%s\t%s\n' "$tag" "$line"
done < "$file"
}
# Process all files through bun for JSON parsing, decay, dedup, filtering
{
[ -f "$LEARNINGS_FILE" ] && emit_tagged_file current "$LEARNINGS_FILE"
if [ ${#CROSS_FILES[@]} -gt 0 ]; then
for f in "${CROSS_FILES[@]}"; do
emit_tagged_file cross "$f"
done
fi
} | GSTACK_SEARCH_TYPE="$TYPE" GSTACK_SEARCH_QUERY="$QUERY" GSTACK_SEARCH_LIMIT="$LIMIT" GSTACK_SEARCH_CROSS="$CROSS_PROJECT" bun -e "
GSTACK_SEARCH_TYPE="$TYPE" GSTACK_SEARCH_QUERY="$QUERY" GSTACK_SEARCH_LIMIT="$LIMIT" GSTACK_SEARCH_SLUG="$SLUG" GSTACK_SEARCH_CROSS="$CROSS_PROJECT" \
cat "${FILES[@]}" 2>/dev/null | GSTACK_SEARCH_TYPE="$TYPE" GSTACK_SEARCH_QUERY="$QUERY" GSTACK_SEARCH_LIMIT="$LIMIT" GSTACK_SEARCH_SLUG="$SLUG" GSTACK_SEARCH_CROSS="$CROSS_PROJECT" bun -e "
const lines = (await Bun.stdin.text()).trim().split('\n').filter(Boolean);
const now = Date.now();
const type = process.env.GSTACK_SEARCH_TYPE || '';
const queryRaw = (process.env.GSTACK_SEARCH_QUERY || '').toLowerCase();
const queryTokens = queryRaw.split(/\s+/).filter(Boolean);
const limit = parseInt(process.env.GSTACK_SEARCH_LIMIT || '10', 10);
const slug = process.env.GSTACK_SEARCH_SLUG || '';
const entries = [];
for (const taggedLine of lines) {
for (const line of lines) {
try {
const tabIndex = taggedLine.indexOf('\t');
const sourceTag = tabIndex === -1 ? 'current' : taggedLine.slice(0, tabIndex);
const line = tabIndex === -1 ? taggedLine : taggedLine.slice(tabIndex + 1);
const e = JSON.parse(line);
if (!e.key || !e.type) continue;
@ -87,7 +69,7 @@ for (const taggedLine of lines) {
// Determine if this is from the current project or cross-project
// Cross-project entries are tagged for display
const isCrossProject = sourceTag === 'cross';
const isCrossProject = !line.includes(slug) && process.env.GSTACK_SEARCH_CROSS === 'true';
e._crossProject = isCrossProject;
// Trust gate: cross-project learnings only loaded if trusted (user-stated)

View File

@ -194,7 +194,7 @@ Options:
--all-history Walk transcripts older than 90 days too.
--sources <list> Comma-separated subset: ${ALL_TYPES.join(",")}
--limit <N> Stop after N pages written (smoke testing).
--no-write Skip gbrain put calls (still updates state file).
--no-write Skip gbrain put_page calls (still updates state file).
Used by tests + dry runs without actual ingest.
--scan-secrets Opt-in per-file gitleaks scan during prepare. Off by
default; gstack-brain-sync already gates the git-push
@ -1061,7 +1061,7 @@ async function probeMode(args: CliArgs): Promise<ProbeReport> {
}
// Per ED2: ~25-35 min for ~11.7K transcripts = ~150ms/page synchronous
// (gitleaks + render + put + embedding). Scale linearly.
// (gitleaks + render + put_page + embedding). Scale linearly.
const estimateMinutes = Math.max(1, Math.round((newCount + updatedCount) * 0.15 / 60));
return {
@ -1272,39 +1272,13 @@ function cleanupStagingDir(dir: string): void {
* 1. forward the signal to the child (otherwise gbrain orphans, holds the
* PGLite write lock, and burns CPU observed during 2026-05-10 cold-run
* testing)
* 2. PRESERVE the staging dir when gbrain has written an import-checkpoint
* pointing at it (the next /sync-gbrain run can resume from
* processedIndex+1). Otherwise synchronously clean up before
* process.exit, since `finally` blocks in ingestPass never run after
* process.exit fires from inside a signal handler.
*
* Resume semantics added for #1611: prior behavior unconditionally cleaned
* up the staging dir on SIGTERM, so the gbrain checkpoint always pointed at
* a missing dir and the next run had to restage from scratch.
* 2. synchronously clean up the staging dir BEFORE process.exit (otherwise
* finally blocks in async callers don't run after process.exit from
* inside a signal handler, leaking the staging dir on every interrupt)
*/
let _activeImportChild: ChildProcess | null = null;
let _activeStagingDir: string | null = null;
let _signalHandlersInstalled = false;
/**
 * Reports whether ~/.gbrain/import-checkpoint.json exists and its `dir`
 * field equals `stagingDir`. A match means a later run can resume against
 * that staging dir. Any failure (missing file, unreadable file, malformed
 * JSON) is reported as `false`.
 */
function stagingDirIsCheckpointed(stagingDir: string): boolean {
  try {
    // HOME comes from the environment first so tests can redirect it;
    // homedir() caches.
    const homeDir = process.env.HOME || homedir();
    const checkpointFile = join(homeDir, ".gbrain", "import-checkpoint.json");
    if (existsSync(checkpointFile)) {
      const checkpoint = JSON.parse(readFileSync(checkpointFile, "utf-8")) as { dir?: string };
      return checkpoint.dir === stagingDir;
    }
    return false;
  } catch {
    return false;
  }
}
function installSignalForwarder(): void {
if (_signalHandlersInstalled) return;
_signalHandlersInstalled = true;
@ -1316,24 +1290,11 @@ function installSignalForwarder(): void {
// child may have already exited between the alive-check and the kill
}
}
// Synchronously clean up the active staging dir before exiting. The async
// `finally` blocks in ingestPass never run after process.exit fires from
// inside this handler, so cleanup has to happen here.
if (_activeStagingDir) {
if (stagingDirIsCheckpointed(_activeStagingDir)) {
// Preserve for next-run resume. The orchestrator's decideResume()
// (in gstack-gbrain-sync.ts) will see the checkpoint + dir and
// re-invoke gbrain import against this same staging dir, picking
// up from processedIndex+1. See #1611.
try {
process.stderr.write(
`[memory-ingest] ${signal} received — preserving staging dir for resume: ${_activeStagingDir}\n`,
);
} catch {
// best-effort: stderr may be closed already
}
} else {
// No checkpoint pointing here — the import never reached gbrain or
// crashed before writing one. Clean up so we don't leak the dir.
cleanupStagingDir(_activeStagingDir);
}
cleanupStagingDir(_activeStagingDir);
_activeStagingDir = null;
}
// Re-raise to default action so the parent actually exits. Without this,
@ -1349,32 +1310,10 @@ function installSignalForwarder(): void {
* that kill the child on parent SIGTERM/SIGINT. Returns the same shape as
* spawnSync's result so the caller doesn't care which mode was used.
*/
/**
 * #1611: the `gbrain import` is the long pole on big brains. Its timeout is
 * configurable via GSTACK_INGEST_TIMEOUT_MS (default 30 min; accepted range
 * 1 min–24 h, i.e. 60000–86400000 ms) so large memory corpora aren't
 * SIGTERM'd mid-import. On timeout we SIGTERM the child, which preserves
 * gbrain's import-checkpoint.json (see installSignalForwarder) so the next
 * run resumes instead of restarting from scratch.
 */
const DEFAULT_IMPORT_TIMEOUT_MS = 30 * 60 * 1000;

/**
 * Resolves the `gbrain import` timeout in milliseconds.
 *
 * @param raw - Raw setting; defaults to the GSTACK_INGEST_TIMEOUT_MS env var.
 * @returns The parsed value when it is an integer within 60000–86400000 ms;
 *   otherwise DEFAULT_IMPORT_TIMEOUT_MS. An unset or empty value falls back
 *   silently; an out-of-range or non-numeric value logs a warning to stderr.
 */
export function resolveImportTimeoutMs(
  raw: string | undefined = process.env.GSTACK_INGEST_TIMEOUT_MS,
): number {
  const minMs = 60_000; // 1 minute
  const maxMs = 86_400_000; // 24 hours
  if (raw === undefined || raw === "") return DEFAULT_IMPORT_TIMEOUT_MS;
  const n = Number.parseInt(raw, 10);
  // Number.isFinite already rejects NaN, so no separate NaN check is needed.
  if (!Number.isFinite(n) || n < minMs || n > maxMs) {
    console.error(
      `[memory-ingest] GSTACK_INGEST_TIMEOUT_MS="${raw}" invalid (need ${minMs}–${maxMs}ms); using ${DEFAULT_IMPORT_TIMEOUT_MS}ms`,
    );
    return DEFAULT_IMPORT_TIMEOUT_MS;
  }
  return n;
}
function runGbrainImport(
stagingDir: string,
timeoutMs: number,
): Promise<{ status: number | null; stdout: string; stderr: string; timedOut: boolean }> {
): Promise<{ status: number | null; stdout: string; stderr: string }> {
installSignalForwarder();
return new Promise((resolve) => {
// Seed DATABASE_URL from gbrain's own config so this stage works
@ -1407,7 +1346,6 @@ function runGbrainImport(
status: timedOut ? null : status,
stdout,
stderr,
timedOut,
});
});
child.on("error", (err) => {
@ -1417,7 +1355,6 @@ function runGbrainImport(
status: null,
stdout,
stderr: stderr + `\n[spawn-error] ${(err as Error).message}`,
timedOut,
});
});
});
@ -1437,7 +1374,7 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
if (args.noWrite) {
// --no-write: skip the gbrain import call but still record state for
// prepared pages (treat them as ingested for dedup purposes). Matches
// the prior contract from --help: "Skip gbrain put calls (still
// the prior contract from --help: "Skip gbrain put_page calls (still
// updates state file)".
const nowIso = new Date().toISOString();
for (const p of prep.prepared) {
@ -1507,46 +1444,19 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
// entirely. gstack-brain-sync push will pick the dir up via its allowlist
// and the brain admin's pull job will index transcripts into the remote
// brain. Local PGLite (if any) stays code-only.
//
// Resume branch for #1611: when the orchestrator sets
// GSTACK_INGEST_RESUME_DIR (because gbrain's import-checkpoint.json points
// at an existing dir from a prior SIGTERM'd run), reuse that staging dir
// and skip the prepare/writeStaged phase entirely. gbrain's checkpoint
// tells it where to resume.
const remoteHttpMode = isRemoteHttpMcpMode();
const resumeDir = process.env.GSTACK_INGEST_RESUME_DIR;
const resuming = !remoteHttpMode
&& typeof resumeDir === "string"
&& resumeDir.length > 0
&& existsSync(resumeDir);
const stagingDir = resuming
? resumeDir!
: remoteHttpMode
? makePersistentTranscriptDir()
: makeStagingDir();
const stagingDir = remoteHttpMode
? makePersistentTranscriptDir()
: makeStagingDir();
// Register staging dir with the signal forwarder so SIGTERM/SIGINT can
// either preserve (when gbrain checkpointed it) or synchronously clean up.
// The async finally block below does NOT run after a signal-handler exit.
// In remote-http mode we skip registration — the dir is meant to persist.
// synchronously clean it up before process.exit (the async finally block
// below does NOT run after a signal-handler exit). In remote-http mode we
// skip registration — the dir is meant to persist.
if (!remoteHttpMode) {
_activeStagingDir = stagingDir;
}
try {
let staging: StagingResult;
if (resuming) {
// Pages are already on disk from the previous run. Skip writeStaged.
// The "written" count for the verdict reflects what's on disk now;
// gbrain's import will skip already-completed entries via its own
// checkpoint (processedIndex+1).
if (!args.quiet) {
console.error(
`[memory-ingest] resuming previous staging dir ${stagingDir} (skipping prepare phase)`,
);
}
staging = { staging_dir: stagingDir, written: prep.prepared.length, errors: [], stagedPathToSource: new Map() };
} else {
staging = writeStaged(prep.prepared, stagingDir);
}
const staging = writeStaged(prep.prepared, stagingDir);
failed += staging.errors.length;
if (!args.quiet && staging.errors.length > 0) {
for (const e of staging.errors.slice(0, 5)) {
@ -1632,33 +1542,13 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
// spawn, parent termination orphans the gbrain process (observed
// during 2026-05-10 cold-run testing — gbrain kept running 15 min
// after the orchestrator timed out).
const importResult = await runGbrainImport(stagingDir, resolveImportTimeoutMs());
const importResult = await runGbrainImport(stagingDir, 30 * 60 * 1000);
const stdout = importResult.stdout || "";
const stderr = importResult.stderr || "";
const importJson = parseImportJson(stdout);
if (importResult.status !== 0) {
// #1611: on timeout, gbrain's import-checkpoint.json is preserved (the
// SIGTERM forwarder keeps the staging dir), so the next /sync-gbrain
// resumes rather than restarting. Tell the user instead of looking failed.
if (importResult.timedOut) {
const mins = Math.round(resolveImportTimeoutMs() / 60000);
const msg =
`gbrain import timed out after ${mins}min; checkpoint preserved — re-run ` +
`/sync-gbrain to resume (raise GSTACK_INGEST_TIMEOUT_MS for big brains)`;
console.error(`[memory-ingest] ${msg}`);
return {
written: 0,
skipped_secret: prep.skippedSecret,
skipped_dedup: prep.skippedDedup,
skipped_unattributed: prep.skippedUnattributed,
failed,
duration_ms: Date.now() - t0,
partial_pages: prep.partialPages,
system_error: msg,
};
}
const tail = (stderr.trim().split("\n").pop() || "").slice(0, 300);
const msg = `gbrain import exited ${importResult.status}: ${tail}`;
console.error(`[memory-ingest] ERR: ${msg}`);
@ -1854,12 +1744,7 @@ async function main(): Promise<void> {
if (result.system_error) process.exit(1);
}
// Guard so the module is import-safe for unit tests (e.g. resolveImportTimeoutMs).
// The orchestrator runs it as `bun gstack-memory-ingest.ts ...`, where
// import.meta.main is true, so the CLI path is unaffected.
if (import.meta.main) {
main().catch((err) => {
console.error(`gstack-memory-ingest fatal: ${err instanceof Error ? err.message : String(err)}`);
process.exit(1);
});
}
main().catch((err) => {
console.error(`gstack-memory-ingest fatal: ${err instanceof Error ? err.message : String(err)}`);
process.exit(1);
});

View File

@ -40,40 +40,16 @@ const ADAPTER_FACTORIES = {
type OutputFormat = 'table' | 'json' | 'markdown';
const CLI_ARGS = process.argv.slice(2);
const VALUE_FLAGS = new Set(['--models', '--prompt', '--workdir', '--timeout-ms', '--output']);
function arg(name: string, def?: string): string | undefined {
const idx = CLI_ARGS.findIndex(a => a === name || a.startsWith(name + '='));
const idx = process.argv.findIndex(a => a === name || a.startsWith(name + '='));
if (idx < 0) return def;
const eqIdx = CLI_ARGS[idx].indexOf('=');
if (eqIdx >= 0) return CLI_ARGS[idx].slice(eqIdx + 1);
return CLI_ARGS[idx + 1];
const eqIdx = process.argv[idx].indexOf('=');
if (eqIdx >= 0) return process.argv[idx].slice(eqIdx + 1);
return process.argv[idx + 1];
}
function flag(name: string): boolean {
return CLI_ARGS.includes(name);
}
function positionalArgs(args: string[]): string[] {
const positional: string[] = [];
for (let i = 0; i < args.length; i++) {
const current = args[i];
if (current === '--') {
positional.push(...args.slice(i + 1));
break;
}
if (current.startsWith('--')) {
const eqIdx = current.indexOf('=');
const flagName = eqIdx >= 0 ? current.slice(0, eqIdx) : current;
if (eqIdx < 0 && VALUE_FLAGS.has(flagName) && i + 1 < args.length) {
i++;
}
continue;
}
positional.push(current);
}
return positional;
return process.argv.includes(name);
}
function parseProviders(s: string | undefined): Array<'claude' | 'gpt' | 'gemini'> {
@ -103,7 +79,7 @@ function resolvePrompt(positional: string | undefined): string {
}
async function main(): Promise<void> {
const positional = positionalArgs(CLI_ARGS)[0];
const positional = process.argv.slice(2).find(a => !a.startsWith('--'));
const prompt = resolvePrompt(positional);
const providers = parseProviders(arg('--models'));
const workdir = arg('--workdir', process.cwd())!;

View File

@ -10,14 +10,7 @@
//
// Usage:
// gstack-next-version --base <branch> --bump <major|minor|patch|micro> \
// --current-version <X.Y.Z.W> [--workspace-root <path>|null] \
// [--version-path <path>] [--json]
//
// VERSION path resolution (monorepo support):
// 1. --version-path <path> CLI flag (highest priority)
// 2. .gstack/version-path file at the repo root (single-line relative path,
// committed so all collaborators benefit)
// 3. "VERSION" at the repo root (default, backward-compatible)
// --current-version <X.Y.Z.W> [--workspace-root <path>|null] [--json]
//
// Exit codes:
// 0 — emitted JSON successfully (may include "offline":true or "host":"unknown")
@ -52,7 +45,6 @@ type Output = {
version: string;
current_version: string;
base_version: string;
version_path: string;
bump: Bump;
host: "github" | "gitlab" | "unknown";
offline: boolean;
@ -122,28 +114,6 @@ function runCommand(cmd: string, args: string[], timeoutMs = 15000): { ok: boole
};
}
// Resolves which file holds the version, for monorepo layouts. Precedence:
//   1. the CLI override, trimmed, when non-empty
//   2. the trimmed first line of <repoRoot>/.gstack/version-path, when non-empty
//   3. "VERSION"
// Pure with respect to git: the repo root is passed in, so tests can point it
// at a fixture directory.
function resolveVersionPath(override: string | undefined, repoRoot: string): string {
  const fallback = "VERSION";
  if (override) {
    return override.trim();
  }
  const configPath = join(repoRoot, ".gstack", "version-path");
  if (!existsSync(configPath)) {
    return fallback;
  }
  try {
    const [head] = readFileSync(configPath, "utf8").split("\n");
    const candidate = head?.trim() ?? "";
    return candidate ? candidate : fallback;
  } catch {
    // An unreadable config file is treated the same as an absent one.
    return fallback;
  }
}
// Returns the trimmed output of `git rev-parse --show-toplevel`, or the
// current working directory when that command does not succeed.
function repoToplevel(): string {
  const { ok, stdout } = runCommand("git", ["rev-parse", "--show-toplevel"]);
  if (!ok) {
    return process.cwd();
  }
  return stdout.trim();
}
function detectHost(): "github" | "gitlab" | "unknown" {
const remote = runCommand("git", ["remote", "get-url", "origin"]);
if (remote.ok) {
@ -158,19 +128,19 @@ function detectHost(): "github" | "gitlab" | "unknown" {
return "unknown";
}
function readBaseVersion(base: string, versionPath: string, warnings: string[]): string {
function readBaseVersion(base: string, warnings: string[]): string {
// git fetch is best-effort; we tolerate failure and fall back to whatever
// origin/<base> currently points at.
runCommand("git", ["fetch", "origin", base, "--quiet"], 10000);
const r = runCommand("git", ["show", `origin/${base}:${versionPath}`]);
const r = runCommand("git", ["show", `origin/${base}:VERSION`]);
if (!r.ok) {
warnings.push(`could not read ${versionPath} at origin/${base}; assuming 0.0.0.0`);
warnings.push(`could not read VERSION at origin/${base}; assuming 0.0.0.0`);
return "0.0.0.0";
}
return r.stdout.trim();
}
async function fetchGithubClaimed(base: string, versionPath: string, excludePR: number | null, warnings: string[]): Promise<{ claimed: ClaimedPR[]; offline: boolean }> {
async function fetchGithubClaimed(base: string, excludePR: number | null, warnings: string[]): Promise<{ claimed: ClaimedPR[]; offline: boolean }> {
const list = runCommand("gh", [
"pr",
"list",
@ -217,18 +187,14 @@ async function fetchGithubClaimed(base: string, versionPath: string, excludePR:
const pr = queue.shift();
if (!pr) return;
// gh passes branch name via argv, not shell — safe.
// encodeURI handles spaces in subproject paths (e.g. "Tinas Second Brain/...")
// while leaving "/" untouched so the GitHub Contents API gets the path intact.
const content = runCommand("gh", [
"api",
`repos/{owner}/{repo}/contents/${encodeURI(versionPath)}?ref=${encodeURIComponent(pr.headRefName)}`,
`repos/{owner}/{repo}/contents/VERSION?ref=${encodeURIComponent(pr.headRefName)}`,
"-q",
".content",
]);
if (!content.ok) {
warnings.push(
`PR #${pr.number}: could not fetch ${versionPath} (fork, private, or wrong path — try --version-path or .gstack/version-path)`,
);
warnings.push(`PR #${pr.number}: could not fetch VERSION (fork or private)`);
continue;
}
let versionStr: string;
@ -249,7 +215,7 @@ async function fetchGithubClaimed(base: string, versionPath: string, excludePR:
return { claimed: results, offline: false };
}
async function fetchGitlabClaimed(base: string, versionPath: string, excludePR: number | null, warnings: string[]): Promise<{ claimed: ClaimedPR[]; offline: boolean }> {
async function fetchGitlabClaimed(base: string, excludePR: number | null, warnings: string[]): Promise<{ claimed: ClaimedPR[]; offline: boolean }> {
const list = runCommand("glab", [
"mr",
"list",
@ -277,15 +243,12 @@ async function fetchGitlabClaimed(base: string, versionPath: string, excludePR:
}
const results: ClaimedPR[] = [];
for (const mr of mrs) {
// GitLab files API takes the full path URL-encoded (slashes become %2F).
const content = runCommand("glab", [
"api",
`projects/:id/repository/files/${encodeURIComponent(versionPath)}?ref=${encodeURIComponent(mr.source_branch)}`,
`projects/:id/repository/files/VERSION?ref=${encodeURIComponent(mr.source_branch)}`,
]);
if (!content.ok) {
warnings.push(
`MR !${mr.iid}: could not fetch ${versionPath} (wrong path? — try --version-path or .gstack/version-path)`,
);
warnings.push(`MR !${mr.iid}: could not fetch VERSION`);
continue;
}
try {
@ -322,7 +285,7 @@ function currentRepoSlug(): string {
return m ? m[1] : "";
}
function scanSiblings(root: string | null, versionPath: string, claimed: ClaimedPR[], warnings: string[]): Sibling[] {
function scanSiblings(root: string | null, claimed: ClaimedPR[], warnings: string[]): Sibling[] {
if (!root || !existsSync(root)) return [];
const mySlug = currentRepoSlug();
if (!mySlug) {
@ -345,7 +308,7 @@ function scanSiblings(root: string | null, versionPath: string, claimed: Claimed
continue;
}
if (!existsSync(join(p, ".git")) && !existsSync(join(p, ".git/HEAD"))) continue;
const versionFile = join(p, versionPath);
const versionFile = join(p, "VERSION");
if (!existsSync(versionFile)) continue;
let version: string;
try {
@ -383,13 +346,12 @@ function markActiveSiblings(siblings: Sibling[], baseVersion: Version): Sibling[
});
}
function parseArgs(argv: string[]): { base: string; bump: Bump; current: string; workspaceRoot?: string; excludePR: number | null; versionPath?: string; help: boolean } {
function parseArgs(argv: string[]): { base: string; bump: Bump; current: string; workspaceRoot?: string; excludePR: number | null; help: boolean } {
let base = "";
let bump: Bump | "" = "";
let current = "";
let workspaceRoot: string | undefined;
let excludePR: number | null = null;
let versionPath: string | undefined;
let help = false;
for (let i = 0; i < argv.length; i++) {
const a = argv[i];
@ -397,7 +359,6 @@ function parseArgs(argv: string[]): { base: string; bump: Bump; current: string;
else if (a === "--bump") bump = (argv[++i] ?? "") as Bump;
else if (a === "--current-version") current = argv[++i] ?? "";
else if (a === "--workspace-root") workspaceRoot = argv[++i];
else if (a === "--version-path") versionPath = argv[++i];
else if (a === "--exclude-pr") {
const n = Number(argv[++i]);
excludePR = Number.isFinite(n) && n > 0 ? n : null;
@ -414,7 +375,7 @@ function parseArgs(argv: string[]): { base: string; bump: Bump; current: string;
console.error(`Error: --bump must be major|minor|patch|micro (got ${bump})`);
process.exit(2);
}
return { base, bump: bump as Bump, current, workspaceRoot, excludePR, versionPath, help: false };
return { base, bump: bump as Bump, current, workspaceRoot, excludePR, help: false };
}
// Auto-detect: if --exclude-pr wasn't passed, check whether the current branch
@ -431,14 +392,13 @@ async function main() {
const args = parseArgs(process.argv.slice(2));
if (args.help) {
console.log(
"Usage: gstack-next-version --base <branch> --bump <level> --current-version <X.Y.Z.W> [--workspace-root <path|null>] [--version-path <path>]",
"Usage: gstack-next-version --base <branch> --bump <level> --current-version <X.Y.Z.W> [--workspace-root <path|null>]",
);
process.exit(0);
}
const warnings: string[] = [];
const host = detectHost();
const versionPath = resolveVersionPath(args.versionPath, repoToplevel());
const baseVersion = args.current || readBaseVersion(args.base, versionPath, warnings);
const baseVersion = args.current || readBaseVersion(args.base, warnings);
const baseParsed = parseVersion(baseVersion);
if (!baseParsed) {
console.error(`Error: could not parse base version '${baseVersion}'`);
@ -453,9 +413,9 @@ async function main() {
let claimed: ClaimedPR[] = [];
let offline = false;
if (host === "github") {
({ claimed, offline } = await fetchGithubClaimed(args.base, versionPath, excludePR, warnings));
({ claimed, offline } = await fetchGithubClaimed(args.base, excludePR, warnings));
} else if (host === "gitlab") {
({ claimed, offline } = await fetchGitlabClaimed(args.base, versionPath, excludePR, warnings));
({ claimed, offline } = await fetchGitlabClaimed(args.base, excludePR, warnings));
} else {
warnings.push("host unknown; queue-awareness unavailable");
}
@ -473,7 +433,7 @@ async function main() {
const { version: picked, reason } = pickNextSlot(baseParsed, claimedVersions, args.bump);
const workspaceRoot = resolveWorkspaceRoot(args.workspaceRoot);
const siblings = markActiveSiblings(scanSiblings(workspaceRoot, versionPath, claimed, warnings), baseParsed);
const siblings = markActiveSiblings(scanSiblings(workspaceRoot, claimed, warnings), baseParsed);
const activeSiblings = siblings.filter((s) => s.is_active);
// If an active sibling outranks our pick, bump past it (same bump level).
@ -493,7 +453,6 @@ async function main() {
version: fmtVersion(finalVersion),
current_version: args.current || baseVersion,
base_version: baseVersion,
version_path: versionPath,
bump: args.bump,
host,
offline,
@ -507,7 +466,7 @@ async function main() {
}
// Pure-function exports for testing
export { parseVersion, fmtVersion, bumpVersion, cmpVersion, pickNextSlot, markActiveSiblings, resolveVersionPath };
export { parseVersion, fmtVersion, bumpVersion, cmpVersion, pickNextSlot, markActiveSiblings };
// Only run main() when invoked as a script, not when imported by tests.
if (import.meta.main) {

View File

@ -9,7 +9,7 @@
# CI / container env where HOME may be unset.
#
# Chains:
# GSTACK_STATE_ROOT: GSTACK_HOME -> CLAUDE_PLUGIN_DATA (only when CLAUDE_PLUGIN_ROOT=*gstack*) -> $HOME/.gstack -> .gstack
# GSTACK_STATE_ROOT: GSTACK_HOME -> CLAUDE_PLUGIN_DATA -> $HOME/.gstack -> .gstack
# PLAN_ROOT: GSTACK_PLAN_DIR -> CLAUDE_PLANS_DIR -> $HOME/.claude/plans -> .claude/plans
# TMP_ROOT: TMPDIR -> TMP -> .gstack/tmp (and mkdir -p, best-effort)
#
@ -21,11 +21,7 @@ set -u
# State root: where gstack writes projects/, sessions/, analytics/.
if [ -n "${GSTACK_HOME:-}" ]; then
_state_root="$GSTACK_HOME"
elif [ -n "${CLAUDE_PLUGIN_DATA:-}" ] && echo "${CLAUDE_PLUGIN_ROOT:-}" | grep -qi "gstack"; then
# Guard: only trust CLAUDE_PLUGIN_DATA when CLAUDE_PLUGIN_ROOT confirms we are
# running as the gstack plugin. Without this, a CLAUDE_PLUGIN_DATA from another
# plugin (e.g. codex) that leaked into the session env via CLAUDE_ENV_FILE would
# be picked up, writing all gstack state into the wrong directory.
elif [ -n "${CLAUDE_PLUGIN_DATA:-}" ]; then
_state_root="$CLAUDE_PLUGIN_DATA"
elif [ -n "${HOME:-}" ]; then
_state_root="$HOME/.gstack"

View File

@ -28,8 +28,7 @@
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null)"
# GSTACK_STATE_ROOT takes precedence over GSTACK_HOME (test isolation per D16).
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
GSTACK_HOME="${GSTACK_HOME:-$HOME/.gstack}"
mkdir -p "$GSTACK_HOME/projects/$SLUG"
INPUT="$1"
@ -50,48 +49,12 @@ if (!j.skill || !/^[a-z0-9-]+\$/.test(j.skill)) {
process.exit(1);
}
// Required: question_id (kebab-case, <=64 chars).
// Cathedral T5: hook-sourced events use 'hook-<10-char-hash>' which is
// kebab-case-compatible and passes the same regex.
// Required: question_id (kebab-case, <=64 chars)
if (!j.question_id || !/^[a-z0-9-]+\$/.test(j.question_id) || j.question_id.length > 64) {
process.stderr.write('gstack-question-log: invalid question_id, must be kebab-case <=64 chars\n');
process.exit(1);
}
// Optional: source — tags which writer produced this event.
// 'agent' (default) — preamble-driven write from inside the running agent
// 'hook' — PostToolUse hook captured it deterministically (T5)
// 'auq-other' — user picked 'Other' and typed free text (Layer 8)
// 'auto-decided' — PreToolUse enforcement hook substituted the answer (T6)
// 'codex-import-marker' / 'codex-import-pattern' — T9 backfill from Codex
const ALLOWED_SOURCES = ['agent', 'hook', 'auq-other', 'auto-decided', 'codex-import-marker', 'codex-import-pattern'];
if (j.source !== undefined) {
if (!ALLOWED_SOURCES.includes(j.source)) {
process.stderr.write('gstack-question-log: invalid source, must be one of: ' + ALLOWED_SOURCES.join(', ') + '\n');
process.exit(1);
}
} else {
j.source = 'agent';
}
// Optional: tool_use_id — Claude Code hook stdin field; used for dedup.
if (j.tool_use_id !== undefined) {
if (typeof j.tool_use_id !== 'string' || j.tool_use_id.length > 128) {
process.stderr.write('gstack-question-log: tool_use_id must be string <=128 chars\n');
process.exit(1);
}
}
// Optional: free_text — sanitize (no newlines, <=300 chars).
if (j.free_text !== undefined) {
if (typeof j.free_text !== 'string') {
process.stderr.write('gstack-question-log: free_text must be string\n');
process.exit(1);
}
if (j.free_text.length > 300) j.free_text = j.free_text.slice(0, 300);
j.free_text = j.free_text.replace(/\n+/g, ' ');
}
// Required: question_summary (non-empty, <=200 chars, no newlines)
if (typeof j.question_summary !== 'string' || !j.question_summary.length) {
process.stderr.write('gstack-question-log: question_summary required\n');
@ -201,49 +164,7 @@ if [ $VALIDATE_RC -ne 0 ] || [ -z "$VALIDATED" ]; then
exit 1
fi
LOG_FILE="$GSTACK_HOME/projects/$SLUG/question-log.jsonl"
# Cathedral T5: composite-source dedup. If this exact (source, tool_use_id)
# was already logged within the last 100 lines, skip — protects against
# hook + agent both writing the same fire (D3 plan-tune cathedral decision).
# Lookup is bounded so the bin stays cheap on hot paths.
DEDUP_SKIP=""
if [ -f "$LOG_FILE" ]; then
DEDUP_SKIP=$(VALIDATED_JSON="$VALIDATED" LOG_FILE_PATH="$LOG_FILE" bun -e '
const fs = require("fs");
const j = JSON.parse(process.env.VALIDATED_JSON);
if (!j.tool_use_id) { console.log(""); process.exit(0); }
const want = j.source + ":" + j.tool_use_id;
const lines = fs.readFileSync(process.env.LOG_FILE_PATH, "utf-8").trim().split("\n").slice(-100);
for (const ln of lines) {
try {
const p = JSON.parse(ln);
if (p.source && p.tool_use_id && (p.source + ":" + p.tool_use_id) === want) {
console.log("dup");
process.exit(0);
}
} catch {}
}
console.log("");
' 2>/dev/null)
fi
if [ "$DEDUP_SKIP" = "dup" ]; then
echo "DEDUP: skipped (source=$(echo "$VALIDATED" | bun -e 'const j=JSON.parse(await Bun.stdin.text()); console.log(j.source);'), tool_use_id duplicate)"
exit 0
fi
echo "$VALIDATED" >> "$LOG_FILE"
# Cathedral T5: fire-and-forget --derive so inferred dimensions stay current
# without per-event latency (D17). Sub-second op; output suppressed; never
# blocks the hook caller. Skipped via GSTACK_QUESTION_LOG_NO_DERIVE=1 for
# tests that don't want the side effect.
if [ -z "${GSTACK_QUESTION_LOG_NO_DERIVE:-}" ]; then
(
nohup "$SCRIPT_DIR/gstack-developer-profile" --derive >/dev/null 2>&1 &
) >/dev/null 2>&1
fi
echo "$VALIDATED" >> "$GSTACK_HOME/projects/$SLUG/question-log.jsonl"
# NOTE: question-log.jsonl is deliberately NOT enqueued for gbrain-sync.
# Per Codex v2 review, audit/derivation data stays local alongside the

View File

@ -23,8 +23,7 @@ set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
# GSTACK_STATE_ROOT takes precedence over GSTACK_HOME (test isolation per D16).
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
GSTACK_HOME="${GSTACK_HOME:-$HOME/.gstack}"
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
SLUG="${SLUG:-unknown}"
PREF_FILE="$GSTACK_HOME/projects/$SLUG/question-preferences.json"
@ -69,21 +68,6 @@ do_check() {
return;
}
// Split-chain carve-out: per-option calls in N-option splits emit
// question_ids of the form <skill>-split-<option-slug>. These are
// NEVER AUTO_DECIDE-eligible regardless of stored preferences — the
// whole point of splitting is restoring user sovereignty over the
// option set. See scripts/resolvers/preamble/generate-ask-user-format.ts
// \"Handling 5+ options — split, never drop\" for the surrounding
// mechanism that generates these ids.
if (/-split-/.test(qid)) {
console.log('ASK_NORMALLY');
if (pref === 'never-ask' || pref === 'ask-only-for-one-way') {
console.log('NOTE: split-chain per-option calls always ASK_NORMALLY; your ' + pref + ' preference does not apply to options inside a sequential split.');
}
return;
}
switch (pref) {
case 'never-ask':
console.log('AUTO_DECIDE');

View File

@ -1,228 +0,0 @@
#!/usr/bin/env bun
/**
* gstack-redact — scan text for secrets/PII/legal content via the shared engine.
*
* Skill-facing CLI over lib/redact-engine.ts. Reads from stdin (default) or
* --from-file, scans, and prints findings as JSON (--json) or a human table.
*
* Exit codes (consumed by skill bash to gate dispatch/file/edit/commit):
* 0 clean (no HIGH, no MEDIUM)
* 2 MEDIUM present (no HIGH) — skill runs the per-finding AskUserQuestion
* 3 HIGH present — skill blocks
*
* WARN findings (tool-fence-degraded credentials) never change the exit code.
*
* Flags:
* --json Emit JSON {findings, counts, repoVisibility, oversize}
* --repo-visibility V public | private | unknown (default unknown=public-strict wording)
* --from-file PATH Read input from PATH instead of stdin
* --allowlist PATH Newline-delimited exact spans to suppress
* --self-email EMAIL Suppress this email (the invoking user's own)
* --repo-public-emails PATH Newline-delimited repo-public emails to suppress
* --auto-redact IDS Comma-separated finding ids to auto-redact;
* prints the redacted body to stdout + diff to stderr.
* --max-bytes N Override the fail-closed size cap (default 1 MiB).
*
* Security note: this is a GUARDRAIL, not airtight enforcement. A determined
* user can always bypass it (direct gh/git). It catches accidents.
*/
import * as fs from "fs";
import * as path from "path";
import { spawnSync } from "child_process";
import {
scan,
applyRedactions,
exitCodeFor,
type RepoVisibility,
type ScanOptions,
type Finding,
} from "../lib/redact-engine";
const MAX_STDIN_BYTES = 16 * 1024 * 1024; // hard ceiling before the engine cap
// ── pre-push hook install/uninstall (chains any existing hook) ────────────────
const MANAGED_MARKER = "# gstack-redact pre-push (managed)";
/**
 * Ask git where the hooks directory lives (`git rev-parse --git-path hooks`).
 *
 * @returns The trimmed path printed by git.
 * Exits the process with status 1 when git reports a non-zero status.
 */
function hooksPath(): string {
  const result = spawnSync("git", ["rev-parse", "--git-path", "hooks"], { encoding: "utf8" });
  if (result.status === 0) {
    return result.stdout.trim();
  }
  process.stderr.write("gstack-redact: not in a git repo\n");
  process.exit(1);
}
/**
 * Install the managed pre-push hook into the repository's hooks directory.
 *
 * - If a managed hook (one containing MANAGED_MARKER) is already present, this
 *   is a no-op.
 * - If an unmanaged `pre-push` exists, it is renamed to `pre-push.local`, made
 *   executable, and chained: the generated wrapper runs it first and aborts the
 *   push with its exit status if it fails.
 *
 * The wrapper reads stdin once (git supplies the ref list there) and replays it
 * to both the chained hook and gstack-redact-prepush.
 */
function installPrepushHook(): void {
  const dir = hooksPath();
  fs.mkdirSync(dir, { recursive: true });
  const hookPath = path.join(dir, "pre-push");
  const prepushBin = path.join(import.meta.dir, "gstack-redact-prepush");
  // Single-quote the bin path for bash so characters such as $, backticks or
  // double quotes in the install location cannot break (or inject into) the hook.
  const quotedBin = "'" + prepushBin.replace(/'/g, "'\\''") + "'";

  // If a non-managed hook exists, preserve it as pre-push.local and chain it.
  if (fs.existsSync(hookPath)) {
    const existing = fs.readFileSync(hookPath, "utf8");
    if (existing.includes(MANAGED_MARKER)) {
      process.stdout.write("gstack-redact: pre-push hook already installed.\n");
      return;
    }
    const localPath = path.join(dir, "pre-push.local");
    fs.renameSync(hookPath, localPath);
    fs.chmodSync(localPath, 0o755);
    process.stdout.write("gstack-redact: preserved existing hook as pre-push.local (chained).\n");
  }

  // stdin is single-consume: capture it once, feed both the chained hook and ours.
  // Command substitution strips trailing newlines, so _feed re-terminates the
  // final line; otherwise a chained hook using a "while read ..." loop would
  // silently skip the last (often the only) ref line.
  const wrapper = `#!/usr/bin/env bash
${MANAGED_MARKER}
set -euo pipefail
_input="$(cat)"
_local="$(git rev-parse --git-path hooks/pre-push.local)"
_feed() {
  if [ -n "$_input" ]; then printf '%s\\n' "$_input"; fi
}
if [ -x "$_local" ]; then
  _feed | "$_local" "$@" || exit $?
fi
_feed | bun ${quotedBin} "$@"
`;
  fs.writeFileSync(hookPath, wrapper, { mode: 0o755 });
  // writeFileSync's mode is only applied on creation (and is subject to umask),
  // so set the executable bits explicitly.
  fs.chmodSync(hookPath, 0o755);
  process.stdout.write(`gstack-redact: installed pre-push hook at ${hookPath}\n`);
}
/**
 * Remove the managed pre-push hook, if one is installed.
 *
 * A hook counts as managed only when the `pre-push` file contains
 * MANAGED_MARKER. When a `pre-push.local` sits next to it, that file is renamed
 * back to `pre-push`; otherwise the managed hook file is simply deleted.
 */
function uninstallPrepushHook(): void {
  const hooksDir = hooksPath();
  const managedHook = path.join(hooksDir, "pre-push");
  const chainedHook = path.join(hooksDir, "pre-push.local");

  const isManaged =
    fs.existsSync(managedHook) && fs.readFileSync(managedHook, "utf8").includes(MANAGED_MARKER);
  if (!isManaged) {
    process.stdout.write("gstack-redact: no managed pre-push hook to remove.\n");
    return;
  }

  if (!fs.existsSync(chainedHook)) {
    fs.unlinkSync(managedHook);
    process.stdout.write("gstack-redact: removed managed pre-push hook.\n");
    return;
  }

  // Renaming over the managed hook puts the preserved original back in place.
  fs.renameSync(chainedHook, managedHook);
  process.stdout.write("gstack-redact: removed managed hook, restored pre-push.local.\n");
}
/**
 * Look up the value following a command-line flag.
 *
 * @param name Flag to search for in `process.argv` (exact match).
 * @returns The argv entry immediately after the flag, or `undefined` when the
 *   flag is absent or is the last argument.
 */
function arg(name: string): string | undefined {
  const position = process.argv.indexOf(name);
  if (position < 0) return undefined;
  return process.argv[position + 1];
}
/**
 * Report whether a bare flag appears anywhere in `process.argv`.
 *
 * @param name Flag to search for (exact match).
 */
function flag(name: string): boolean {
  return process.argv.indexOf(name) !== -1;
}
/**
 * Read the text to scan, from `--from-file PATH` when given, else from stdin.
 *
 * Both sources are limited to MAX_STDIN_BYTES: an oversized file is rejected
 * from its stat size before being read, and stdin is rejected as soon as the
 * running byte count passes the limit. Either case writes a message to stderr
 * and exits with status 3.
 *
 * @returns The input decoded as UTF-8.
 */
function readInput(): string {
  const fromFile = arg("--from-file");
  if (fromFile) {
    const { size } = fs.statSync(fromFile);
    if (size > MAX_STDIN_BYTES) {
      // Don't even read it — fail closed at the CLI boundary.
      process.stderr.write(`gstack-redact: input file too large (${size} bytes)\n`);
      process.exit(3);
    }
    return fs.readFileSync(fromFile, "utf8");
  }

  // Synchronous stdin drain (fd 0) through a reusable 64 KiB scratch buffer.
  const pieces: Buffer[] = [];
  const scratch = Buffer.alloc(65536);
  let received = 0;
  for (;;) {
    let count: number;
    try {
      count = fs.readSync(0, scratch, 0, scratch.length, null);
    } catch (e: any) {
      if (e.code === "EAGAIN") continue; // nothing available yet: retry the read
      if (e.code === "EOF") break;
      throw e;
    }
    if (count === 0) break;
    received += count;
    if (received > MAX_STDIN_BYTES) {
      process.stderr.write("gstack-redact: stdin too large\n");
      process.exit(3);
    }
    // Copy the filled slice: the scratch buffer is overwritten by the next read.
    pieces.push(Buffer.from(scratch.subarray(0, count)));
  }
  return Buffer.concat(pieces).toString("utf8");
}
/**
 * Load a newline-delimited list file.
 *
 * @param path File to read; may be `undefined`.
 * @returns The file's lines, trimmed, with empty lines dropped — or
 *   `undefined` when no path was given or the file does not exist.
 */
function readLines(path: string | undefined): string[] | undefined {
  if (!path) return undefined;
  if (!fs.existsSync(path)) return undefined;

  const entries: string[] = [];
  for (const raw of fs.readFileSync(path, "utf8").split("\n")) {
    const line = raw.trim();
    if (line) entries.push(line);
  }
  return entries;
}
/**
 * Translate command-line flags into the options object handed to the engine.
 *
 * - `--repo-visibility` is accepted only as public | private | unknown; any
 *   other value (or no value) becomes "unknown".
 * - `--allowlist` and `--repo-public-emails` are read as newline-delimited files.
 * - `--max-bytes` is included only when it parses as an integer. An unparsable
 *   value is ignored with a warning rather than forwarded as NaN.
 *   NOTE(review): a NaN cap would presumably make the engine's size comparison
 *   always false and so disable the fail-closed limit — confirm against
 *   lib/redact-engine.
 */
function buildOpts(): ScanOptions {
  const vis = (arg("--repo-visibility") as RepoVisibility) || "unknown";

  const rawMaxBytes = arg("--max-bytes");
  let maxBytes: number | undefined;
  if (rawMaxBytes) {
    const parsed = parseInt(rawMaxBytes, 10);
    if (Number.isNaN(parsed)) {
      process.stderr.write(
        `gstack-redact: ignoring invalid --max-bytes '${rawMaxBytes}' (using the default cap)\n`,
      );
    } else {
      maxBytes = parsed;
    }
  }

  return {
    repoVisibility: ["public", "private", "unknown"].includes(vis) ? vis : "unknown",
    allowlist: readLines(arg("--allowlist")),
    selfEmail: arg("--self-email"),
    repoPublicEmails: readLines(arg("--repo-public-emails")),
    ...(maxBytes !== undefined ? { maxBytes } : {}),
  };
}
/**
 * Render findings as a plain-text table, one row per finding.
 *
 * Columns: severity (padded to 6), id (padded to 24), `line:col` (line
 * right-aligned to 4, col padded to 3), then the preview text.
 *
 * @returns The rows joined by newlines, or a "(no findings)" placeholder line
 *   when the list is empty.
 */
function humanTable(findings: Finding[]): string {
  if (findings.length === 0) return " (no findings)";

  const lines: string[] = [];
  for (const f of findings) {
    const severity = f.severity.padEnd(6);
    const id = f.id.padEnd(24);
    const line = String(f.line).padStart(4);
    const col = String(f.col).padEnd(3);
    lines.push(` ${severity} ${id} ${line}:${col} ${f.preview}`);
  }
  return lines.join("\n");
}
/**
 * CLI entry point.
 *
 * Order of operations:
 *  1. Positional subcommands (install/uninstall the pre-push hook) return early.
 *  2. `--auto-redact IDS`: write the redacted body to stdout, the diff and any
 *     findings that could not be redacted to stderr, then exit 0.
 *  3. Otherwise scan the input, print JSON (`--json`) or the human-readable
 *     report, and exit with the code computed by `exitCodeFor`.
 */
function main() {
  // Subcommands (positional, not flags).
  const subcommand = process.argv[2];
  if (subcommand === "install-prepush-hook") return installPrepushHook();
  if (subcommand === "uninstall-prepush-hook") return uninstallPrepushHook();

  const opts = buildOpts();
  const input = readInput();

  // Auto-redact mode: print redacted body to stdout, diff to stderr, exit 0.
  const autoIds = arg("--auto-redact");
  if (autoIds) {
    const redaction = applyRedactions(input, autoIds.split(","), opts);
    process.stdout.write(redaction.body);
    if (redaction.diff) process.stderr.write(redaction.diff + "\n");
    if (redaction.skipped.length) {
      const listing = redaction.skipped.map((f) => ` ${f.id} @ ${f.line}:${f.col}`).join("\n");
      process.stderr.write(
        `\ngstack-redact: ${redaction.skipped.length} finding(s) could not be auto-redacted (structural) — edit manually:\n` +
          listing +
          "\n",
      );
    }
    process.exit(0);
  }

  const result = scan(input, opts);
  const code = exitCodeFor(result);

  if (flag("--json")) {
    process.stdout.write(JSON.stringify(result, null, 2) + "\n");
    process.exit(code);
  }

  // Human-readable report: header first, then either the oversize notice or
  // the findings table followed by per-severity counts.
  process.stdout.write(`gstack-redact scan — repo ${result.repoVisibility.toUpperCase()}\n`);
  if (result.oversize) {
    process.stdout.write(" BLOCKED — input too large to scan safely (fail-closed)\n");
  } else {
    process.stdout.write(humanTable(result.findings) + "\n");
    const { HIGH, MEDIUM, LOW, WARN } = result.counts;
    process.stdout.write(` HIGH=${HIGH} MEDIUM=${MEDIUM} LOW=${LOW} WARN=${WARN}\n`);
  }
  process.exit(code);
}
main();

View File

@ -1,146 +0,0 @@
#!/usr/bin/env bun
/**
* gstack-redact-prepush — git pre-push hook that scans the diff being pushed for
* HIGH-severity credentials and blocks the push on a hit.
*
* THIS IS A GUARDRAIL, NOT ENFORCEMENT. `git push --no-verify` bypasses it, as
* does `GSTACK_REDACT_PREPUSH=skip`. It catches accidental credential pushes,
* the most common real-world leak. It does NOT scan history, binary/LFS/submodule
* files, or non-added lines. History scanning is /cso's job.
*
* Git pre-push interface: refs are read from STDIN, one per line:
* <local ref> <local sha> <remote ref> <remote sha>
* We scan the ADDED lines of <remote sha>..<local sha> per ref (what's being
* pushed). Special cases:
* - remote sha all-zeroes → new branch: diff against merge-base with the
* remote's default branch (fallback: scan all commits unique to local ref).
* - local sha all-zeroes → branch delete: nothing to scan, skip.
* - force-push → remote..local still gives the net new content.
*
* Behavior:
* - HIGH finding in added lines → print + exit 1 (block), for public AND private.
* - MEDIUM → warn (non-blocking). LOW/WARN → silent.
* - GSTACK_REDACT_PREPUSH=skip → log + exit 0 (escape valve).
*
* Installed/uninstalled via `gstack-redact install-prepush-hook` (see the
* gstack-redact CLI), which chains any pre-existing hook.
*/
import { spawnSync } from "child_process";
import * as fs from "fs";
import * as os from "os";
import * as path from "path";
import { scan, type Finding } from "../lib/redact-engine";
const ZERO = /^0+$/;
// The canonical empty-tree object; diffing against it yields all content as added.
const EMPTY_TREE = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";
/**
 * Run `git` with the given arguments and return its stdout.
 *
 * @param args Arguments passed to git verbatim (no shell involved).
 * @returns stdout on exit status 0; the empty string for any other outcome, so
 *   callers cannot distinguish "failed" from "succeeded with no output".
 */
function git(args: string[]): string {
  const { status, stdout } = spawnSync("git", args, {
    encoding: "utf8",
    maxBuffer: 64 * 1024 * 1024,
  });
  if (status !== 0) return "";
  return stdout ?? "";
}
/**
 * Work out which remote-tracking branch to treat as the default.
 *
 * Resolution order: the target of `refs/remotes/origin/HEAD` (with the
 * `refs/remotes/` prefix removed), then the first of `origin/main` /
 * `origin/master` that `git rev-parse --verify` resolves, and finally the
 * literal "origin/main" when nothing resolves.
 */
function defaultRemoteBranch(): string {
  const headTarget = git(["symbolic-ref", "refs/remotes/origin/HEAD"]).trim();
  if (headTarget) return headTarget.replace("refs/remotes/", "");

  const candidates = ["origin/main", "origin/master"];
  const resolved = candidates.find(
    (branch) => git(["rev-parse", "--verify", branch]).trim() !== "",
  );
  return resolved ?? "origin/main";
}
/**
 * Return the added-line text for a ref update being pushed.
 *
 * @param localSha  Commit being pushed.
 * @param remoteSha Commit the remote ref currently points at; all zeroes means
 *   the ref does not exist on the remote yet.
 * @returns The content of every added line in the chosen diff range (leading
 *   '+' removed), joined with newlines.
 *
 * Range selection:
 *  - remote commit known locally → `remoteSha..localSha` (net new content).
 *  - new branch, OR a remote commit that is not in the local object database
 *    (e.g. a force-push over commits that were never fetched) → diff from the
 *    merge-base with the remote default branch, or from the empty tree when
 *    there is no merge-base. Without this fallback `git diff` would fail, the
 *    `git` helper would return "", and the push would pass unscanned.
 */
function addedLinesFor(localSha: string, remoteSha: string): string {
  // `rev-parse --verify` alone echoes any full hex id without checking that the
  // object exists; the ^{commit} peel forces a lookup in the object database.
  const remoteKnown =
    !ZERO.test(remoteSha) &&
    git(["rev-parse", "--verify", "--quiet", `${remoteSha}^{commit}`]).trim() !== "";

  let range: string;
  if (remoteKnown) {
    // Existing branch (incl. force-push): net new content remote..local.
    range = `${remoteSha}..${localSha}`;
  } else {
    // Fail-safe (scans more, never less): everything unique to localSha relative
    // to the remote default branch, or the whole tree when no merge-base exists.
    const base = git(["merge-base", localSha, defaultRemoteBranch()]).trim();
    range = base ? `${base}..${localSha}` : `${EMPTY_TREE}..${localSha}`;
  }

  // -U0: only changed lines. Added lines are collected only inside hunks (after
  // an "@@" header, until the next "diff --git"), so the "+++ b/file" header is
  // skipped by position rather than by prefix — an added line whose own content
  // starts with "++" (e.g. "++i;") is still scanned.
  const diff = git(["diff", "--unified=0", "--no-color", range]);
  const added: string[] = [];
  let inHunk = false;
  for (const line of diff.split("\n")) {
    if (line.startsWith("diff --git ")) {
      inHunk = false;
    } else if (line.startsWith("@@")) {
      inHunk = true;
    } else if (inHunk && line.startsWith("+")) {
      added.push(line.slice(1));
    }
  }
  return added.join("\n");
}
/**
 * Append a `{ ts, reason }` JSON line to `security/prepush-skip.jsonl` under
 * the gstack state directory (`$GSTACK_HOME`, else `~/.gstack`).
 *
 * @param reason Free-form reason recorded alongside an ISO-8601 timestamp.
 *
 * Best-effort: every failure is swallowed so that logging can never block a push.
 */
function logSkip(reason: string): void {
  try {
    const stateRoot = process.env.GSTACK_HOME || path.join(os.homedir(), ".gstack");
    const securityDir = path.join(stateRoot, "security");
    fs.mkdirSync(securityDir, { recursive: true });

    const record = { ts: new Date().toISOString(), reason };
    const logFile = path.join(securityDir, "prepush-skip.jsonl");
    fs.appendFileSync(logFile, JSON.stringify(record) + "\n");
  } catch {
    // best-effort; never block a push because logging failed
  }
}
/**
 * Pre-push hook entry point.
 *
 * Reads the ref-update lines from stdin, scans the added lines of each pushed
 * ref, and then:
 *  - exits 0 immediately (after logging the skip) when
 *    GSTACK_REDACT_PREPUSH=skip, case-insensitive;
 *  - prints a non-blocking warning when any MEDIUM findings were seen;
 *  - prints every HIGH finding and exits 1 when at least one was seen;
 *  - exits 0 otherwise.
 */
function main() {
  const skipSetting = (process.env.GSTACK_REDACT_PREPUSH || "").toLowerCase();
  if (skipSetting === "skip") {
    logSkip(process.env.GSTACK_REDACT_PREPUSH_REASON || "env-skip");
    process.stderr.write("gstack-redact-prepush: skipped via GSTACK_REDACT_PREPUSH=skip\n");
    process.exit(0);
  }

  // One ref update per non-blank stdin line:
  //   <local ref> <local sha> <remote ref> <remote sha>
  const refLines = fs
    .readFileSync(0, "utf8")
    .split("\n")
    .map((l) => l.trim())
    .filter(Boolean);

  const highFindings: Finding[] = [];
  let mediumTotal = 0;

  for (const refLine of refLines) {
    const fields = refLine.split(/\s+/);
    const localSha = fields[1];
    const remoteSha = fields[3];
    if (!localSha || ZERO.test(localSha)) continue; // branch delete → nothing pushed

    const added = addedLinesFor(localSha, remoteSha || "0");
    if (!added.trim()) continue;

    // Visibility doesn't change HIGH behavior; pass private so nothing is treated
    // as public-strict (HIGH blocks regardless either way).
    const { findings } = scan(added, { repoVisibility: "private" });
    for (const finding of findings) {
      if (finding.severity === "HIGH") highFindings.push(finding);
      else if (finding.severity === "MEDIUM") mediumTotal++;
    }
  }

  if (mediumTotal > 0) {
    process.stderr.write(
      `gstack-redact-prepush: ${mediumTotal} MEDIUM finding(s) in pushed diff (PII/internal). ` +
        "Not blocking. Review before this becomes public.\n",
    );
  }

  if (highFindings.length === 0) {
    process.exit(0);
  }

  process.stderr.write(
    "\n⛔ gstack-redact-prepush BLOCKED the push — credential(s) in the pushed diff:\n\n",
  );
  for (const finding of highFindings) {
    process.stderr.write(` HIGH ${finding.id} ${finding.preview}\n`);
  }
  process.stderr.write(
    "\nRotate the credential (a pushed secret is compromised) and remove it from the diff.\n" +
      "This is a guardrail: `git push --no-verify` or `GSTACK_REDACT_PREPUSH=skip git push` bypass it.\n",
  );
  process.exit(1);
}
main();

View File

@ -46,17 +46,6 @@ _cleanup_skill_entry() {
fi
}
_link_root_skill_alias() {
local target="$SKILLS_DIR/_gstack-command"
[ -f "$INSTALL_DIR/SKILL.md" ] || return 0
[ -L "$target" ] && rm -f "$target"
mkdir -p "$target"
ln -snf "$INSTALL_DIR/SKILL.md" "$target/SKILL.md"
}
_link_root_skill_alias
# Discover skills (directories with SKILL.md, excluding meta dirs)
SKILL_COUNT=0
for skill_dir in "$INSTALL_DIR"/*/; do

View File

@ -1,44 +1,21 @@
#!/usr/bin/env bash
# gstack-settings-hook — manage Claude Code hooks in ~/.claude/settings.json
# gstack-settings-hook — add/remove SessionStart hooks in Claude Code settings.json
#
# Two shapes:
#
# 1. Legacy (SessionStart only — used by setup --team and gstack-uninstall):
# gstack-settings-hook add <cmd> # adds SessionStart hook
# gstack-settings-hook remove <cmd> # removes matching SessionStart hook
#
# 2. Schema-aware (plan-tune cathedral T3 — supports PreToolUse + PostToolUse):
# gstack-settings-hook add-event --event <SessionStart|PreToolUse|PostToolUse> \
# --command <cmd> --source <tag> [--matcher <regex>] [--timeout <s>]
# gstack-settings-hook remove-source --source <tag>
# gstack-settings-hook diff-event --event ... --command ... --source ... [--matcher ...]
# gstack-settings-hook rollback # restore latest backup
# gstack-settings-hook list-sources # show all gstack-tagged hook entries
#
# Every add-event/remove-source writes a backup to ~/.claude/settings.json.bak.<ts>
# before mutating (Codex correction — silent settings.json mutation is wrong).
#
# Dedup: legacy `add`/`remove` dedupe by the historical `gstack-session-update`
# substring. Schema-aware `add-event` dedupes by (event, matcher, _gstack_source) so
# multiple gstack registrations (plan-tune, ...) don't collide.
# Usage:
# gstack-settings-hook add <hook-command> # add SessionStart hook
# gstack-settings-hook remove <hook-command> # remove SessionStart hook
#
# Requires: bun (already a gstack hard dependency)
# Writes atomically: .tmp + rename to prevent corruption on crash/disk-full.
set -euo pipefail
ACTION="${1:-}"
HOOK_CMD="${2:-}"
SETTINGS_FILE="${GSTACK_SETTINGS_FILE:-$HOME/.claude/settings.json}"
if [ -z "$ACTION" ]; then
cat <<EOF >&2
Usage:
gstack-settings-hook add <hook-command> # legacy SessionStart add
gstack-settings-hook remove <hook-command> # legacy SessionStart remove
gstack-settings-hook add-event --event <name> --command <cmd> --source <tag> [--matcher <re>] [--timeout <s>]
gstack-settings-hook remove-source --source <tag>
gstack-settings-hook diff-event --event <name> --command <cmd> --source <tag> [--matcher <re>] [--timeout <s>]
gstack-settings-hook rollback
gstack-settings-hook list-sources
EOF
if [ -z "$ACTION" ] || [ -z "$HOOK_CMD" ]; then
echo "Usage: gstack-settings-hook {add|remove} <hook-command>" >&2
exit 1
fi
@ -47,239 +24,59 @@ if ! command -v bun >/dev/null 2>&1; then
exit 1
fi
backup_settings() {
if [ -f "$SETTINGS_FILE" ]; then
local ts
ts=$(date +%Y%m%d-%H%M%S)
cp "$SETTINGS_FILE" "$SETTINGS_FILE.bak.$ts"
echo "$SETTINGS_FILE.bak.$ts" > "$SETTINGS_FILE.bak-latest"
fi
}
# --- legacy SessionStart add/remove (backwards compat) -----------------
case "$ACTION" in
add)
HOOK_CMD="${2:-}"
if [ -z "$HOOK_CMD" ]; then
echo "Usage: gstack-settings-hook add <hook-command>" >&2
exit 1
fi
backup_settings
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" GSTACK_HOOK_CMD="$HOOK_CMD" bun -e '
const fs = require("fs");
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" GSTACK_HOOK_CMD="$HOOK_CMD" bun -e "
const fs = require('fs');
const settingsPath = process.env.GSTACK_SETTINGS_PATH;
const hookCmd = process.env.GSTACK_HOOK_CMD;
let settings = {};
try { settings = JSON.parse(fs.readFileSync(settingsPath, "utf8")); } catch {}
try { settings = JSON.parse(fs.readFileSync(settingsPath, 'utf8')); } catch {}
if (!settings.hooks) settings.hooks = {};
if (!settings.hooks.SessionStart) settings.hooks.SessionStart = [];
// Dedup: check if hook command already registered
const exists = settings.hooks.SessionStart.some(entry =>
entry.hooks && entry.hooks.some(h => h.command && h.command.includes("gstack-session-update"))
entry.hooks && entry.hooks.some(h => h.command && h.command.includes('gstack-session-update'))
);
if (!exists) {
settings.hooks.SessionStart.push({
hooks: [{ type: "command", command: hookCmd }]
hooks: [{ type: 'command', command: hookCmd }]
});
}
const tmp = settingsPath + ".tmp";
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + "\n");
fs.renameSync(tmp, settingsPath);
' 2>/dev/null
;;
const tmp = settingsPath + '.tmp';
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + '\n');
fs.renameSync(tmp, settingsPath);
" 2>/dev/null
;;
remove)
HOOK_CMD="${2:-}"
if [ -z "$HOOK_CMD" ]; then
echo "Usage: gstack-settings-hook remove <hook-command>" >&2
exit 1
fi
[ -f "$SETTINGS_FILE" ] || exit 1
backup_settings
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" bun -e '
const fs = require("fs");
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" bun -e "
const fs = require('fs');
const settingsPath = process.env.GSTACK_SETTINGS_PATH;
let settings = {};
try { settings = JSON.parse(fs.readFileSync(settingsPath, "utf8")); } catch { process.exit(0); }
try { settings = JSON.parse(fs.readFileSync(settingsPath, 'utf8')); } catch { process.exit(0); }
if (settings.hooks && settings.hooks.SessionStart) {
settings.hooks.SessionStart = settings.hooks.SessionStart.filter(entry =>
!(entry.hooks && entry.hooks.some(h => h.command && h.command.includes("gstack-session-update")))
!(entry.hooks && entry.hooks.some(h => h.command && h.command.includes('gstack-session-update')))
);
if (settings.hooks.SessionStart.length === 0) delete settings.hooks.SessionStart;
if (Object.keys(settings.hooks).length === 0) delete settings.hooks;
}
const tmp = settingsPath + ".tmp";
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + "\n");
const tmp = settingsPath + '.tmp';
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + '\n');
fs.renameSync(tmp, settingsPath);
' 2>/dev/null
" 2>/dev/null
;;
add-event|diff-event)
EVENT=""
COMMAND=""
SOURCE=""
MATCHER=""
TIMEOUT=""
shift
while [ $# -gt 0 ]; do
case "$1" in
--event) EVENT="$2"; shift 2 ;;
--command) COMMAND="$2"; shift 2 ;;
--source) SOURCE="$2"; shift 2 ;;
--matcher) MATCHER="$2"; shift 2 ;;
--timeout) TIMEOUT="$2"; shift 2 ;;
*) echo "unknown flag: $1" >&2; exit 1 ;;
esac
done
if [ -z "$EVENT" ] || [ -z "$COMMAND" ] || [ -z "$SOURCE" ]; then
echo "add-event/diff-event require --event, --command, --source" >&2
exit 1
fi
case "$EVENT" in
SessionStart|PreToolUse|PostToolUse|UserPromptSubmit|Stop|Notification) ;;
*) echo "invalid --event '$EVENT'; must be one of SessionStart|PreToolUse|PostToolUse|UserPromptSubmit|Stop|Notification" >&2; exit 1 ;;
esac
if [ "$ACTION" = "add-event" ]; then
backup_settings
fi
DIFF_ONLY=""
if [ "$ACTION" = "diff-event" ]; then DIFF_ONLY=1; fi
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" \
GSTACK_EVENT="$EVENT" \
GSTACK_COMMAND="$COMMAND" \
GSTACK_SOURCE="$SOURCE" \
GSTACK_MATCHER="$MATCHER" \
GSTACK_TIMEOUT="$TIMEOUT" \
GSTACK_DIFF_ONLY="$DIFF_ONLY" \
bun -e '
const fs = require("fs");
const settingsPath = process.env.GSTACK_SETTINGS_PATH;
const event = process.env.GSTACK_EVENT;
const cmd = process.env.GSTACK_COMMAND;
const source = process.env.GSTACK_SOURCE;
const matcher = process.env.GSTACK_MATCHER || "";
const timeoutRaw = process.env.GSTACK_TIMEOUT || "";
const diffOnly = process.env.GSTACK_DIFF_ONLY === "1";
let settings = {};
try { settings = JSON.parse(fs.readFileSync(settingsPath, "utf8")); } catch {}
const before = JSON.stringify(settings, null, 2);
if (!settings.hooks) settings.hooks = {};
if (!settings.hooks[event]) settings.hooks[event] = [];
const matchesEntry = (entry) => {
const sameMatcher = (entry.matcher || "") === matcher;
const sameSource = entry._gstack_source === source;
return sameMatcher && sameSource;
};
let existing = settings.hooks[event].find(matchesEntry);
const hookEntry = { type: "command", command: cmd };
if (timeoutRaw) {
const n = Number(timeoutRaw);
if (Number.isFinite(n) && n > 0) hookEntry.timeout = n;
}
if (existing) {
existing.hooks = [hookEntry];
} else {
const newEntry = { _gstack_source: source, hooks: [hookEntry] };
if (matcher) newEntry.matcher = matcher;
settings.hooks[event].push(newEntry);
}
const after = JSON.stringify(settings, null, 2);
if (diffOnly) {
console.log("--- BEFORE");
console.log(before);
console.log("--- AFTER");
console.log(after);
process.exit(0);
}
const tmp = settingsPath + ".tmp";
fs.writeFileSync(tmp, after + "\n");
fs.renameSync(tmp, settingsPath);
console.log("OK: " + event + " hook registered (source: " + source + ")");
'
;;
remove-source)
SOURCE=""
shift
while [ $# -gt 0 ]; do
case "$1" in
--source) SOURCE="$2"; shift 2 ;;
*) echo "unknown flag: $1" >&2; exit 1 ;;
esac
done
if [ -z "$SOURCE" ]; then
echo "remove-source requires --source <tag>" >&2
exit 1
fi
[ -f "$SETTINGS_FILE" ] || exit 0
backup_settings
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" GSTACK_SOURCE="$SOURCE" bun -e '
const fs = require("fs");
const settingsPath = process.env.GSTACK_SETTINGS_PATH;
const source = process.env.GSTACK_SOURCE;
let settings = {};
try { settings = JSON.parse(fs.readFileSync(settingsPath, "utf8")); } catch { process.exit(0); }
if (!settings.hooks) { process.exit(0); }
let removed = 0;
for (const event of Object.keys(settings.hooks)) {
const before = settings.hooks[event].length;
settings.hooks[event] = settings.hooks[event].filter(entry => entry._gstack_source !== source);
removed += before - settings.hooks[event].length;
if (settings.hooks[event].length === 0) delete settings.hooks[event];
}
if (Object.keys(settings.hooks).length === 0) delete settings.hooks;
const tmp = settingsPath + ".tmp";
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + "\n");
fs.renameSync(tmp, settingsPath);
console.log("OK: removed " + removed + " hook entry/entries tagged source=" + source);
'
;;
rollback)
if [ ! -f "$SETTINGS_FILE.bak-latest" ]; then
echo "rollback: no backup pointer at $SETTINGS_FILE.bak-latest" >&2
exit 1
fi
LATEST=$(cat "$SETTINGS_FILE.bak-latest")
if [ ! -f "$LATEST" ]; then
echo "rollback: pointer references missing backup $LATEST" >&2
exit 1
fi
cp "$LATEST" "$SETTINGS_FILE"
echo "OK: restored $SETTINGS_FILE from $LATEST"
;;
list-sources)
[ -f "$SETTINGS_FILE" ] || { echo "(no settings file)"; exit 0; }
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" bun -e '
const fs = require("fs");
let settings = {};
try { settings = JSON.parse(fs.readFileSync(process.env.GSTACK_SETTINGS_PATH, "utf8")); } catch { process.exit(0); }
const hooks = settings.hooks || {};
let any = false;
for (const event of Object.keys(hooks)) {
for (const entry of hooks[event]) {
if (entry._gstack_source) {
any = true;
console.log(event + "\t" + entry._gstack_source + "\t" + (entry.matcher || "(no matcher)"));
}
}
}
if (!any) console.log("(no gstack-tagged hooks)");
'
;;
*)
echo "Unknown action: $ACTION" >&2
echo "Unknown action: $ACTION (expected add or remove)" >&2
exit 1
;;
esac

View File

@ -64,14 +64,6 @@ fi
# 4. Fallback to basename only when there is no usable override, repo, or cache.
SLUG="${SLUG:-$(sanitize_slug "$(basename "$PROJECT_DIR")")}"
# 4b. Unconditional final sanitize before the value is echoed into `eval`/`source`
# output or written to cache. Every source above (override, remote, basename,
# and the cache read at step 3) already runs sanitize_slug, but filtering here
# too keeps the [a-zA-Z0-9._-] invariant promised in the header on every path —
# preserving the defense against a poisoned ~/.gstack/slug-cache/<key> injecting
# shell into `eval "$(gstack-slug)"` — and heals such a cache on the next write.
SLUG=$(sanitize_slug "${SLUG:-}")
# 5. Cache the slug for future sessions (atomic write, fail silently)
if [[ -n "$SLUG" ]]; then
mkdir -p "$CACHE_DIR" 2>/dev/null || true

View File

@ -107,13 +107,7 @@ BATCH="$BATCH]"
[ "$COUNT" -eq 0 ] && exit 0
# ─── POST to edge function ───────────────────────────────────
# Create response file atomically. If mktemp fails, refuse to continue rather
# than fall back to a predictable $$-based path (race + overwrite footgun).
RESP_FILE="$(mktemp "${TMPDIR:-/tmp}/gstack-sync-XXXXXX")" || {
echo "gstack-telemetry-sync: mktemp failed — skipping this run" >&2
exit 0
}
trap 'rm -f "$RESP_FILE"' EXIT
RESP_FILE="$(mktemp /tmp/gstack-sync-XXXXXX 2>/dev/null || echo "/tmp/gstack-sync-$$")"
HTTP_CODE="$(curl -s -w '%{http_code}' --max-time 10 \
-X POST "${SUPABASE_URL}/functions/v1/telemetry-ingest" \
-H "Content-Type: application/json" \

View File

@ -29,13 +29,11 @@ if [ ! -f "$TIMELINE_FILE" ]; then
exit 0
fi
cat "$TIMELINE_FILE" 2>/dev/null | GSTACK_TIMELINE_SINCE="$SINCE" GSTACK_TIMELINE_BRANCH="$BRANCH" GSTACK_TIMELINE_LIMIT="$LIMIT" bun -e "
cat "$TIMELINE_FILE" 2>/dev/null | bun -e "
const lines = (await Bun.stdin.text()).trim().split('\n').filter(Boolean);
const since = process.env.GSTACK_TIMELINE_SINCE || '';
const branch = process.env.GSTACK_TIMELINE_BRANCH || '';
const limitRaw = process.env.GSTACK_TIMELINE_LIMIT || '20';
const parsedLimit = Number.parseInt(limitRaw, 10);
const limit = Number.isSafeInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : 20;
const since = '${SINCE}';
const branch = '${BRANCH}';
const limit = ${LIMIT};
let sinceMs = 0;
if (since) {

View File

@ -232,10 +232,6 @@ SETTINGS_HOOK="$(dirname "$0")/gstack-settings-hook"
SESSION_UPDATE="$(dirname "$0")/gstack-session-update"
if [ -x "$SETTINGS_HOOK" ]; then
"$SETTINGS_HOOK" remove "$SESSION_UPDATE" 2>/dev/null && REMOVED+=("SessionStart hook") || true
# Cathedral T8 cleanup: also remove plan-tune PreToolUse + PostToolUse hooks.
if "$SETTINGS_HOOK" remove-source --source plan-tune-cathedral 2>/dev/null | grep -q "removed [1-9]"; then
REMOVED+=("plan-tune cathedral hooks")
fi
fi
# ─── Remove global state ────────────────────────────────────

View File

@ -1,212 +0,0 @@
#!/usr/bin/env bun
// gstack-version-bump — deterministic version-state classifier + writer for /ship.
//
// Extracted from ship Step 12 prose (v2 plan T9, hybrid CLI extraction). The
// idempotency classification and the dual-write to VERSION + package.json are
// pure deterministic logic; running them as tested code removes the single
// worst /ship footgun — re-bumping an already-shipped branch — from prose the
// agent could skip or misread when the step lives in a lazy-loaded section.
//
// What STAYS agent judgment (NOT here): the bump-LEVEL decision (micro/patch vs
// minor/major, which may AskUserQuestion on feature signals) and the queue
// collision prompt. The slot pick itself is bin/gstack-next-version. This CLI
// only answers "what state am I in?" and "write this exact version".
//
// Subcommands:
// classify --base <branch> [--version-path <p>]
// Compares VERSION vs origin/<base>:VERSION vs package.json.version.
// Emits JSON: { state, baseVersion, currentVersion, pkgVersion, pkgExists }
// state ∈ FRESH | ALREADY_BUMPED | DRIFT_STALE_PKG | DRIFT_UNEXPECTED
// Exit 0 on a decidable state (incl. DRIFT_UNEXPECTED — it's a real state
// the caller must handle), exit 2 on bad args / unresolvable base.
//
// write --version <X.Y.Z.W> [--version-path <p>]
// Validates the 4-digit pattern, writes VERSION + package.json.version.
// Use for the FRESH bump (or an approved queue rebump). Exit 3 on a
// half-write (VERSION written, package.json failed) so the caller knows
// drift exists; the next classify() will report DRIFT_STALE_PKG.
//
// repair [--version-path <p>]
// DRIFT_STALE_PKG path: sync package.json.version to the current VERSION
// file. No bump. Validates the VERSION pattern first.
//
// Contract: classify NEVER writes. write/repair mutate VERSION + package.json
// only. No git mutation, no network. Mirrors gstack-next-version's reader/writer
// split so /ship composes them.
import { existsSync, readFileSync, writeFileSync } from "node:fs";
import { execFileSync } from "node:child_process";
import { join } from "node:path";
const VERSION_RE = /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/;
const DEFAULT = "0.0.0.0";
type State = "FRESH" | "ALREADY_BUMPED" | "DRIFT_STALE_PKG" | "DRIFT_UNEXPECTED";
/**
 * Report a fatal error on stderr and terminate the process.
 *
 * @param msg  Reason text; written after the "gstack-version-bump: " prefix.
 * @param code Process exit status. Defaults to 2.
 */
function fail(msg: string, code = 2): never {
  const line = "gstack-version-bump: " + msg + "\n";
  process.stderr.write(line);
  process.exit(code);
}
/**
 * Look up the value of a `--flag value` pair.
 *
 * @param args Argument tokens to search.
 * @param flag Exact flag token to look for (first occurrence wins).
 * @returns The token immediately after `flag`, or `undefined` when the flag is
 *          absent or is the last token. The following token is returned as-is,
 *          even if it looks like another flag.
 */
function argVal(args: string[], flag: string): string | undefined {
  const pos = args.indexOf(flag);
  if (pos < 0) return undefined;
  const valuePos = pos + 1;
  if (valuePos >= args.length) return undefined;
  return args[valuePos];
}
/**
 * Work out which file holds the version string.
 *
 * Precedence: an explicit `--version-path` value, then the non-empty trimmed
 * contents of `<cwd>/.gstack/version-path`, then plain "VERSION". Whatever is
 * chosen is joined onto `cwd`.
 *
 * @param cwd      Directory the result is resolved against.
 * @param explicit Optional path supplied on the command line.
 * @returns The joined path to the VERSION file.
 */
function resolveVersionPath(cwd: string, explicit?: string): string {
  if (explicit) return join(cwd, explicit);

  const pinFile = join(cwd, ".gstack", "version-path");
  const pinned = existsSync(pinFile) ? readFileSync(pinFile, "utf-8").trim() : "";
  return join(cwd, pinned || "VERSION");
}
/**
 * Read a VERSION file and strip every whitespace character from it.
 *
 * @param p Path of the file to read.
 * @returns The compacted contents, or DEFAULT when the file cannot be read or
 *          contains nothing but whitespace.
 */
function readVersionFile(p: string): string {
  let text: string;
  try {
    text = readFileSync(p, "utf-8");
  } catch {
    // Unreadable or missing file is treated the same as an empty one.
    return DEFAULT;
  }
  const compact = text.replace(/[\r\n\s]/g, "");
  return compact === "" ? DEFAULT : compact;
}
/**
 * Inspect `<cwd>/package.json` in-process (no child process is spawned).
 *
 * @param cwd Directory expected to contain package.json.
 * @returns `exists` tells whether the file is present; `version` is its
 *          string `version` field, or "" when the file is missing, unreadable,
 *          or the field is absent / not a string.
 *
 * A present but syntactically invalid package.json is fatal: `fail` is called
 * with exit code 2.
 */
function readPkgVersion(cwd: string): { exists: boolean; version: string } {
  const manifest = join(cwd, "package.json");
  if (!existsSync(manifest)) return { exists: false, version: "" };

  let text: string;
  try {
    text = readFileSync(manifest, "utf-8");
  } catch {
    return { exists: true, version: "" };
  }

  let doc: unknown;
  try {
    doc = JSON.parse(text);
  } catch {
    fail("package.json is not valid JSON. Fix the file before re-running /ship.", 2);
  }

  const candidate = (doc as { version?: unknown })?.version;
  if (typeof candidate !== "string") return { exists: true, version: "" };
  return { exists: true, version: candidate };
}
/**
 * Set the `version` field of `<cwd>/package.json`.
 *
 * The file is parsed, the field overwritten, and the whole document is
 * re-serialized with 2-space indentation and a trailing newline.
 *
 * @param cwd     Directory containing package.json.
 * @param version Version string to store.
 * @throws Whatever readFileSync / JSON.parse / writeFileSync throw; callers
 *         are expected to catch.
 */
function writePkgVersion(cwd: string, version: string): void {
  const manifest = join(cwd, "package.json");
  const doc = JSON.parse(readFileSync(manifest, "utf-8")) as Record<string, unknown>;
  doc.version = version;
  const serialized = `${JSON.stringify(doc, null, 2)}\n`;
  writeFileSync(manifest, serialized);
}
/**
 * Read the version recorded on the base branch.
 *
 * @param cwd        Directory git is run in.
 * @param base       Base branch name; looked up as `origin/<base>`.
 * @param versionRel Path of the VERSION file as passed to `git show <ref>:<path>`.
 * @returns The whitespace-stripped file contents, or DEFAULT when the file is
 *          absent on the base ref or is empty.
 *
 * Calls `fail` (exit 2) when `origin/<base>` itself does not resolve.
 */
function baseVersion(cwd: string, base: string, versionRel: string): string {
  // Guard first: an unresolvable base ref is a hard error, not "no version".
  try {
    execFileSync("git", ["rev-parse", "--verify", `origin/${base}`], { cwd, stdio: "ignore" });
  } catch {
    fail(`Unable to resolve origin/${base}. Run 'git fetch origin' or verify the base branch exists.`, 2);
  }

  let shown: string;
  try {
    shown = execFileSync("git", ["show", `origin/${base}:${versionRel}`], { cwd }).toString();
  } catch {
    // The file does not exist on base (new repo / new file): treat as 0.0.0.0.
    return DEFAULT;
  }
  const compact = shown.replace(/[\r\n\s]/g, "");
  return compact === "" ? DEFAULT : compact;
}
/**
 * Pure classifier for the version state of the working tree.
 *
 * @param current    Version in the working-tree VERSION file.
 * @param base       Version on the base branch.
 * @param pkgExists  Whether package.json exists.
 * @param pkgVersion package.json's version ("" when unknown).
 * @returns
 *   - "FRESH"            VERSION equals base and package.json does not disagree.
 *   - "DRIFT_UNEXPECTED" VERSION equals base but package.json carries another
 *                        version (edited outside /ship; no safe guess).
 *   - "ALREADY_BUMPED"   VERSION differs from base and package.json does not disagree.
 *   - "DRIFT_STALE_PKG"  VERSION differs from base and package.json carries another version.
 *
 * A missing package.json or an empty pkgVersion never counts as disagreement.
 */
function classifyState(current: string, base: string, pkgExists: boolean, pkgVersion: string): State {
  const pkgDisagrees = pkgExists && pkgVersion !== "" && pkgVersion !== current;
  const movedPastBase = current !== base;

  if (movedPastBase) {
    return pkgDisagrees ? "DRIFT_STALE_PKG" : "ALREADY_BUMPED";
  }
  return pkgDisagrees ? "DRIFT_UNEXPECTED" : "FRESH";
}
/**
 * Resolve the VERSION path in the form `git show <ref>:<path>` needs, using
 * the same precedence as resolveVersionPath: explicit flag, then the non-empty
 * contents of `.gstack/version-path`, then "VERSION".
 */
function resolveVersionRel(cwd: string, explicit?: string): string {
  if (explicit) return explicit;
  const pin = join(cwd, ".gstack", "version-path");
  if (existsSync(pin)) {
    const p = readFileSync(pin, "utf-8").trim();
    if (p) return p;
  }
  return "VERSION";
}

/**
 * `classify` subcommand: compare the working-tree VERSION, the base branch's
 * VERSION, and package.json's version, then print one JSON line
 * `{ state, baseVersion, currentVersion, pkgVersion, pkgExists }` on stdout.
 *
 * Never writes to disk. Calls `fail` (exit 2) when `--base` is missing or the
 * base ref cannot be resolved.
 *
 * @param args Arguments after the subcommand (`--base <branch>`, optional
 *             `--version-path <p>`).
 * @param cwd  Repository directory to operate in.
 */
function cmdClassify(args: string[], cwd: string): void {
  const base = argVal(args, "--base");
  if (!base) fail("classify requires --base <branch>", 2);
  const explicit = argVal(args, "--version-path");
  const versionPath = resolveVersionPath(cwd, explicit);
  // Both sides of the comparison must name the same file. Previously the base
  // lookup ignored the .gstack/version-path pin and always fell back to
  // "VERSION", so a pinned repo compared two different files.
  const versionRel = resolveVersionRel(cwd, explicit);
  const current = readVersionFile(versionPath);
  const baseV = baseVersion(cwd, base!, versionRel);
  const pkg = readPkgVersion(cwd);
  const state = classifyState(current, baseV, pkg.exists, pkg.version);
  process.stdout.write(
    JSON.stringify({
      state,
      baseVersion: baseV,
      currentVersion: current,
      pkgVersion: pkg.version || null,
      pkgExists: pkg.exists,
    }) + "\n",
  );
  // DRIFT_UNEXPECTED is a real, decidable state — the caller stops on it, but the
  // classification itself succeeded, so exit 0. (Bad args / unresolvable base are
  // the only exit-2 cases.)
}
/**
 * `write` subcommand: store an exact version in the VERSION file and, when a
 * package.json exists in `cwd`, in its `version` field as well. Prints
 * `{ wrote, packageJson }` as one JSON line on success.
 *
 * Failure modes (all via `fail`):
 *   - exit 2: `--version` missing or not matching VERSION_RE (nothing written);
 *   - exit 3: VERSION was written but updating package.json threw.
 *
 * @param args Arguments after the subcommand (`--version <X.Y.Z.W>`, optional
 *             `--version-path <p>`).
 * @param cwd  Repository directory to operate in.
 */
function cmdWrite(args: string[], cwd: string): void {
  const version = argVal(args, "--version");
  if (!version) fail("write requires --version <X.Y.Z.W>", 2);
  const wellFormed = VERSION_RE.test(version!);
  if (!wellFormed) {
    fail(`NEW_VERSION (${version}) does not match MAJOR.MINOR.PATCH.MICRO. Aborting.`, 2);
  }

  const target = resolveVersionPath(cwd, argVal(args, "--version-path"));
  writeFileSync(target, version + "\n");

  const manifest = join(cwd, "package.json");
  if (existsSync(manifest)) {
    try {
      writePkgVersion(cwd, version!);
    } catch {
      // VERSION is already on disk at this point, so this is a half-write.
      fail(
        "failed to update package.json. VERSION was written but package.json is now stale. " +
          "Re-run — classify will report DRIFT_STALE_PKG and repair will sync it.",
        3,
      );
    }
  }

  const report = { wrote: version, packageJson: existsSync(manifest) };
  process.stdout.write(JSON.stringify(report) + "\n");
}
/**
 * `repair` subcommand: copy the current VERSION file value into
 * package.json's `version` field without bumping anything. Prints
 * `{ repaired }` as one JSON line on success.
 *
 * Failure modes (all via `fail`):
 *   - exit 2: VERSION contents do not match VERSION_RE, or there is no package.json;
 *   - exit 3: updating package.json threw.
 *
 * @param args Arguments after the subcommand (optional `--version-path <p>`).
 * @param cwd  Repository directory to operate in.
 */
function cmdRepair(args: string[], cwd: string): void {
  const source = resolveVersionPath(cwd, argVal(args, "--version-path"));
  const current = readVersionFile(source);

  // Validate before touching package.json so a bad VERSION is never propagated.
  const wellFormed = VERSION_RE.test(current);
  if (!wellFormed) {
    fail(
      `VERSION file contents (${current}) do not match MAJOR.MINOR.PATCH.MICRO. ` +
        "Refusing to propagate invalid semver into package.json. Fix VERSION, then re-run /ship.",
      2,
    );
  }

  const manifest = join(cwd, "package.json");
  if (!existsSync(manifest)) {
    fail("repair: no package.json to sync.", 2);
  }

  try {
    writePkgVersion(cwd, current);
  } catch {
    fail("drift repair failed — could not update package.json.", 3);
  }

  const report = { repaired: current };
  process.stdout.write(JSON.stringify(report) + "\n");
}
// Exported for unit tests (pure logic, no I/O).
export { classifyState, VERSION_RE, type State };
// CLI entry point: runs only when this file is executed directly, not when it
// is imported (e.g. by unit tests). The first argument selects the subcommand;
// everything after it is handed to that subcommand untouched.
if (import.meta.main) {
  const argv = process.argv.slice(2);
  const sub = argv[0];
  const rest = argv.slice(1);
  const cwd = process.cwd();

  if (sub === "classify") {
    cmdClassify(rest, cwd);
  } else if (sub === "write") {
    cmdWrite(rest, cwd);
  } else if (sub === "repair") {
    cmdRepair(rest, cwd);
  } else {
    // Missing or unknown subcommand.
    fail("usage: gstack-version-bump <classify|write|repair> [flags]", 2);
  }
}

View File

@ -2,7 +2,13 @@
name: browse
preamble-tier: 1
version: 1.1.0
description: Fast headless browser for QA testing and site dogfooding. (gstack)
description: |
Fast headless browser for QA testing and site dogfooding. Navigate any URL, interact with
elements, verify page state, diff before/after actions, take annotated screenshots, check
responsive layouts, test forms and uploads, handle dialogs, and assert element states.
~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a
user flow, or file a bug with evidence. Use when asked to "open in browser", "test the
site", "take a screenshot", or "dogfood this". (gstack)
triggers:
- browse a page
- headless browser
@ -16,16 +22,6 @@ allowed-tools:
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
<!-- Regenerate: bun run gen:skill-docs -->
## When to invoke this skill
Navigate any URL, interact with
elements, verify page state, diff before/after actions, take annotated screenshots, check
responsive layouts, test forms and uploads, handle dialogs, and assert element states.
~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a
user flow, or file a bug with evidence. Use when asked to "open in browser", "test the
site", "take a screenshot", or "dogfood this".
## Preamble (run first)
```bash
@ -61,7 +57,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
echo "QUESTION_TUNING: $_QUESTION_TUNING"
mkdir -p ~/.gstack/analytics
if [ "$_TEL" != "off" ]; then
echo '{"skill":"browse","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
echo '{"skill":"browse","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
fi
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
if [ -f "$_PF" ]; then
@ -103,19 +99,6 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
# Claude Code exposes plan mode via system reminders; we detect best-effort
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
# fall back to "inactive". Codex hosts and Claude execution mode both end up
# inactive, which is the safe default (defaults to file+execute pipeline).
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
export GSTACK_PLAN_MODE="active"
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
export GSTACK_PLAN_MODE="active"
else
export GSTACK_PLAN_MODE="inactive"
fi
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
```
@ -171,7 +154,7 @@ Only run `open` if yes. Always run `touch`.
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
Options:
- A) Help gstack get better! (recommended)
@ -247,7 +230,6 @@ Key routing rules:
- Ship/deploy/PR → invoke /ship or /land-and-deploy
- Save progress → invoke /context-save
- Resume context → invoke /context-restore
- Author a backlog-ready spec/issue → invoke /spec
```
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
@ -921,7 +903,6 @@ $B prettyscreenshot --cleanup --scroll-to ".pricing" --width 1440 ~/Desktop/hero
| `disconnect` | Disconnect headed browser, return to headless mode |
| `focus [@ref]` | Bring headed browser window to foreground (macOS) |
| `handoff [message]` | Open visible Chrome at current page for user takeover |
| `memory [--json]` | Snapshot Bun heap + per-tab JS heap + Chromium process tree + bounded buffer sizes. JSON output with --json. |
| `restart` | Restart server |
| `resume` | Re-snapshot after user takeover, return control to AI |
| `state save|load <name>` | Save/load browser state (cookies + URLs) |

View File

@ -18,12 +18,9 @@
import { chromium, type Browser, type BrowserContext, type BrowserContextOptions, type Page, type Locator, type Cookie } from 'playwright';
import { writeSecureFile, mkdirSecure } from './file-permissions';
import { addConsoleEntry, addNetworkEntry, addDialogEntry, networkBuffer, type DialogEntry } from './buffers';
import { emitActivity } from './activity';
import { validateNavigationUrl } from './url-validation';
import { TabSession, type RefEntry } from './tab-session';
import { resolveChromiumProfile, cleanSingletonLocks } from './config';
import { withCdpSession } from './cdp-bridge';
import type { MemorySnapshot, MemoryStructureStats, MemoryTabSnapshot, MemoryProcess } from './memory-snapshot';
/**
* Detect whether GSTACK_CHROMIUM_PATH points at a custom Chromium build that
@ -43,83 +40,6 @@ export function isCustomChromium(): boolean {
return p.includes('GBrowser') || p.includes('gbrowser');
}
/**
 * Report whether Playwright should be asked to keep Chromium's sandbox on.
 *
 * The answer is `false` when any of the following holds:
 *   - the platform is Windows, where the Bun → Node → Chromium spawn chain
 *     breaks the sandbox (GitHub #276);
 *   - GSTACK_CHROMIUM_NO_SANDBOX is exactly "1" — an explicit opt-out for
 *     hosts such as Ubuntu/AppArmor where unprivileged user namespaces are
 *     denied even for normal users and /qa hangs without --no-sandbox (#1562);
 *   - CI or CONTAINER is set to a non-empty value, or the process runs as
 *     uid 0 — environments where the sandbox's unprivileged user namespaces
 *     are typically unavailable.
 *
 * Otherwise the answer is `true`.
 *
 * When this returns false, Playwright adds --no-sandbox to the launch args by
 * itself — the desired behavior in those environments. When it returns true,
 * Playwright does not add the flag, which keeps Chromium's "unsupported
 * command-line flag" yellow infobar from showing on headed launches.
 *
 * The headless launch path also pushes an explicit '--no-sandbox' for
 * CI/CONTAINER/root; that push is redundant with this policy and harmless.
 */
export function shouldEnableChromiumSandbox(): boolean {
  const onWindows = process.platform === 'win32';
  if (onWindows) return false;

  const userOptedOut = process.env.GSTACK_CHROMIUM_NO_SANDBOX === '1';
  if (userOptedOut) return false;

  // process.getuid does not exist on every platform, hence the typeof guard.
  const runningAsRoot = typeof process.getuid === 'function' && process.getuid() === 0;
  if (process.env.CI) return false;
  if (process.env.CONTAINER) return false;
  return !runningAsRoot;
}
/**
 * Decide why the Chromium child process behind `browser` went away.
 *
 * Playwright's 'disconnected' event usually fires before the child process
 * emits its own 'exit', so `.exitCode` is still null at that moment. If the
 * child has neither an exit code nor a signal yet, wait for its 'exit' event,
 * but never longer than 1 second, then classify:
 *
 *   exitCode === 0 and no signal  →  'clean'  (user Cmd+Q, normal shutdown)
 *   anything else                 →  'crash'  (signal-kill, SIGSEGV, OOM,
 *                                              non-zero exit, no process handle)
 *
 * Process supervisors (gbrowser's gbd HealthMonitor in cmd/gbd/health.go) read
 * this server's exit code to decide whether to restart it. Callers map 'clean'
 * to exit code 0 (no restart); for 'crash' they keep their per-path codes
 * (launch → 1, launchHeaded → 2, handoff → 1) so gbd restarts on backoff.
 *
 * @param browser Playwright browser handle, or null when none is available.
 * @returns 'clean' or 'crash' as described above.
 */
export async function resolveDisconnectCause(browser: Browser | null): Promise<'clean' | 'crash'> {
  const child = browser?.process();

  if (child && child.exitCode === null && child.signalCode === null) {
    await new Promise<void>((settle) => {
      // Cap the wait: settle on timeout even if 'exit' never arrives.
      const cap = setTimeout(settle, 1000);
      const onExit = () => {
        clearTimeout(cap);
        settle();
      };
      child.once('exit', onExit);
    });
  }

  const exitedZero = child?.exitCode === 0;
  const signalled = child?.signalCode != null;
  return exitedZero && !signalled ? 'clean' : 'crash';
}
/**
 * Disconnect handler for the headless `launch()` path.
 *
 * Resolves the disconnect cause, logs it to stderr, and terminates the server:
 * exit code 0 for a clean user-initiated quit, exit code 1 for a crash or kill.
 * Kept as a standalone function so launch() can dispatch to it in one line.
 *
 * @param browser Playwright browser handle whose process just disconnected.
 */
export async function handleChromiumDisconnect(browser: Browser | null): Promise<void> {
  const userQuit = (await resolveDisconnectCause(browser)) === 'clean';

  if (userQuit) {
    console.error('[browse] Chromium closed cleanly (user-initiated quit). Server exiting (0).');
    process.exit(0);
  }

  // Anything that is not a clean quit is reported as fatal.
  console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting (1).');
  console.error('[browse] Console/network logs flushed to .gstack/browse-*.log');
  process.exit(1);
}
export type { RefEntry };
// Re-export TabSession for consumers
@ -197,60 +117,11 @@ export class BrowserManager {
private connectionMode: 'launched' | 'headed' = 'launched';
private intentionalDisconnect = false;
// ─── Tab Count Guardrail (D5 + Codex single-tab flag) ───────
// Idempotent threshold trackers: each guardrail fires exactly once per
// upward crossing of its threshold and re-arms when the tab count drops
// back below. Pre-guardrail, nothing tracked tab count growth and a
// user could accumulate hundreds of tabs (each holding 50–300 MB of
// Chromium-side RSS) without warning until the OS OOM-killer fired.
// The toast UX lives in the sidebar (extension/sidepanel.js); the
// server-side responsibility is the audit-trail activity entry that
// appears in the activity feed even when the sidebar is closed.
private static readonly TAB_GUARDRAIL_SOFT = 50;
private static readonly TAB_GUARDRAIL_HARD = 200;
private tabGuardrailSoftHit = false;
private tabGuardrailHardHit = false;
/**
 * Called from context.on('page') after a new tab is tracked. Emits at most
 * one warning per upward crossing of each threshold: the corresponding
 * `tabGuardrail*Hit` flag is set when the warning fires and suppresses repeats
 * until it is cleared again.
 *
 * Each warning goes to the server console and to the activity feed (as an
 * 'error' entry with command 'tab-guardrail'). Both thresholds are checked
 * independently, so a jump past both emits both warnings.
 */
private checkTabGuardrails(): void {
  const total = this.pages.size;
  if (!this.tabGuardrailSoftHit && total >= BrowserManager.TAB_GUARDRAIL_SOFT) {
    this.tabGuardrailSoftHit = true;
    // The per-tab range was garbled to "50300 MB"; the intended text is a 50–300 MB range.
    const msg = `Tab count crossed ${BrowserManager.TAB_GUARDRAIL_SOFT} (now ${total}). Consider closing unused tabs — each Chromium tab holds 50–300 MB.`;
    console.warn(`[browse] ${msg}`);
    emitActivity({ type: 'error', command: 'tab-guardrail', error: msg, tabs: total });
  }
  if (!this.tabGuardrailHardHit && total >= BrowserManager.TAB_GUARDRAIL_HARD) {
    this.tabGuardrailHardHit = true;
    const msg = `Tab count crossed ${BrowserManager.TAB_GUARDRAIL_HARD} (now ${total}). OOM risk imminent. Open the sidebar to see top RAM consumers.`;
    console.error(`[browse] ${msg}`);
    emitActivity({ type: 'error', command: 'tab-guardrail', error: msg, tabs: total });
  }
}
/**
 * Called from page.on('close'). Re-arms each guardrail once the tracked tab
 * count has dropped back below that guardrail's threshold, so the next upward
 * crossing warns again. A flag that is not set is left untouched.
 */
private recheckTabGuardrailsOnClose(): void {
  const open = this.pages.size;
  const belowSoft = open < BrowserManager.TAB_GUARDRAIL_SOFT;
  const belowHard = open < BrowserManager.TAB_GUARDRAIL_HARD;

  if (belowSoft && this.tabGuardrailSoftHit) this.tabGuardrailSoftHit = false;
  if (belowHard && this.tabGuardrailHardHit) this.tabGuardrailHardHit = false;
}
// Called when the headed browser disconnects without intentional teardown
// (user closed the window). Wired up by server.ts to run full cleanup
// (sidebar-agent, state file, profile locks) before exiting with code 2.
// Returns void or a Promise; rejections are caught and fall back to exit(2).
// `exitCode` is the resolved process exit code from the disconnect cause:
// 0 on clean user-initiated quit (e.g., Cmd+Q on headed Chromium), 2 on
// crash/signal-kill. Callers (server.ts) forward it to their shutdown
// pipeline so process supervisors (gbrowser's gbd) read the right signal.
public onDisconnect: ((exitCode?: number) => void | Promise<void>) | null = null;
public onDisconnect: (() => void | Promise<void>) | null = null;
getConnectionMode(): 'launched' | 'headed' { return this.connectionMode; }
@ -355,16 +226,12 @@ export class BrowserManager {
}
if (extensionsDir) {
// Skip --load-extension when running against a custom Chromium build that
// already bakes the extension in (e.g., GBrowser / GStack Browser.app).
// Loading it twice causes a ServiceWorkerState::SetWorkerId DCHECK crash.
if (!isCustomChromium()) {
launchArgs.push(
`--disable-extensions-except=${extensionsDir}`,
`--load-extension=${extensionsDir}`,
);
}
launchArgs.push('--window-position=-9999,-9999', '--window-size=1,1');
launchArgs.push(
`--disable-extensions-except=${extensionsDir}`,
`--load-extension=${extensionsDir}`,
'--window-position=-9999,-9999',
'--window-size=1,1',
);
useHeadless = false; // extensions require headed mode; off-screen window simulates headless
console.log(`[browse] Extensions loaded from: ${extensionsDir}`);
}
@ -373,25 +240,17 @@ export class BrowserManager {
headless: useHeadless,
// On Windows, Chromium's sandbox fails when the server is spawned through
// the Bun→Node process chain (GitHub #276). Disable it — local daemon
// browsing user-specified URLs has marginal sandbox benefit. Also disabled
// on Linux root/CI/container, where the sandbox requires unprivileged user
// namespaces that aren't available.
chromiumSandbox: shouldEnableChromiumSandbox(),
// browsing user-specified URLs has marginal sandbox benefit.
chromiumSandbox: process.platform !== 'win32',
...(launchArgs.length > 0 ? { args: launchArgs } : {}),
...(this.proxyConfig ? { proxy: this.proxyConfig } : {}),
});
// Chromium disconnect → distinguish clean user-quit from crash. Both
// events look identical to Playwright (one 'disconnected' fires), but
// the underlying ChildProcess exit code separates them:
// exitCode === 0 → clean quit (user Cmd+Q on macOS, normal shutdown)
// exitCode !== 0 → crash, signal-kill, or OOM
// Process supervisors (gbrowser's gbd) consume our exit code: code 0
// means "user wanted this, don't restart"; non-zero means "crash, please
// bring me back." Without this distinction every Cmd+Q gets treated as
// a crash and the user-visible window keeps respawning.
// Chromium crash → exit with clear message
this.browser.on('disconnected', () => {
void handleChromiumDisconnect(this.browser);
console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting.');
console.error('[browse] Console/network logs flushed to .gstack/browse-*.log');
process.exit(1);
});
const contextOptions: BrowserContextOptions = {
@ -556,10 +415,6 @@ export class BrowserManager {
this.context = await chromium.launchPersistentContext(userDataDir, {
headless: false,
// Match the sandbox policy used by launch() above. Without this,
// Playwright auto-adds --no-sandbox on every headed launch and the user
// sees Chromium's "unsupported command-line flag" yellow infobar.
chromiumSandbox: shouldEnableChromiumSandbox(),
args: launchArgs,
viewport: null, // Use browser's default viewport (real window size)
userAgent: this.customUserAgent || customUA,
@ -668,7 +523,6 @@ export class BrowserManager {
// Inject indicator on the new tab
page.evaluate(indicatorScript).catch(() => {});
console.log(`[browse] New tab detected (id=${id}, total=${this.pages.size})`);
this.checkTabGuardrails();
});
// Persistent context opens a default page — adopt it instead of creating a new one
@ -688,45 +542,32 @@ export class BrowserManager {
await this.newTab();
}
// Browser disconnect handler — distinguish user Cmd+Q from real crash.
// Clean exit (Chromium exit code 0) → process.exit(0) so process
// supervisors (gbrowser's gbd) treat it as user intent and skip the
// restart loop. Crash → process.exit(2) preserves the legacy headed
// semantics that's distinct from launch()'s code 1.
// Always calls onDisconnect() first to trigger full shutdown (kill
// sidebar-agent, save session, clean profile locks + state file) so
// crashes don't strand resources either.
// Browser disconnect handler — exit code 2 distinguishes from crashes (1).
// Calls onDisconnect() to trigger full shutdown (kill sidebar-agent, save
// session, clean profile locks + state file) before exit. Falls back to
// direct process.exit(2) if no callback is wired up, or if the callback
// throws/rejects — never leave the process running with a dead browser.
if (this.browser) {
this.browser.on('disconnected', () => {
if (this.intentionalDisconnect) return;
const browserRef = this.browser;
void (async () => {
const cause = await resolveDisconnectCause(browserRef);
const exitCode = cause === 'clean' ? 0 : 2;
if (cause === 'clean') {
console.error('[browse] Real browser closed cleanly (user-initiated quit). Server exiting (0).');
} else {
console.error('[browse] Real browser disconnected (crash or kill). Server exiting (2).');
console.error('[browse] Run `$B connect` to reconnect.');
console.error('[browse] Real browser disconnected (user closed or crashed).');
console.error('[browse] Run `$B connect` to reconnect.');
if (!this.onDisconnect) {
process.exit(2);
return;
}
try {
const result = this.onDisconnect();
if (result && typeof (result as Promise<void>).catch === 'function') {
(result as Promise<void>).catch((err) => {
console.error('[browse] onDisconnect rejected:', err);
process.exit(2);
});
}
if (!this.onDisconnect) {
process.exit(exitCode);
return;
}
try {
const result = this.onDisconnect(exitCode);
if (result && typeof (result as Promise<void>).catch === 'function') {
(result as Promise<void>).catch((err) => {
console.error('[browse] onDisconnect rejected:', err);
process.exit(exitCode);
});
}
// onDisconnect is responsible for exit on the success path.
} catch (err) {
console.error('[browse] onDisconnect threw:', err);
process.exit(exitCode);
}
})();
} catch (err) {
console.error('[browse] onDisconnect threw:', err);
process.exit(2);
}
});
}
@ -1053,116 +894,6 @@ export class BrowserManager {
}
}
/**
* Diagnostic for `$B memory` and the /memory endpoint.
*
* Collects:
* - Bun process memory (cross-platform, accurate, no shelling).
* - Per-tab JS heap via CDP Performance.getMetrics — the most portable
* per-tab signal CDP exposes. Misses native/GPU/Skia/cache memory
* (Codex flag on the eng-review; see follow-up TODO "native/GPU
* memory breakdown").
* - Chromium process tree via SystemInfo.getProcessInfo — PID + type
* + CPU time. Per-process RSS is NOT exposed via CDP and the eng
* review (D2 USE_CDP) explicitly chose CDP over shelling to `ps`,
* so RSS columns are absent and `notes[]` says why.
*
* `structures` is passed in by the caller (read-commands / server) so
* browser-manager doesn't take a hard dep on every buffer-owning module.
*
* @param structures Buffer-size stats supplied by the caller; returned unchanged.
* @returns A snapshot with Bun memory, per-tab metrics, the Chromium process
* list (null when it could not be collected), `structures`, a capture
* timestamp, and free-form `notes` explaining anything that was skipped.
* Failures while inspecting a tab or the browser are recorded or skipped
* rather than thrown.
*/
async getMemorySnapshot(structures: MemoryStructureStats): Promise<MemorySnapshot> {
const bunMem = process.memoryUsage();
const notes: string[] = [];
// Per-tab JS heap. Lazy: only the pages we already track. A target
// that died mid-snapshot is omitted, never throws.
const tabs: MemoryTabSnapshot[] = [];
for (const [id, page] of this.pages) {
try {
const url = (() => { try { return page.url(); } catch { return ''; } })();
const title = await page.title().catch(() => '');
const metrics = await withCdpSession(page, async (session) => {
// Performance.enable failures are ignored; getMetrics is still attempted.
await session.send('Performance.enable').catch(() => undefined);
const result = await session.send('Performance.getMetrics');
return ((result as { metrics?: Array<{ name: string; value: number }> }).metrics) ?? [];
});
// Flatten the [{name, value}] list into a name → value lookup.
const mm: Record<string, number> = {};
for (const m of metrics) mm[m.name] = m.value;
// Any metric missing from the response is reported as 0.
tabs.push({
id,
url,
title,
jsHeapUsed: mm.JSHeapUsedSize ?? 0,
jsHeapTotal: mm.JSHeapTotalSize ?? 0,
documents: mm.Documents ?? 0,
nodes: mm.Nodes ?? 0,
listeners: mm.JSEventListeners ?? 0,
});
} catch {
// Target died or CDP unavailable mid-snapshot — skip this tab.
}
}
// Chromium process tree. Browser handle may be on the `browser` field
// (launched mode) or accessible via `context.browser()` (persistent
// context / headed mode); try both.
let processes: MemoryProcess[] | null = null;
const browser: Browser | null = this.browser ?? (this.context ? this.context.browser() : null);
if (browser) {
try {
// `newBrowserCDPSession` is browser-wide. Not exposed on every
// Playwright TypeScript surface, but present at runtime on the
// Browser instance — use a typed cast to avoid the @ts-expect-error.
type BrowserWithCDP = Browser & {
newBrowserCDPSession?: () => Promise<{
send: (method: string, params?: unknown) => Promise<unknown>;
detach: () => Promise<void>;
}>;
};
const maybeFactory = (browser as BrowserWithCDP).newBrowserCDPSession;
if (typeof maybeFactory === 'function') {
// Invoked with .call so `this` inside the factory is the browser instance.
const browserSession = await maybeFactory.call(browser);
try {
const info = (await browserSession.send('SystemInfo.getProcessInfo')) as {
processInfo?: Array<{ id: number; type: string; cpuTime: number }>;
};
processes = (info.processInfo ?? []).map((p) => ({
id: p.id,
type: p.type,
cpuTime: p.cpuTime,
}));
notes.push(
'Per-Chromium-process RSS not collected — SystemInfo.getProcessInfo exposes PID+type+CPU only. ' +
'See follow-up TODO "native/GPU memory breakdown" for the deferred fix.',
);
} finally {
// Always release the browser-wide session; a failing detach is ignored.
await browserSession.detach().catch(() => undefined);
}
} else {
notes.push('Playwright build does not expose newBrowserCDPSession; per-process info skipped.');
}
} catch (err: any) {
notes.push(`CDP browser session unavailable: ${err?.message ?? String(err)}`);
}
} else {
notes.push('Browser handle unavailable (server connection mode); per-process info skipped.');
}
return {
bunServer: {
rss: bunMem.rss,
heapUsed: bunMem.heapUsed,
heapTotal: bunMem.heapTotal,
external: bunMem.external,
},
tabs,
processes,
structures,
capturedAt: Date.now(),
notes,
};
}
// ─── Ref Map (delegates to active session) ──────────────────
setRefMap(refs: Map<string, RefEntry>) {
this.getActiveSession().setRefMap(refs);
@ -1572,10 +1303,6 @@ export class BrowserManager {
newContext = await chromium.launchPersistentContext(userDataDir, {
headless: false,
// Match the sandbox policy used by launchHeaded() / launch(). The
// handoff path is the headless→headed re-launch and shares the same
// anti-detection posture, including no spurious --no-sandbox infobar.
chromiumSandbox: shouldEnableChromiumSandbox(),
args: launchArgs,
viewport: null,
...(this.proxyConfig ? { proxy: this.proxyConfig } : {}),
@ -1605,14 +1332,12 @@ export class BrowserManager {
await newContext.setExtraHTTPHeaders(this.extraHeaders);
}
// Register disconnect handler on new browser. Same clean-vs-crash
// discrimination as launch() / launchHeaded() above so a user-initiated
// Cmd+Q after a handoff doesn't trigger gbd's restart loop.
// Register crash handler on new browser
if (this.browser) {
const browserRef = this.browser;
this.browser.on('disconnected', () => {
if (this.intentionalDisconnect) return;
void handleChromiumDisconnect(browserRef);
console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting.');
process.exit(1);
});
}
@ -1689,7 +1414,6 @@ export class BrowserManager {
break;
}
}
this.recheckTabGuardrailsOnClose();
});
// Clear ref map on navigation — refs point to stale elements after page change
@ -1758,38 +1482,23 @@ export class BrowserManager {
}
});
// Capture response sizes via requestfinished — but DO NOT call
// response.body() here. Pre-fix, this listener materialized every
// response body across CDP just to read .length: multi-GB/hour of
// Buffer churn on long-lived headed Chromium with media-heavy
// pages, the primary Bun-side accelerant on the gbrowser-OOM
// investigation. req.sizes() pulls from the Network.loadingFinished
// event Chromium already emits — accurate for chunked transfer,
// gzip-compressed responses, and streaming media, all the cases
// where the previous Content-Length-header approach would have
// missed the size.
//
// The "single context-level CDP listener" architecture (D10's
// stretch goal — would reduce per-page listener count from N to 1
// via Target.setAutoAttach) is deferred. TODOS.md tracks it.
// Capture response sizes via response finished
page.on('requestfinished', async (req) => {
try {
const sizes = await req.sizes().catch(() => null);
if (!sizes) return;
const url = req.url();
const size = sizes.responseBodySize ?? 0;
for (let i = networkBuffer.length - 1; i >= 0; i--) {
const entry = networkBuffer.get(i);
if (entry && entry.url === url && !entry.size) {
networkBuffer.set(i, { ...entry, size });
break;
const res = await req.response();
if (res) {
const url = req.url();
const body = await res.body().catch(() => null);
const size = body ? body.length : 0;
for (let i = networkBuffer.length - 1; i >= 0; i--) {
const entry = networkBuffer.get(i);
if (entry && entry.url === url && !entry.size) {
networkBuffer.set(i, { ...entry, size });
break;
}
}
}
} catch {
// Best-effort: requestfinished fires for aborted/cached requests too,
// where sizes() is unavailable. Missing size is acceptable; an
// unbounded throw would noise the console for every cache hit.
}
} catch {}
});
}
}

View File

@ -25,84 +25,18 @@ import { logTelemetry } from './telemetry';
const CDP_TIMEOUT_MS = 5000;
const CDP_ACQUIRE_TIMEOUT_MS = 5000;
// ─── CDP session lifecycle helpers ─────────────────────────────
//
// Every direct `newCDPSession(page)` call needs a matching `session.detach()`
// to release the Chromium-side CDP target. Forgetting the detach leaves the
// target attached until the underlying transport drops (often process exit),
// which on a long-lived headed browser shows up as steadily-climbing
// browser-process RSS. To make the leak class unforgettable, callers should
// go through one of these two helpers; a static-grep test
// (browse/test/cdp-session-cleanup.test.ts) fails CI if any source file
// calls `newCDPSession(` outside this module.
/**
 * Run `fn` against a short-lived CDP session that is always detached.
 *
 * Intended for one-shot CDP work where the session is not reused, e.g.
 * archive snapshots, `$B memory`, or a single `Page.captureScreenshot`.
 * The session is detached whether `fn` resolves or throws, so the
 * Chromium-side target is released on the error path as well.
 *
 * For repeated use of the same page (the `$B cdp` bridge, the inspector),
 * use `getOrCreateCdpSession` instead; it caches the session and detaches
 * it when the page closes.
 *
 * @param page Page to attach the CDP session to.
 * @param fn   Work to perform with the session; its result is returned.
 * @returns Whatever `fn` resolves to. A rejection from `fn` propagates.
 */
export async function withCdpSession<T>(
  page: Page,
  fn: (session: any) => Promise<T>,
): Promise<T> {
  const cdp = await page.context().newCDPSession(page);

  // Best-effort release: the session may already be gone (target closed,
  // context recreated, browser disconnected), so detach errors are ignored.
  const release = async (): Promise<void> => {
    try {
      await cdp.detach();
    } catch {
      // Intentionally swallowed — cleanup must not mask fn's outcome.
    }
  };

  try {
    return await fn(cdp);
  } finally {
    await release();
  }
}
/**
 * Return the cached long-lived CDP session for `page`, creating it on first use.
 *
 * On creation the session is stored in `cache` and a one-time `close`
 * listener is registered on the page. When the page closes, that listener
 * removes the cache entry and detaches the session so the Chromium-side
 * target is released too.
 *
 * The cache is owned by the caller rather than being a single global map,
 * so different consumers (the `$B cdp` bridge, the inspector) can keep
 * separate session pools with their own invariants.
 *
 * @param page  Page whose session is requested.
 * @param cache Caller-owned map from Page to its CDP session.
 * @returns The existing or newly created session.
 */
export async function getOrCreateCdpSession(
  page: Page,
  cache: WeakMap<Page, any>,
): Promise<any> {
  const existing = cache.get(page);
  if (existing) return existing;

  const created = await page.context().newCDPSession(page);
  cache.set(page, created);

  const releaseOnClose = (): void => {
    cache.delete(page);
    created.detach().catch(() => {
      // Best-effort cleanup — the session may already be detached.
    });
  };
  page.once('close', releaseOnClose);

  return created;
}
// ─── $B cdp bridge ─────────────────────────────────────────────
// Per-page CDPSession cache. Lifecycle delegated to getOrCreateCdpSession
// which registers a close hook that BOTH removes the cache entry AND calls
// session.detach() — pre-helper code only did the former, leaving the
// Chromium-side target attached.
// Per-page CDPSession cache. Created lazily on first allow-listed call,
// cleaned up when the page closes.
const sessionCache: WeakMap<Page, any> = new WeakMap();
async function getCdpSession(page: Page): Promise<any> {
return getOrCreateCdpSession(page, sessionCache);
let s = sessionCache.get(page);
if (s) return s;
s = await page.context().newCDPSession(page);
sessionCache.set(page, s);
// Clear cache on detach so we don't hold a stale handle.
page.once('close', () => sessionCache.delete(page));
return s;
}
export interface CdpDispatchInput {

View File

@ -13,7 +13,6 @@
*/
import type { Page } from 'playwright';
import { getOrCreateCdpSession } from './cdp-bridge';
// ─── Types ──────────────────────────────────────────────────────
@ -107,23 +106,15 @@ async function getOrCreateSession(page: Page): Promise<any> {
}
}
session = await getOrCreateCdpSession(page, cdpSessions);
session = await page.context().newCDPSession(page);
cdpSessions.set(page, session);
// Enable DOM and CSS domains on first init for this page. The session
// itself is cached + close-detached by getOrCreateCdpSession; the
// initializedPages WeakSet is inspector-layer state that needs its
// own close hook to stay in sync.
if (!initializedPages.has(page)) {
await session.send('DOM.enable');
await session.send('CSS.enable');
initializedPages.add(page);
page.once('close', () => initializedPages.delete(page));
}
// Enable DOM and CSS domains
await session.send('DOM.enable');
await session.send('CSS.enable');
initializedPages.add(page);
// Auto-detach on navigation — DOM/CSS domain state is tied to the
// document. Close-detach (from getOrCreateCdpSession) handles the
// tab-close case; framenavigated catches in-tab navigation that
// invalidates inspector state without closing the tab.
// Auto-detach on navigation
page.once('framenavigated', () => {
try {
session.detach().catch(() => {});
@ -139,41 +130,7 @@ async function getOrCreateSession(page: Page): Promise<any> {
// ─── Modification History ───────────────────────────────────────
// Bounded FIFO of style modifications. Pre-cap, this was an unbounded
// module-scoped array that grew for every CSS edit made through $B css
// across the whole browser session — small per-entry footprint but no
// upper bound, the kind of slow leak that compounds over multi-day
// inspector use. The cap is 200 because per-session undo workflows
// rarely walk back more than a handful of edits, and a user who really
// wants to roll a long change back can `$B css reset` to revert all of
// them. totalPushed is monotonic across the session so undoModification
// can tell the user when their target index has been evicted, instead
// of just "no modification at index N".
// Maximum number of style modifications retained for undo.
const MOD_HISTORY_CAP = 200;
// Retained modifications, oldest first.
const modificationHistory: StyleModification[] = [];
// Count of every modification pushed since the last reset, including evicted ones.
let modHistoryTotalPushed = 0;

/**
 * Append a modification to the bounded history, dropping the oldest
 * entries once the buffer exceeds MOD_HISTORY_CAP.
 */
function pushModification(mod: StyleModification): void {
  modHistoryTotalPushed += 1;
  modificationHistory.push(mod);
  const overflow = modificationHistory.length - MOD_HISTORY_CAP;
  if (overflow > 0) {
    modificationHistory.splice(0, overflow);
  }
}

// Test-only hooks for the history-cap mechanics (push, reset, cap value)
// that work without a CDP-driven Page. Production code must go through
// modifyStyle / undoModification / resetModifications.
export const __testInternals = {
  pushModification,
  MOD_HISTORY_CAP,
  // Returns a copy so tests cannot mutate the live buffer.
  getRawHistory: () => [...modificationHistory],
  getTotalPushed: () => modHistoryTotalPushed,
  resetForTest: () => {
    modHistoryTotalPushed = 0;
    modificationHistory.length = 0;
  },
};
// ─── Specificity Calculation ────────────────────────────────────
@ -602,7 +559,7 @@ export async function modifyStyle(
method,
};
pushModification(modification);
modificationHistory.push(modification);
return modification;
}
@ -612,12 +569,7 @@ export async function modifyStyle(
export async function undoModification(page: Page, index?: number): Promise<void> {
const idx = index ?? modificationHistory.length - 1;
if (idx < 0 || idx >= modificationHistory.length) {
const evictedNote = modHistoryTotalPushed > MOD_HISTORY_CAP
? ` (most recent ${MOD_HISTORY_CAP} only — ${modHistoryTotalPushed - MOD_HISTORY_CAP} earlier entries evicted at the cap)`
: '';
throw new Error(
`No modification at index ${idx}. History has ${modificationHistory.length} entries${evictedNote}.`,
);
throw new Error(`No modification at index ${idx}. History has ${modificationHistory.length} entries.`);
}
const mod = modificationHistory[idx];
@ -670,23 +622,6 @@ export function getModificationHistory(): StyleModification[] {
return [...modificationHistory];
}
/**
 * Diagnostic view of the modification history for the `$B memory` snapshot.
 *
 * @returns `current` — entries presently held; `cap` — the buffer limit;
 *          `evicted` — how far the running push count exceeds the cap
 *          (never negative).
 */
export function getModificationHistoryStats(): {
  current: number;
  cap: number;
  evicted: number;
} {
  const overCap = modHistoryTotalPushed - MOD_HISTORY_CAP;
  const evicted = overCap > 0 ? overCap : 0;
  return {
    current: modificationHistory.length,
    cap: MOD_HISTORY_CAP,
    evicted,
  };
}
/**
* Reset all modifications, restoring original values.
*/
@ -713,7 +648,6 @@ export async function resetModifications(page: Page): Promise<void> {
}
}
modificationHistory.length = 0;
modHistoryTotalPushed = 0;
}
/**

View File

@ -11,13 +11,11 @@
import * as fs from 'fs';
import * as path from 'path';
import { spawn as nodeSpawn } from 'child_process';
import { safeUnlink, safeUnlinkQuiet, safeKill, isProcessAlive } from './error-handling';
import { writeSecureFile, mkdirSecure } from './file-permissions';
import { resolveConfig, ensureStateDir, readVersionHash } from './config';
import { parseProxyConfig, computeConfigHash, ProxyConfigError } from './proxy-config';
import { redactProxyUrl } from './proxy-redact';
import { spawnTerminalAgent } from './terminal-agent-control';
const config = resolveConfig();
const IS_WINDOWS = process.platform === 'win32';
@ -211,86 +209,6 @@ function cleanupLegacyState(): void {
}
}
// ─── Chromium profile lock helpers (#1781) ─────────────────────
/**
 * Location of the Chromium profile used by headed/connect sessions:
 * `<HOME>/.gstack/chromium-profile`, with `/tmp` standing in when HOME is
 * unset or empty.
 */
function chromiumProfileDir(): string {
  const homeDir = process.env.HOME || '/tmp';
  return path.join(homeDir, '.gstack', 'chromium-profile');
}
/**
 * Delete Chromium's SingletonLock / SingletonSocket / SingletonCookie files
 * from the profile directory so a relaunch can acquire the profile.
 * Safe to call when the files are absent.
 *
 * @param profileDir Profile directory to clean; defaults to chromiumProfileDir().
 */
function cleanChromiumProfileLocks(profileDir: string = chromiumProfileDir()): void {
  const lockNames = ['SingletonLock', 'SingletonSocket', 'SingletonCookie'];
  lockNames.forEach((name) => {
    safeUnlinkQuiet(path.join(profileDir, name));
  });
}
/**
 * Terminate a Chromium process that still holds the profile's SingletonLock.
 *
 * The lock is read as a symlink whose target looks like "hostname-PID"; the
 * trailing number is treated as the owning PID. If that process is alive it
 * gets SIGTERM, then SIGKILL if it is still alive one second later.
 * A missing lock (ENOENT) or a non-symlink lock (EINVAL) is a no-op; any
 * other error is rethrown.
 *
 * @param profileDir Profile directory to inspect; defaults to chromiumProfileDir().
 */
async function killOrphanChromium(profileDir: string = chromiumProfileDir()): Promise<void> {
  const pause = (ms: number) => new Promise(done => setTimeout(done, ms));
  try {
    const lockTarget = fs.readlinkSync(path.join(profileDir, 'SingletonLock')); // "hostname-12345"
    const pidText = lockTarget.split('-').pop() || '';
    const orphanPid = parseInt(pidText, 10);
    // NaN / 0 (unparseable target) or an already-dead owner: nothing to kill.
    if (!orphanPid || !isProcessAlive(orphanPid)) return;

    safeKill(orphanPid, 'SIGTERM');
    await pause(1000);
    if (!isProcessAlive(orphanPid)) return;

    // Did not exit after SIGTERM — force it, then give the OS a moment.
    safeKill(orphanPid, 'SIGKILL');
    await pause(500);
  } catch (err: any) {
    const code = err?.code;
    if (code !== 'ENOENT' && code !== 'EINVAL') throw err;
  }
}
/**
 * Bounded /health probe. Checks the server up to `attempts` times, waiting
 * `backoffMs` between tries (no wait after the final one).
 *
 * Used to tell a busy-but-alive daemon from a dead one (#1781), so a slow
 * server is not killed and restarted into a crash-loop.
 *
 * @param port      Port the server is expected to answer on.
 * @param attempts  Maximum number of probes (default 3).
 * @param backoffMs Delay between probes in milliseconds (default 250).
 * @returns true as soon as one probe succeeds, false if none do.
 */
async function probeHealthWithBackoff(port: number, attempts = 3, backoffMs = 250): Promise<boolean> {
  let remaining = attempts;
  while (remaining > 0) {
    if (await isServerHealthy(port)) return true;
    remaining -= 1;
    if (remaining > 0) await Bun.sleep(backoffMs);
  }
  return false;
}
/**
 * Compute the environment overrides for an auto-restart after a crash.
 *
 * Each setting is taken from the current invocation's flags first and
 * falls back to the persisted server state where one exists, so a restart
 * triggered by a plain command (no --headed flag) does not silently turn a
 * headed session into a headless one (#1781). Pure; exported for tests.
 *
 * @param globalFlags Flags parsed for this CLI invocation, if any.
 * @param oldState    Last persisted server state, if any.
 * @returns Only the variables that apply: BROWSE_PROXY_URL, BROWSE_HEADED,
 *          BROWSE_CONFIG_HASH. Empty when nothing needs to be carried over.
 */
export function buildRestartEnv(
  globalFlags: GlobalFlags | null | undefined,
  oldState: ServerState | null,
): Record<string, string> {
  const env: Record<string, string> = {};
  // Record a variable only when it has a non-empty value.
  const setIfPresent = (key: string, value: string | null | undefined): void => {
    if (value) env[key] = value;
  };

  setIfPresent('BROWSE_PROXY_URL', globalFlags?.proxyUrl);

  const wantsHeaded = globalFlags?.headed || oldState?.mode === 'headed';
  setIfPresent('BROWSE_HEADED', wantsHeaded ? '1' : undefined);

  setIfPresent('BROWSE_CONFIG_HASH', globalFlags?.configHash || oldState?.configHash);

  return env;
}
/**
 * macOS only: ask "Google Chrome for Testing" to activate so the headed
 * window comes to the front. The window frequently opens behind the active
 * window or on another Space — the first thing users read as "I can't see
 * the browser" (#1781).
 *
 * Best-effort and fire-and-forget: the osascript child is detached and
 * unref'd, and no failure is reported to the caller. The app name is a
 * fixed literal (no interpolation). No-op on other platforms.
 */
function raiseHeadedWindowMacOS(): void {
  if (process.platform !== 'darwin') return;
  try {
    const child = nodeSpawn('osascript', ['-e', 'tell application "Google Chrome for Testing" to activate'], {
      stdio: 'ignore',
      detached: true,
    });
    // A failed spawn (e.g. osascript missing → ENOENT) is delivered later as
    // an 'error' event, outside this try/catch. Without a listener that event
    // is rethrown as an uncaught exception, so absorb it here.
    child.on('error', () => {
      // non-fatal: window raise is cosmetic
    });
    child.unref();
  } catch {
    // Synchronous spawn failure (bad options, etc.) — non-fatal
  }
}
// ─── Server Lifecycle ──────────────────────────────────────────
async function startServer(extraEnv?: Record<string, string>): Promise<ServerState> {
ensureStateDir(config);
@ -299,12 +217,7 @@ async function startServer(extraEnv?: Record<string, string>): Promise<ServerSta
safeUnlink(config.stateFile);
safeUnlink(path.join(config.stateDir, 'browse-startup-error.log'));
// #1781: clear a stale Chromium profile lock (and kill the orphan still
// holding it) before launch, so an auto-restart after an abrupt kill isn't
// blocked by the previous Chromium's SingletonLock — the self-inflicted
// crash-loop. Previously only the manual connect preamble did this.
await killOrphanChromium();
cleanChromiumProfileLocks();
let proc: any = null;
// Allow the caller to opt out of the parent-process watchdog by setting
// BROWSE_PARENT_PID=0 in the environment. Useful for CI, non-interactive
@ -327,22 +240,12 @@ async function startServer(extraEnv?: Record<string, string>): Promise<ServerSta
`${extraEnvStr})}).unref()`;
Bun.spawnSync(['node', '-e', launcherCode], { stdio: ['ignore', 'ignore', 'ignore'] });
} else {
// macOS/Linux: Bun.spawn().unref() only removes the child from Bun's event
// loop — it does NOT call setsid(), so the spawned server stays in the
// parent's process session. When the CLI runs inside a session-managed
// shell (e.g. Claude Code's per-command Bash sandbox, Conductor, CI
// step runners), the session leader's exit sends SIGHUP to every PID in
// the session, killing the bun server (and its Chromium grandchildren).
// Even with BROWSE_PARENT_PID=0 disabling the watchdog, SIGHUP still
// reaps the server. Use Node's child_process.spawn with detached:true,
// which calls setsid() so the server becomes its own session leader
// (PPID=1, STAT=Ss) and survives the spawning shell's exit. Mirrors
// the Windows path's rationale — same root cause, different OS API.
nodeSpawn('bun', ['run', SERVER_SCRIPT], {
detached: true,
stdio: ['ignore', 'ignore', 'ignore'],
// macOS/Linux: Bun.spawn + unref works correctly
proc = Bun.spawn(['bun', 'run', SERVER_SCRIPT], {
stdio: ['ignore', 'pipe', 'pipe'],
env: { ...process.env, BROWSE_STATE_FILE: config.stateFile, BROWSE_PARENT_PID: parentPid, ...extraEnv },
}).unref();
});
proc.unref();
}
// Wait for server to become healthy.
@ -357,17 +260,27 @@ async function startServer(extraEnv?: Record<string, string>): Promise<ServerSta
await Bun.sleep(100);
}
// Server didn't start in time — check the on-disk startup error log.
// Both platforms now spawn with stdio: 'ignore', so the server writes
// errors to disk for the CLI to read (see server.ts start().catch).
const errorLogPath = path.join(config.stateDir, 'browse-startup-error.log');
try {
const errorLog = fs.readFileSync(errorLogPath, 'utf-8').trim();
if (errorLog) {
throw new Error(`Server failed to start:\n${errorLog}`);
// Server didn't start in time — try to get error details
if (proc?.stderr) {
// macOS/Linux: read stderr from the spawned process
const reader = proc.stderr.getReader();
const { value } = await reader.read();
if (value) {
const errText = new TextDecoder().decode(value);
throw new Error(`Server failed to start:\n${errText}`);
}
} else {
// Windows: check startup error log (server writes errors to disk since
// stderr is unavailable due to stdio: 'ignore' for detachment)
const errorLogPath = path.join(config.stateDir, 'browse-startup-error.log');
try {
const errorLog = fs.readFileSync(errorLogPath, 'utf-8').trim();
if (errorLog) {
throw new Error(`Server failed to start:\n${errorLog}`);
}
} catch (e: any) {
if (e.code !== 'ENOENT') throw e;
}
} catch (e: any) {
if (e.code !== 'ENOENT') throw e;
}
throw new Error(`Server failed to start within ${MAX_START_WAIT / 1000}s`);
}
@ -573,42 +486,26 @@ async function sendCommand(state: ServerState, command: string, args: string[],
}
} catch (err: any) {
if (err.name === 'AbortError') {
// #1781: a 30s timeout on a heavy page usually means busy, not dead.
// Don't kill a live server (that's what triggered the crash-loop) — report
// and exit so the user can retry rather than losing their (headed) window.
const ts = readState();
const alive = ts?.pid ? isProcessAlive(ts.pid) : false;
console.error(alive
? '[browse] Command timed out after 30s (server still alive — busy, not restarting). Retry, or raise load.'
: '[browse] Command timed out after 30s');
console.error('[browse] Command timed out after 30s');
process.exit(1);
}
// Connection error — server may have crashed, OR may just be busy.
// Connection error — server may have crashed
if (err.code === 'ECONNREFUSED' || err.code === 'ECONNRESET' || err.message?.includes('fetch failed')) {
const oldState = readState();
// #1781 busy-vs-dead: a single-threaded daemon under beacon/extension load
// can briefly stop answering HTTP while still alive. Before declaring a
// crash, if the process is alive give /health a bounded chance to recover
// and just retry the command — never kill+restart a live-but-busy server.
if (oldState?.pid && isProcessAlive(oldState.pid) && await probeHealthWithBackoff(oldState.port)) {
if (retries >= 1) throw new Error('[browse] Server unresponsive after retry — aborting');
console.error('[browse] Server was briefly unresponsive (busy); retrying command...');
return sendCommand(oldState, command, args, retries + 1);
}
// Truly dead (or health never recovered) → restart.
if (retries >= 1) throw new Error('[browse] Server crashed twice in a row — aborting');
console.error('[browse] Server connection lost. Restarting...');
// Kill the old server to avoid orphaned chromium processes
const oldState = readState();
if (oldState && oldState.pid) {
await killServer(oldState.pid);
}
// startServer() now clears the Chromium SingletonLock + reaps the orphan,
// so the relaunch isn't blocked by the dead Chromium's profile lock (#1781).
//
// Reapply --proxy / --headed when restarting. headed comes from THIS
// invocation OR the persisted server mode, so a restart triggered by a
// plain command (goto/status, no --headed) never silently downgrades a
// headed session to headless (#1781). Same for proxy/configHash.
const restartEnv = buildRestartEnv(_globalFlags, oldState);
// Reapply --proxy / --headed flags from this invocation when restarting
// after a crash. Without this, a proxied daemon that dies mid-command
// would silently restart in default direct/headless mode and bypass
// the SOCKS bridge.
const restartEnv: Record<string, string> = {};
if (_globalFlags?.proxyUrl) restartEnv.BROWSE_PROXY_URL = _globalFlags.proxyUrl;
if (_globalFlags?.headed) restartEnv.BROWSE_HEADED = '1';
if (_globalFlags?.configHash) restartEnv.BROWSE_CONFIG_HASH = _globalFlags.configHash;
const newState = await startServer(Object.keys(restartEnv).length ? restartEnv : undefined);
return sendCommand(newState, command, args, retries + 1);
}
@ -1069,11 +966,30 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
}
}
// Kill an orphaned Chromium still holding the profile lock (the Bun server
// PID's Chromium child can outlive an abrupt kill/crash), then clear the
// lock files so the launch is clean. Shared with the auto-restart path (#1781).
await killOrphanChromium();
cleanChromiumProfileLocks();
// Kill orphaned Chromium processes that may still hold the profile lock.
// The server PID is the Bun process; Chromium is a child that can outlive it
// if the server is killed abruptly (SIGKILL, crash, manual rm of state file).
const profileDir = path.join(process.env.HOME || '/tmp', '.gstack', 'chromium-profile');
try {
const singletonLock = path.join(profileDir, 'SingletonLock');
const lockTarget = fs.readlinkSync(singletonLock); // e.g. "hostname-12345"
const orphanPid = parseInt(lockTarget.split('-').pop() || '', 10);
if (orphanPid && isProcessAlive(orphanPid)) {
safeKill(orphanPid, 'SIGTERM');
await new Promise(resolve => setTimeout(resolve, 1000));
if (isProcessAlive(orphanPid)) {
safeKill(orphanPid, 'SIGKILL');
await new Promise(resolve => setTimeout(resolve, 500));
}
}
} catch (err: any) {
if (err?.code !== 'ENOENT' && err?.code !== 'EINVAL') throw err;
}
// Clean up Chromium profile locks (can persist after crashes)
for (const lockFile of ['SingletonLock', 'SingletonSocket', 'SingletonCookie']) {
safeUnlinkQuiet(path.join(profileDir, lockFile));
}
// Delete stale state file
safeUnlinkQuiet(config.stateFile);
@ -1111,29 +1027,38 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
});
const status = await resp.text();
console.log(`Connected to real Chrome\n${status}`);
// #1781: surface the window — it often opens behind/on another Space.
raiseHeadedWindowMacOS();
if (process.platform === 'darwin') {
console.log('(If you still don\'t see it, check Mission Control / other Spaces.)');
}
// sidebar-agent.ts spawn was here. Ripped alongside the chat queue —
// the Terminal pane runs an interactive PTY now, no more one-shot
// claude -p subprocesses to multiplex.
// Auto-start terminal agent (non-compiled bun process). Owns the PTY
// WebSocket for the sidebar Terminal pane. Routes through the shared
// spawnTerminalAgent helper so the CLI cold-start path and the
// server.ts watchdog respawn path share one implementation. The
// helper handles prior-PID cleanup, script lookup, and env wiring.
// WebSocket for the sidebar Terminal pane.
let termAgentScript = path.resolve(__dirname, 'terminal-agent.ts');
if (!fs.existsSync(termAgentScript)) {
termAgentScript = path.resolve(path.dirname(process.execPath), '..', 'src', 'terminal-agent.ts');
}
try {
const newPid = spawnTerminalAgent({
stateFile: config.stateFile,
serverPort: newState.port,
cwd: config.projectDir,
});
if (newPid) {
console.log(`[browse] Terminal agent started (PID: ${newPid})`);
if (fs.existsSync(termAgentScript)) {
// Kill old terminal-agents so a stale port file can't trick the
// server into routing /pty-session at a dead listener.
try {
const { spawnSync } = require('child_process');
spawnSync('pkill', ['-f', 'terminal-agent\\.ts'], { stdio: 'ignore', timeout: 3000 });
} catch (err: any) {
if (err?.code !== 'ENOENT') throw err;
}
const termProc = Bun.spawn(['bun', 'run', termAgentScript], {
cwd: config.projectDir,
env: {
...process.env,
BROWSE_STATE_FILE: config.stateFile,
BROWSE_SERVER_PORT: String(newState.port),
},
stdio: ['ignore', 'ignore', 'ignore'],
});
termProc.unref();
console.log(`[browse] Terminal agent started (PID: ${termProc.pid})`);
}
} catch (err: any) {
// Non-fatal: chat still works without the terminal agent.
@ -1143,96 +1068,6 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
console.error(`[browse] Connect failed: ${err.message}`);
process.exit(1);
}
// ─── Outer Supervisor (v1.44+, opt-in) ──────────────────────────
//
// Default: fire-and-forget (CLI exits, server runs detached). This is
// the contract every existing call site relies on, including Claude
// Code's Bash tool which expects `$B connect` to return promptly.
//
// Opt-in via `--supervise` flag or BROWSE_SUPERVISE=1 env: the CLI
// stays attached, polls the spawned server's PID every 30s, and
// respawns it through the same headed-mode startServer path on
// unexpected exit. Crash-loop guard: 5 respawns inside 5 min →
// give up and exit 1 with a clear error. SIGINT / SIGTERM cleanly
// tear down the supervised server before exit.
//
// Out of scope for v1.44 minimum: routing the Chromium-disconnect
// exit-code-1 path back through this supervisor. The terminal-agent
// watchdog (T5) already covers the highest-frequency restart case;
// Chromium-crash-respawn is documented as a follow-up so the
// supervisor stays a tight, testable primitive.
const superviseRequested = commandArgs.includes('--supervise')
|| process.env.BROWSE_SUPERVISE === '1';
if (!superviseRequested) {
process.exit(0);
}
console.log('[browse] Supervisor mode: monitoring server. Ctrl-C to stop.');
let supervisorExiting = false;
const teardownAndExit = (signal: string) => {
if (supervisorExiting) return;
supervisorExiting = true;
console.log(`\n[browse] ${signal} received — stopping server.`);
const state = readState();
if (state?.pid && isProcessAlive(state.pid)) {
safeKill(state.pid, 'SIGTERM');
}
process.exit(0);
};
process.on('SIGINT', () => teardownAndExit('SIGINT'));
process.on('SIGTERM', () => teardownAndExit('SIGTERM'));
const SUPERVISOR_TICK_MS = parseInt(
process.env.GSTACK_SUPERVISOR_TICK_MS || '30000',
10,
);
const SUPERVISOR_GUARD_WINDOW_MS = 5 * 60_000;
const SUPERVISOR_GUARD_MAX = 5;
const SUPERVISOR_BACKOFF_MS = (process.env.GSTACK_SUPERVISOR_BACKOFF || '1000,2000,4000,8000,30000')
.split(',').map(s => parseInt(s.trim(), 10)).filter(n => Number.isFinite(n));
const respawns: number[] = [];
while (!supervisorExiting) {
await new Promise(resolve => setTimeout(resolve, SUPERVISOR_TICK_MS));
if (supervisorExiting) break;
const state = readState();
if (state?.pid && isProcessAlive(state.pid)) continue;
// Server died. Prune rolling window and check guard.
const now = Date.now();
while (respawns.length && now - respawns[0] > SUPERVISOR_GUARD_WINDOW_MS) {
respawns.shift();
}
if (respawns.length >= SUPERVISOR_GUARD_MAX) {
console.error(
`[browse] Supervisor: ${SUPERVISOR_GUARD_MAX} crashes in ${SUPERVISOR_GUARD_WINDOW_MS / 1000}s — giving up.`,
);
process.exit(1);
}
const attempt = respawns.length;
respawns.push(now);
const backoff = SUPERVISOR_BACKOFF_MS[Math.min(attempt, SUPERVISOR_BACKOFF_MS.length - 1)] ?? 30_000;
console.warn(`[browse] Supervisor: server PID gone — respawning in ${backoff}ms (attempt ${attempt + 1}/${SUPERVISOR_GUARD_MAX})...`);
await new Promise(resolve => setTimeout(resolve, backoff));
if (supervisorExiting) break;
try {
const respawned = await startServer(serverEnv);
console.log(`[browse] Supervisor: server respawned (PID ${respawned.pid}, port ${respawned.port}).`);
// Re-spawn the terminal-agent too; same env wiring as the initial connect.
try {
spawnTerminalAgent({
stateFile: config.stateFile,
serverPort: respawned.port,
cwd: config.projectDir,
});
} catch (err: any) {
console.warn(`[browse] Supervisor: terminal-agent respawn failed: ${err?.message || err}`);
}
} catch (err: any) {
console.error(`[browse] Supervisor: server respawn failed: ${err?.message || err}`);
// Let the next tick try again — the crash-loop guard already
// bounded the retries via the rolling window.
}
}
process.exit(0);
}
@ -1283,11 +1118,11 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
safeKill(existingState.pid, 'SIGKILL');
}
}
// #1781: killing the daemon can orphan its Chromium child tree, which keeps
// holding the SingletonLock and makes the next `connect` fail to launch.
// Reap the orphan via the lock, then clear the lock files + state.
await killOrphanChromium();
cleanChromiumProfileLocks();
// Clean profile locks and state file
const profileDir = path.join(process.env.HOME || '/tmp', '.gstack', 'chromium-profile');
for (const lockFile of ['SingletonLock', 'SingletonSocket', 'SingletonCookie']) {
safeUnlinkQuiet(path.join(profileDir, lockFile));
}
// Xvfb orphan cleanup: if the recorded PID still matches our Xvfb (by
// cmdline AND start-time), kill it. PID-only would risk killing a
// recycled PID belonging to an unrelated process.
@ -1347,11 +1182,6 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
}
await sendCommand(state, command, commandArgs);
// #1781: `focus` means "show me the window". The server-side focus activates
// the page via CDP, but on macOS the app can still sit on another Space — pull
// it to the user's current Space too.
if (command === 'focus') raiseHeadedWindowMacOS();
}
if (import.meta.main) {

View File

@ -45,7 +45,6 @@ export const META_COMMANDS = new Set([
'domain-skill',
'skill',
'cdp',
'memory',
]);
export const ALL_COMMANDS = new Set([...READ_COMMANDS, ...WRITE_COMMANDS, ...META_COMMANDS]);
@ -90,7 +89,6 @@ export function wrapUntrustedContent(result: string, url: string): string {
export const COMMAND_DESCRIPTIONS: Record<string, { category: string; description: string; usage?: string }> = {
// Navigation
'memory': { category: 'Server', description: 'Snapshot Bun heap + per-tab JS heap + Chromium process tree + bounded buffer sizes. JSON output with --json.', usage: 'memory [--json]' },
'goto': { category: 'Navigation', description: 'Navigate to URL (http://, https://, or file:// scoped to cwd/TEMP_DIR)', usage: 'goto <url>' },
'load-html': { category: 'Navigation', description: 'Load HTML via setContent. Accepts a file path under safe-dirs (validated), OR --from-file <payload.json> with {"html":"...","waitUntil":"..."} for large inline HTML (Windows argv safe).', usage: 'load-html <file> [--wait-until load|domcontentloaded|networkidle] [--tab-id <N>] | load-html --from-file <payload.json> [--tab-id <N>]' },
'back': { category: 'Navigation', description: 'History back' },

View File

@ -5,7 +5,7 @@
* Outputs the absolute path to the browse binary on stdout, or exits 1 if not found.
*/
import { accessSync, constants } from 'fs';
import { existsSync } from 'fs';
import { join } from 'path';
import { homedir } from 'os';
@ -24,35 +24,6 @@ function getGitRoot(): string | null {
}
}
/**
 * Report whether `p` passes an `accessSync(p, X_OK)` probe.
 *
 * Per the note this replaces: X_OK checks the executable bit on
 * Linux/macOS and degrades to an existence check on Windows (no true
 * execute bit). Mirrors make-pdf/src/browseClient.ts:159 /
 * make-pdf/src/pdftotext.ts:117.
 *
 * @param p Path to probe.
 * @returns `true` when the probe succeeds, `false` when it throws for any reason.
 */
function isExecutable(p: string): boolean {
  let reachable = true;
  try {
    accessSync(p, constants.X_OK);
  } catch {
    reachable = false;
  }
  return reachable;
}
/**
 * Resolve a bare binary path to the file that actually exists on disk.
 *
 * The bare path is probed first. On Windows only, `.exe`, `.cmd` and
 * `.bat` are then tried in that order, because `bun build --compile`
 * appends `.exe` to the output filename. Linux/macOS only ever probe the
 * bare path. Mirrors the helper in make-pdf/src/browseClient.ts:89 and
 * make-pdf/src/pdftotext.ts:52.
 *
 * @param base Path without a platform-specific extension.
 * @returns The first candidate that passes `isExecutable`, or `null` if none does.
 */
function findExecutable(base: string): string | null {
  const candidates: string[] = [base];
  if (process.platform === 'win32') {
    candidates.push(base + '.exe', base + '.cmd', base + '.bat');
  }
  for (const candidate of candidates) {
    if (isExecutable(candidate)) return candidate;
  }
  return null;
}
export function locateBinary(): string | null {
const root = getGitRoot();
const home = homedir();
@ -62,26 +33,14 @@ export function locateBinary(): string | null {
if (root) {
for (const m of markers) {
const local = join(root, m, 'skills', 'gstack', 'browse', 'dist', 'browse');
const found = findExecutable(local);
if (found) return found;
if (existsSync(local)) return local;
}
// Source-checkout fallback (no installed skill layout — the binary
// lives directly at <repo>/browse/dist/browse[.exe]). Hit by:
// - gstack repo dev workflow before `./setup` runs
// - the windows-setup-e2e.yml CI workflow which builds binaries
// in place but never installs them under a marker dir
// - make-pdf consumers running from a sibling source checkout
const sourceCheckout = join(root, 'browse', 'dist', 'browse');
const sourceFound = findExecutable(sourceCheckout);
if (sourceFound) return sourceFound;
}
// Global fallback
for (const m of markers) {
const global = join(home, m, 'skills', 'gstack', 'browse', 'dist', 'browse');
const found = findExecutable(global);
if (found) return found;
if (existsSync(global)) return global;
}
return null;

View File

@ -1,78 +0,0 @@
/**
* find-security-sidecar — resolve the Node entry that runs the L4 ML
* classifier sidecar.
*
* The sidecar can't be bundled into the compiled browse binary because
* onnxruntime-node fails to dlopen from Bun's compile extract dir. It runs
* as a separate Node subprocess instead. This module resolves the right
* path + interpreter on each platform:
*
* 1. Prefer node on PATH + a bundled JS entry at
* browse/dist/security-sidecar.js (built by package.json's
* build:security-sidecar script).
* 2. Dev fallback: node + browse/src/security-sidecar-entry.ts via tsx
* (only available in the source checkout, not the compiled install).
* 3. If Node is missing or no entry resolves, return null. The /pty-inject-scan
* endpoint then responds with l4 { available: false } and the extension
* degrades to WARN+confirm (D7).
*/
import { existsSync } from "fs";
import { join, dirname } from "path";
import { execFileSync } from "child_process";
/** Interpreter + entry-script pair returned by findSecuritySidecar(). */
export interface SidecarLocation {
/** Interpreter command to launch; nodeOnPath() yields the bare name "node". */
node: string;
/** Path of the script to run (dist/security-sidecar.js or src/security-sidecar-entry.ts). */
entry: string;
/** "compiled" if running from browse/dist/, "dev" if running from src */
mode: "compiled" | "dev";
}
/**
 * Probe for a usable Node interpreter by running `node --version`
 * (output discarded, 2 s timeout).
 *
 * @returns The command name `"node"` when the probe exits cleanly,
 *          `null` when it throws for any reason.
 */
function nodeOnPath(): string | null {
  let command: string | null = "node";
  try {
    execFileSync("node", ["--version"], { stdio: "ignore", timeout: 2000 });
  } catch {
    command = null;
  }
  return command;
}
/**
 * Find the directory that sidecar entry paths are resolved against.
 *
 * Starts at the directory of `import.meta.path` and inspects at most six
 * directories, walking toward the filesystem root. A directory is
 * accepted when it contains either `browse/dist/security-sidecar.js` or
 * `src/security-sidecar-entry.ts`. Note the asymmetry: a compiled hit
 * returns the parent of `browse/`, a dev hit returns the directory that
 * holds `src/`.
 *
 * Per the original note, when running compiled the module path points at
 * Bun's extract temp dir, which is why the walk is needed.
 *
 * @returns The first matching directory, or `process.cwd()` if none matches.
 */
function browseRoot(): string {
  let dir = dirname(import.meta.path || "");
  let hops = 0;
  while (hops < 6) {
    const hasCompiledEntry = existsSync(join(dir, "browse", "dist", "security-sidecar.js"));
    if (hasCompiledEntry || existsSync(join(dir, "src", "security-sidecar-entry.ts"))) {
      return dir;
    }
    const parent = dirname(dir);
    // dirname() is a fixed point at the filesystem root (and for ".").
    if (parent === dir) break;
    dir = parent;
    hops += 1;
  }
  return process.cwd();
}
/**
 * Resolve the interpreter and entry script for the security sidecar.
 *
 * Candidates are tried in order:
 *   1. `<root>/browse/dist/security-sidecar.js`  → mode "compiled"
 *   2. `<root>/src/security-sidecar-entry.ts`    → mode "dev"
 * Compiled installs won't have src/ on disk, so the dev candidate only
 * resolves from a source checkout.
 *
 * @returns The first candidate that exists, or `null` when Node is not
 *          available or neither entry is on disk.
 */
export function findSecuritySidecar(): SidecarLocation | null {
  const node = nodeOnPath();
  if (!node) return null;

  const root = browseRoot();
  const candidates: Array<Pick<SidecarLocation, "entry" | "mode">> = [
    { entry: join(root, "browse", "dist", "security-sidecar.js"), mode: "compiled" },
    { entry: join(root, "src", "security-sidecar-entry.ts"), mode: "dev" },
  ];
  for (const { entry, mode } of candidates) {
    if (existsSync(entry)) return { node, entry, mode };
  }
  return null;
}

View File

@ -1,115 +0,0 @@
// `$B memory` — diagnostic snapshot of Bun heap + per-tab JS heap +
// Chromium process tree + bounded buffer sizes. Lives in its own file
// because the meta-commands dispatcher imports it lazily — projects
// that never run the diagnostic don't pay the import-graph cost (CDP
// bridge, memory-snapshot types, buffer accessors).
import type { BrowserManager } from './browser-manager';
import { formatBytes, type MemorySnapshot, type MemoryStructureStats } from './memory-snapshot';
import { getModificationHistoryStats } from './cdp-inspector';
import { getSubscriberCount as getActivitySubscriberCount } from './activity';
import { getInspectorSubscriberCount } from './server';
import { consoleBuffer, networkBuffer, dialogBuffer } from './buffers';
import { getCaptureBuffer } from './network-capture';
/**
 * Gather the MemoryStructureStats from the modules that own each buffer.
 * Browser-manager doesn't take a hard dep on every buffer-owning module —
 * the snapshot caller collects the numbers here and passes them in.
 */
function collectStructureStats(): MemoryStructureStats {
  const modificationHistory = getModificationHistoryStats();
  const activitySubscribers = getActivitySubscriberCount();
  const inspectorSubscribers = getInspectorSubscriberCount();
  const consoleBufferLen = consoleBuffer.length;
  const networkBufferLen = networkBuffer.length;
  const dialogBufferLen = dialogBuffer.length;
  const captureBufferBytes = getCaptureBuffer().byteSize;
  return {
    modificationHistory,
    activitySubscribers,
    inspectorSubscribers,
    consoleBufferLen,
    networkBufferLen,
    dialogBufferLen,
    captureBufferBytes,
  };
}
/**
 * Pretty-print the snapshot for terminal output. JSON mode (--json) goes
 * straight through JSON.stringify so the extension footer and any test
 * harness can consume it programmatically.
 *
 * Report layout, top to bottom:
 *   - one line of Bun server memory (RSS, heap used/total, external);
 *   - Chromium process count, grouped by process type;
 *   - up to 10 tabs sorted by JS heap (descending), plus an "...and N more" tail;
 *   - the Bun-side in-memory structure sizes;
 *   - any snapshot notes.
 *
 * @param s Snapshot to render.
 * @returns Newline-joined report text (no trailing newline).
 */
function formatSnapshotText(s: MemorySnapshot): string {
  const lines: string[] = [];
  lines.push(
    `Bun server: RSS: ${formatBytes(s.bunServer.rss)} ` +
      `heap: ${formatBytes(s.bunServer.heapUsed)} / ${formatBytes(s.bunServer.heapTotal)} ` +
      `external: ${formatBytes(s.bunServer.external)}`,
  );

  if (s.processes && s.processes.length > 0) {
    // Group by type so the user sees "renderer=12" instead of 12 separate rows.
    const byType: Record<string, number> = {};
    for (const p of s.processes) byType[p.type] = (byType[p.type] ?? 0) + 1;
    const typeSummary = Object.entries(byType)
      .map(([t, n]) => `${t}=${n}`)
      .join(' ');
    lines.push(`Chromium processes: ${s.processes.length} total (${typeSummary})`);
  } else if (s.processes === null) {
    lines.push('Chromium processes: (unavailable — see notes)');
  } else {
    lines.push('Chromium processes: 0');
  }

  if (s.tabs.length > 0) {
    // Sort a copy by JS heap descending; show top 10 plus "...N more" tail.
    const sorted = [...s.tabs].sort((a, b) => b.jsHeapUsed - a.jsHeapUsed);
    const shown = sorted.slice(0, 10);
    lines.push(`Renderers: ${s.tabs.length} tabs (top by JS heap):`);
    for (const t of shown) {
      // Cap URLs at 80 characters (77 + "...") to keep rows readable.
      const urlShort = t.url.length > 80 ? t.url.slice(0, 77) + '...' : t.url;
      lines.push(
        ` [${formatBytes(t.jsHeapUsed).padStart(8)} JS, ` +
          `${String(t.nodes).padStart(6)} nodes, ` +
          `${String(t.listeners).padStart(5)} listeners] ` +
          // Separate the id from the URL; without it the row reads "tab #3https://…".
          `tab #${t.id} ${urlShort}`,
      );
    }
    if (sorted.length > shown.length) {
      lines.push(` ...and ${sorted.length - shown.length} more`);
    }
  } else {
    lines.push('Renderers: (no tabs tracked)');
  }

  lines.push('─────────────────────────────────────────────────');
  lines.push('In-memory structures (Bun side):');
  const m = s.structures.modificationHistory;
  lines.push(
    ` modificationHistory: ${m.current} / ${m.cap} entries` +
      (m.evicted > 0 ? ` (${m.evicted} evicted since reset)` : ''),
  );
  lines.push(` inspectorSubscribers: ${s.structures.inspectorSubscribers}`);
  lines.push(` activitySubscribers: ${s.structures.activitySubscribers}`);
  lines.push(` consoleBuffer: ${s.structures.consoleBufferLen} entries`);
  lines.push(` networkBuffer: ${s.structures.networkBufferLen} entries`);
  lines.push(` dialogBuffer: ${s.structures.dialogBufferLen} entries`);
  lines.push(` captureBuffer: ${formatBytes(s.structures.captureBufferBytes)}`);

  if (s.notes.length > 0) {
    lines.push('');
    lines.push('Notes:');
    for (const n of s.notes) lines.push(` - ${n}`);
  }
  return lines.join('\n');
}
/**
 * Implementation of the `memory` meta command.
 *
 * @param args Command arguments; the presence of `--json` selects JSON output.
 * @param bm   Browser manager that produces the snapshot.
 * @returns `JSON.stringify(snapshot)` in JSON mode, otherwise the text report.
 */
export async function handleMemoryCommand(args: string[], bm: BrowserManager): Promise<string> {
  const wantsJson = args.includes('--json');
  const snapshot = await bm.getMemorySnapshot(collectStructureStats());
  return wantsJson ? JSON.stringify(snapshot) : formatSnapshotText(snapshot);
}
/**
 * Entry point used by the /memory HTTP endpoint — same data as the
 * `memory` command, returned as the snapshot object rather than text.
 */
export async function buildMemorySnapshotJson(bm: BrowserManager): Promise<MemorySnapshot> {
  return bm.getMemorySnapshot(collectStructureStats());
}

View File

@ -1,73 +0,0 @@
// Shared types for the $B memory diagnostic command and the /memory
// endpoint. Lives in its own module so server.ts, read-commands.ts, and
// the extension footer poll can import without taking a circular dep on
// browser-manager.ts.
//
// Background: the gbrowser-OOM investigation (160 GB Activity Monitor
// reading on a friend's machine) needed a diagnostic that could land
// before the next incident — measurement comes first, fixes come after.
// $B memory is that diagnostic.
/** Counts/bytes for the bounded in-memory structures on the Bun side. */
export interface MemoryStructureStats {
// Reported as "<current> / <cap> entries", plus "<evicted> evicted since reset" when evicted > 0.
modificationHistory: { current: number; cap: number; evicted: number };
// Subscriber count reported by the activity module.
activitySubscribers: number;
// Subscriber count reported by the server's inspector stream.
inspectorSubscribers: number;
// Entry counts (`.length`) of the console / network / dialog buffers.
consoleBufferLen: number;
networkBufferLen: number;
dialogBufferLen: number;
// `byteSize` of the network capture buffer.
captureBufferBytes: number;
}
/** Per-tab JS heap snapshot (CDP Performance.getMetrics). */
export interface MemoryTabSnapshot {
// Tab id; printed as "tab #<id>" in the text report.
id: number;
// Page URL; the text report truncates it to 80 characters.
url: string;
title: string;
// Rendered with formatBytes, so treated as bytes; the text report sorts tabs by jsHeapUsed.
jsHeapUsed: number;
jsHeapTotal: number;
// NOTE(review): presumably the Documents / Nodes / JSEventListeners counters
// from Performance.getMetrics — confirm against the producer.
documents: number;
nodes: number;
listeners: number;
}
/** Chromium process metadata via CDP SystemInfo.getProcessInfo. */
export interface MemoryProcess {
/** Chromium-internal process id (not OS PID). */
id: number;
/**
 * 'browser' | 'renderer' | 'gpu' | 'utility' | 'extension' | ...
 * The text report groups and counts processes by this value.
 */
type: string;
/** CPU time accumulated since process start (seconds). */
cpuTime: number;
}
/** Full payload of the `memory` command and the /memory endpoint. */
export interface MemorySnapshot {
/** Memory figures for the Bun server process; each is rendered with formatBytes. */
bunServer: {
rss: number;
heapUsed: number;
heapTotal: number;
external: number;
};
/** One entry per tracked tab; an empty array renders as "(no tabs tracked)". */
tabs: MemoryTabSnapshot[];
/**
* Chromium process tree. `null` when no browser handle is available
* (server in connection mode, or browser not yet launched).
*
* Per-process RSS is NOT included: SystemInfo.getProcessInfo returns
* id+type+cpuTime but Chromium does not expose RSS via CDP. The
* `notes[]` field tells the caller why — see the follow-up TODO
* "native/GPU memory breakdown" for the deferred fix.
*/
processes: MemoryProcess[] | null;
/** Bun-side buffer/subscriber sizes supplied by the snapshot caller. */
structures: MemoryStructureStats;
/** Capture timestamp. NOTE(review): presumably epoch milliseconds — confirm against the producer. */
capturedAt: number;
/** Free-form caveats; printed under a "Notes:" heading when non-empty. */
notes: string[];
}
/**
 * Format a byte count as a short human string using 1024-based units:
 * "512 B", "84.0 KB", "312.5 MB", "1.40 GB".
 *
 * B is printed as-is, KB and MB with one decimal, GB with two. The unit
 * is chosen from the *rounded* value, so a count just under a unit
 * boundary is promoted to the next unit instead of printing
 * "1024.0 KB" / "1024.0 MB".
 *
 * @param n Byte count.
 * @returns Formatted string. Non-finite input falls through to the GB branch.
 */
export function formatBytes(n: number): string {
  if (n < 1024) return `${n} B`;
  const kb = n / 1024;
  // Compare the rounded figure: 1023.96 KB would otherwise print as "1024.0 KB".
  if (Number(kb.toFixed(1)) < 1024) return `${kb.toFixed(1)} KB`;
  const mb = kb / 1024;
  if (Number(mb.toFixed(1)) < 1024) return `${mb.toFixed(1)} MB`;
  return `${(mb / 1024).toFixed(2)} GB`;
}

View File

@ -11,7 +11,6 @@ import { handleSkillCommand } from './browser-skill-commands';
import { validateNavigationUrl } from './url-validation';
import { checkScope, type TokenInfo } from './token-registry';
import { validateOutputPath, validateReadPath, SAFE_DIRECTORIES, escapeRegExp } from './path-security';
import { guardScreenshotBuffer, guardScreenshotPath } from './screenshot-size-guard';
// Re-export for backward compatibility (tests import from meta-commands)
export { validateOutputPath, escapeRegExp } from './path-security';
import * as Diff from 'diff';
@ -137,7 +136,7 @@ function parsePdfArgs(args: string[]): ParsedPdfArgs {
return result;
}
export function parsePdfFromFile(payloadPath: string): ParsedPdfArgs {
function parsePdfFromFile(payloadPath: string): ParsedPdfArgs {
// Parity with load-html --from-file (browse/src/write-commands.ts) and
// the direct load-html <file> path: every caller-supplied file path
// must pass validateReadPath so the safe-dirs policy can't be skirted
@ -150,16 +149,7 @@ export function parsePdfFromFile(payloadPath: string): ParsedPdfArgs {
);
}
const raw = fs.readFileSync(payloadPath, 'utf8');
let json: any;
try {
json = JSON.parse(raw);
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
throw new Error(`pdf: --from-file ${payloadPath} is not valid JSON (${msg}).`);
}
if (json === null || typeof json !== 'object' || Array.isArray(json)) {
throw new Error(`pdf: --from-file ${payloadPath} must be a JSON object, got ${Array.isArray(json) ? 'array' : typeof json}.`);
}
const json = JSON.parse(raw);
const out: ParsedPdfArgs = {
output: json.output || `${TEMP_DIR}/browse-page.pdf`,
format: json.format,
@ -507,10 +497,6 @@ export async function handleMetaCommand(
buffer = await page.screenshot({ clip: clipRect });
} else {
buffer = await page.screenshot({ fullPage: !viewportOnly });
// Guard the most common API-bricking case (fullPage). Element /
// clip captures usually stay within the cap; we still guard the
// path-mode below for fullPage writes.
({ buffer } = await guardScreenshotBuffer(buffer));
}
if (buffer.length > 10 * 1024 * 1024) {
throw new Error('Screenshot too large for --base64 (>10MB). Use disk path instead.');
@ -531,7 +517,6 @@ export async function handleMetaCommand(
}
await page.screenshot({ path: outputPath, fullPage: !viewportOnly });
if (!viewportOnly) await guardScreenshotPath(outputPath);
return `Screenshot saved${viewportOnly ? ' (viewport)' : ''}: ${outputPath}`;
}
@ -582,7 +567,6 @@ export async function handleMetaCommand(
const screenshotPath = `${prefix}-${vp.name}.png`;
validateOutputPath(screenshotPath);
await page.screenshot({ path: screenshotPath, fullPage: true });
await guardScreenshotPath(screenshotPath);
results.push(`${vp.name} (${vp.width}x${vp.height}): ${screenshotPath}`);
}
@ -1161,13 +1145,6 @@ export async function handleMetaCommand(
return await handleCdpCommand(args, bm);
}
case 'memory': {
// Lazy import — pulls in cdp-bridge + memory-snapshot + buffer accessors
// that aren't useful for projects that never run the diagnostic.
const { handleMemoryCommand } = await import('./memory-command');
return await handleMemoryCommand(args, bm);
}
default:
throw new Error(`Unknown meta command: ${command}`);
}

View File

@ -1,137 +0,0 @@
/**
* PTY session lease registry (v1.44+).
*
* Separates two concerns that pre-v1.44 were conflated under one token:
*
* - **sessionId** — stable, non-secret identifier for a single PTY session.
* Safe to log, safe to include in URLs and server access logs, safe to
* keep in DevTools. Identifies "this terminal," not "you're allowed to
* use this terminal."
*
* - **attachToken** — secret, short-lived (30 s) bearer credential that
* grants the WS upgrade for ONE attach attempt against a session. Minted
* on every /pty-session and /pty-session/reattach call; revoked when
* the WS upgrade consumes it. Kept out of logs.
*
* - **lease** — server-side bookkeeping that maps sessionId → expiresAt.
* Re-attach within the lease window resumes the same PTY (and replays
* the ring buffer from terminal-agent). Lease expiry tears down the
* session.
*
* Codex outside-voice (T1 of the eng review) pushed for this separation:
* "the auth token IS the session id" collapsed identity into a secret,
* meaning re-attach URLs and logs carry the bearer credential. The lease
* model fixes that without changing the user experience.
*
* Mint cadence:
* - Initial /pty-session: mint sessionId + lease + attachToken (one round trip).
* - /pty-session/reattach: validate sessionId/lease, mint fresh attachToken.
* - /pty-restart: revoke old lease, mint fresh sessionId + lease + attachToken.
* - /pty-dispose: revoke lease (and the terminal-agent disposes the PTY).
*
* Lease TTL is env-overridable so v1.44 e2e tests can compress detach
* windows to 1 s instead of waiting 30 minutes per assertion.
*/
import * as crypto from 'crypto';
/** Server-side record for one PTY session lease; both fields are Date.now() timestamps. */
interface Lease {
  createdAt: number;
  expiresAt: number;
}

/** Default lease lifetime: 30 minutes; covers idle-but-engaged user sessions. */
const DEFAULT_LEASE_TTL_MS = 30 * 60 * 1000;

/**
 * Parse the GSTACK_PTY_LEASE_TTL_MS override.
 *
 * An unset or empty value selects the default. A value that does not
 * parse as an integer also selects the default: a NaN TTL would make
 * every `expiresAt` NaN, every expiry comparison false, and therefore
 * every lease immortal.
 *
 * @param raw Raw environment value.
 * @returns Lease TTL in milliseconds.
 */
function parseLeaseTtlMs(raw: string | undefined): number {
  const parsed = parseInt(raw || `${DEFAULT_LEASE_TTL_MS}`, 10);
  return Number.isNaN(parsed) ? DEFAULT_LEASE_TTL_MS : parsed;
}

/** Lease lifetime in ms; env-overridable so tests can compress detach windows. */
const LEASE_TTL_MS = parseLeaseTtlMs(process.env.GSTACK_PTY_LEASE_TTL_MS);
/** Hard cap on tracked leases; pruneExpired() evicts the oldest entries beyond it. */
const MAX_LEASES = 10_000;
/** sessionId → lease, in insertion order. */
const leases = new Map<string, Lease>();
/**
 * Create a new session: a random 32-byte base64url sessionId plus a lease
 * expiring LEASE_TTL_MS from now. The lease is stored, then an
 * opportunistic prune pass runs.
 *
 * @returns The non-secret sessionId and its expiry timestamp; the caller
 *          surfaces both to the client. Never throws.
 */
export function mintLease(): { sessionId: string; expiresAt: number } {
  const sessionId = crypto.randomBytes(32).toString('base64url');
  const issuedAt = Date.now();
  const lease: Lease = { createdAt: issuedAt, expiresAt: issuedAt + LEASE_TTL_MS };
  leases.set(sessionId, lease);
  pruneExpired(issuedAt);
  return { sessionId, expiresAt: lease.expiresAt };
}
/**
 * Report whether `sessionId` names a live lease (present AND not expired).
 *
 * An expired lease is deleted on sight. Every negative answer for a
 * non-empty sessionId also triggers a lazy prune pass.
 *
 * @returns `{ ok: true, expiresAt }` for a live lease, `{ ok: false }` otherwise.
 */
export function validateLease(sessionId: string | null | undefined): { ok: true; expiresAt: number } | { ok: false } {
  if (!sessionId) return { ok: false };

  const entry = leases.get(sessionId);
  const expired = entry !== undefined && Date.now() > entry.expiresAt;
  if (entry !== undefined && !expired) {
    return { ok: true, expiresAt: entry.expiresAt };
  }

  if (expired) leases.delete(sessionId);
  pruneExpired(Date.now());
  return { ok: false };
}
/**
 * Push a live lease's expiry out to `now + LEASE_TTL_MS`. Callers should
 * gate refresh on `expiresAt - now < REFRESH_THRESHOLD` (D10 lazy
 * refresh) so a comfortably-valid lease isn't refreshed on every
 * keepalive.
 *
 * Critical security invariant: an expired lease is never resurrected —
 * the TTL is what bounds the blast radius of a leaked attach token whose
 * lease should already have been GC'd. An expired lease is deleted
 * instead.
 *
 * @returns `{ ok: true, expiresAt }` on success; `{ ok: false }` when the
 *          lease is unknown or already expired (the agent must then close
 *          the WS and surface auth-invalid).
 */
export function refreshLease(sessionId: string | null | undefined): { ok: true; expiresAt: number } | { ok: false } {
  if (!sessionId) return { ok: false };

  const entry = leases.get(sessionId);
  if (entry === undefined) return { ok: false };

  const now = Date.now();
  const expired = now > entry.expiresAt;
  if (expired) {
    leases.delete(sessionId);
    return { ok: false };
  }

  const extendedTo = now + LEASE_TTL_MS;
  entry.expiresAt = extendedTo;
  return { ok: true, expiresAt: extendedTo };
}
/**
 * Forget a lease. Used on explicit dispose (/pty-dispose, /pty-restart,
 * WS close with code 4001) and on session timeout in terminal-agent.
 * A missing/empty sessionId or an unknown lease is a no-op.
 */
export function revokeLease(sessionId: string | null | undefined): void {
  if (sessionId) {
    leases.delete(sessionId);
  }
}
/** Number of leases currently tracked (expired-but-unpruned ones included) — test + observability helper. */
export function leaseCount(): number {
  const { size } = leases;
  return size;
}
/** Test-only reset: drops every tracked lease so each test starts from an empty registry. */
export function __resetLeases(): void {
leases.clear();
}
/**
 * Bounded housekeeping pass over the lease map.
 *
 * 1. Inspect at most the first 20 entries (insertion order) and delete
 *    those whose expiry is at or before `now`.
 * 2. While the map exceeds MAX_LEASES, evict the oldest-inserted entry.
 *
 * @param now Reference timestamp (ms) to compare expiries against.
 */
function pruneExpired(now: number): void {
  let inspected = 0;
  for (const [id, lease] of leases) {
    if (inspected >= 20) break;
    inspected += 1;
    if (lease.expiresAt <= now) leases.delete(id);
  }

  while (leases.size > MAX_LEASES) {
    const oldest = leases.keys().next().value;
    if (!oldest) break;
    leases.delete(oldest);
  }
}

View File

@ -1,106 +0,0 @@
/**
* Screenshot size guard — keep full-page screenshots ≤ 2000px max-dim.
*
* The Anthropic vision API rejects images whose longest dimension exceeds
* 2000 image-pixels (post deviceScaleFactor). Full-page screenshots of long
* pages routinely exceed that, silently bricking the session: the agent
* burns turns on a base64 blob that errors model-side with no useful
* stderr surfacing on the browse side.
*
* This module centralizes the "after page.screenshot, check dimensions and
* downscale if too big" path so every full-page caller in browse/src can
* share the same enforcement. The cap is image-pixels, not CSS pixels,
* matching the Anthropic API's own threshold.
*
* Used by: snapshot.ts (annotated, heatmap), meta-commands.ts (screenshot),
* write-commands.ts (prettyscreenshot). See test/snapshot-meta-write-guard.test.ts.
*
* Closes #1214.
*/
import { writeFileSync, readFileSync } from "fs";
// Longest side, in image pixels, an image may have before guardScreenshotBuffer() downscales it.
const MAX_DIMENSION_PX = 2000;
/** Diagnostic shape returned by the size guard: whether it resized, and the before/after dimensions. */
export interface SizeGuardResult {
/** True if the input image exceeded MAX_DIMENSION_PX and was downscaled. */
resized: boolean;
/** Final width and height (pixels) of the image as written/returned. */
width: number;
height: number;
/** Original dimensions before any downscale. */
originalWidth: number;
originalHeight: number;
}
/**
 * Inspect an image buffer and downscale it if its longest side exceeds
 * the 2000px Anthropic vision API cap. Aspect ratio is preserved and the
 * downscaled image is re-encoded as PNG.
 *
 * sharp is imported lazily so the module load cost only hits screenshot
 * paths (sharp's native binding is non-trivial to initialize).
 *
 * @param input Encoded image bytes.
 * @returns The buffer to use (the untouched `input` when no resize was
 *          needed) plus a diagnostic describing what happened. When the
 *          image metadata carries no dimensions they are treated as 0 and
 *          the input is returned unchanged.
 * @throws Whatever sharp throws for undecodable input.
 */
export async function guardScreenshotBuffer(input: Buffer): Promise<{ buffer: Buffer; result: SizeGuardResult }> {
  const sharpModule = await import("sharp");
  const sharp = sharpModule.default ?? sharpModule;
  const image = sharp(input);
  const metadata = await image.metadata();
  const width = metadata.width ?? 0;
  const height = metadata.height ?? 0;
  const longest = Math.max(width, height);

  if (longest <= MAX_DIMENSION_PX) {
    return {
      buffer: input,
      result: {
        resized: false,
        width,
        height,
        originalWidth: width,
        originalHeight: height,
      },
    };
  }

  const scale = MAX_DIMENSION_PX / longest;
  // Clamp to 1px: for extreme aspect ratios (e.g. 2x16000) the short side
  // rounds to 0, and sharp's resize() rejects non-positive dimensions.
  const newWidth = Math.max(1, Math.round(width * scale));
  const newHeight = Math.max(1, Math.round(height * scale));
  const resized = await image
    .resize(newWidth, newHeight, { fit: "inside" })
    .png()
    .toBuffer();

  process.stderr.write(
    `[screenshot-size-guard] image ${width}x${height} exceeded ${MAX_DIMENSION_PX}px max-dim; ` +
      `downscaled to ${newWidth}x${newHeight} to fit Anthropic vision API\n`,
  );

  return {
    buffer: resized,
    result: {
      resized: true,
      width: newWidth,
      height: newHeight,
      originalWidth: width,
      originalHeight: height,
    },
  };
}
/**
 * File-mode variant of the size guard: load the image at `filePath`, run
 * it through guardScreenshotBuffer(), and overwrite the file in place
 * only when a downscale actually happened. Intended to follow
 * `await page.screenshot({ path, ... })`.
 *
 * @param filePath Path of an existing image file.
 * @returns The diagnostic shape from guardScreenshotBuffer().
 */
export async function guardScreenshotPath(filePath: string): Promise<SizeGuardResult> {
  const guarded = await guardScreenshotBuffer(readFileSync(filePath));
  if (guarded.result.resized) {
    writeFileSync(filePath, guarded.buffer);
  }
  return guarded.result;
}
/** Public alias of the module's max-dimension cap (2000 image pixels). */
export const SCREENSHOT_MAX_DIMENSION_PX = MAX_DIMENSION_PX;

View File

@ -135,7 +135,7 @@ export function getClassifierStatus(): ClassifierStatus {
// ─── Model download + staging ────────────────────────────────
export async function downloadFile(url: string, dest: string): Promise<void> {
async function downloadFile(url: string, dest: string): Promise<void> {
const res = await fetch(url);
if (!res.ok || !res.body) {
throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`);
@ -144,30 +144,16 @@ export async function downloadFile(url: string, dest: string): Promise<void> {
const writer = fs.createWriteStream(tmp);
// @ts-ignore — Node stream compat
const reader = res.body.getReader();
try {
let done = false;
while (!done) {
const chunk = await reader.read();
if (chunk.done) { done = true; break; }
writer.write(chunk.value);
}
await new Promise<void>((resolve, reject) => {
writer.end((err?: Error | null) => (err ? reject(err) : resolve()));
});
fs.renameSync(tmp, dest);
} catch (err) {
// Drop the half-written tmp so we don't ship a truncated model file to
// a retry's renameSync. Wait for the writer to close fully before
// unlinking: Node's createWriteStream lazily opens the FD and flushes
// buffered writes during destroy(), so a naive unlinkSync hits ENOENT
// first and the writer re-creates the file on the next tick.
await new Promise<void>((resolve) => {
writer.once('close', () => resolve());
writer.destroy();
});
try { fs.unlinkSync(tmp); } catch { /* nothing to clean */ }
throw err;
let done = false;
while (!done) {
const chunk = await reader.read();
if (chunk.done) { done = true; break; }
writer.write(chunk.value);
}
await new Promise<void>((resolve, reject) => {
writer.end((err?: Error | null) => (err ? reject(err) : resolve()));
});
fs.renameSync(tmp, dest);
}
async function ensureTestsavantStaged(onProgress?: (msg: string) => void): Promise<void> {

View File

@ -1,231 +0,0 @@
/**
* Security sidecar client IPC layer for the Node L4 classifier subprocess.
*
* Spawn model: lazy. First call to scan() spawns the sidecar, warms it (the
* sidecar's loadTestsavant call on first scan-page-content), and reuses
* the same process for every subsequent scan. The process dies when the
* browse server exits (Node's stdin-close behavior).
*
* Reliability:
* - 5s default timeout per scan. Caller can override per-call.
* - 64KB request cap. Larger payloads short-circuit with `payload-too-large`.
* - Respawn capped at 3 failures within 10 minutes; further failures
* trip a circuit breaker that returns `available: false` until reset.
* - Parent-exit cleanup: process.on('exit') sends SIGTERM to the child.
*
* Failure semantics:
* - Node not on PATH → available() returns false; caller (the
* /pty-inject-scan endpoint) returns l4: { available: false } and the
* extension degrades to WARN + user confirm.
* - Scan throws or times out → caller treats as L4-unavailable for that
* request and falls through to L1-L3-only verdict.
*
* Single-process singleton. Multiple callers within the same browse
* process share one sidecar.
*/
import { ChildProcessByStdio, spawn } from "child_process";
import { Readable, Writable } from "stream";
import { findSecuritySidecar } from "./find-security-sidecar";
// Upper bound on the UTF-8 byte length of the text handed to scanWithSidecar().
const REQUEST_CAP_BYTES = 64 * 1024;
// Per-scan timeout used when the caller does not pass opts.timeoutMs.
const DEFAULT_TIMEOUT_MS = 5000;
// Sliding window (10 minutes) over which recordFailure() counts failures.
const RESPAWN_WINDOW_MS = 10 * 60 * 1000;
// Number of failures inside the window at which the circuit breaker trips.
const RESPAWN_LIMIT = 3;
/** Bookkeeping for one in-flight sidecar request, stored in SidecarState.pending under its request id. */
interface PendingRequest {
// Settles the caller's promise with the parsed response line.
resolve: (response: unknown) => void;
// Fails the caller's promise (error reply, timeout, write failure, or child shutdown).
reject: (err: Error) => void;
// Timeout timer; cleared when a response arrives or the child is shut down.
timer: ReturnType<typeof setTimeout>;
}
/** Mutable singleton state shared by every caller in this process. */
interface SidecarState {
// Live sidecar subprocess, or null when none is running.
child: ChildProcessByStdio<Writable, Readable, Readable> | null;
// In-flight requests keyed by request id.
pending: Map<string, PendingRequest>;
// Stdout text received from the child that has not yet been split into complete lines.
buffer: string;
failures: number[]; // timestamps of recent failures
// Set true after a successful spawn; set false when no sidecar location resolves or the breaker trips.
available: boolean;
/** True after circuit-breaker tripped; stays true until reset() */
brokenCircuit: boolean;
// Counter used to mint request ids; starts at 1.
nextId: number;
}
/** Process-wide sidecar state; created lazily by getState() and dropped by resetSidecarForTests(). */
let state: SidecarState | null = null;

/**
 * Return the singleton sidecar state, creating it on first use with no
 * child, no pending requests, an empty buffer, a closed circuit breaker
 * and request ids starting at 1.
 */
function getState(): SidecarState {
  if (state) return state;
  const fresh: SidecarState = {
    child: null,
    pending: new Map(),
    buffer: "",
    failures: [],
    available: true,
    brokenCircuit: false,
    nextId: 1,
  };
  state = fresh;
  return fresh;
}
/**
 * Note a sidecar failure at the current time and trip the circuit
 * breaker when RESPAWN_LIMIT failures fall inside the trailing
 * RESPAWN_WINDOW_MS. Older failures are discarded first. Once tripped,
 * `brokenCircuit` is set and `available` is cleared.
 */
function recordFailure(): void {
  const s = getState();
  const now = Date.now();
  const recent = s.failures.filter((stamp) => now - stamp < RESPAWN_WINDOW_MS);
  recent.push(now);
  s.failures = recent;
  if (recent.length < RESPAWN_LIMIT) return;
  s.brokenCircuit = true;
  s.available = false;
}
/**
 * Drain every complete newline-terminated line from the stdout buffer and
 * settle the pending request each response line belongs to.
 *
 * Per line:
 *   - blank lines are skipped;
 *   - lines that are not valid JSON count as a failure but reject nothing
 *     (there is no id to attribute them to);
 *   - valid JSON that is not an object (e.g. `null`, a bare number) is
 *     ignored instead of being dereferenced — `null` would otherwise throw
 *     inside the stdout data handler;
 *   - responses without a string id, or with an id that is no longer
 *     pending (already timed out), are ignored;
 *   - `ok` responses resolve the request; anything else records a failure
 *     and rejects with the reported error (default "sidecar-error").
 *
 * A trailing partial line stays in the buffer for the next chunk.
 */
function processBuffer(): void {
  const s = getState();
  let idx = s.buffer.indexOf("\n");
  while (idx !== -1) {
    const line = s.buffer.slice(0, idx).trim();
    s.buffer = s.buffer.slice(idx + 1);
    idx = s.buffer.indexOf("\n");
    if (!line) continue;

    let decoded: unknown;
    try {
      decoded = JSON.parse(line);
    } catch {
      // Malformed line — record as failure but don't reject any specific
      // pending request (we don't know which one this was meant for).
      recordFailure();
      continue;
    }
    // JSON.parse can legally yield null or a primitive; only objects carry an id.
    if (decoded === null || typeof decoded !== "object") continue;
    const parsed = decoded as { id?: string; ok?: boolean; verdict?: unknown; status?: unknown; error?: string };

    const id = typeof parsed.id === "string" ? parsed.id : null;
    if (!id) continue;
    const pending = s.pending.get(id);
    if (!pending) continue;
    s.pending.delete(id);
    clearTimeout(pending.timer);
    if (parsed.ok) {
      pending.resolve(parsed);
    } else {
      recordFailure();
      pending.reject(new Error(parsed.error ?? "sidecar-error"));
    }
  }
}
/**
 * Tear down the current sidecar, if any: send SIGTERM (ignoring a failure
 * to signal an already-dead process), forget the child, then reject every
 * in-flight request with "sidecar-died" after cancelling its timer.
 * No-op when no child is running.
 */
function shutdownChild(): void {
  const s = getState();
  const child = s.child;
  if (!child) return;

  try {
    child.kill("SIGTERM");
  } catch {
    // Already dead.
  }
  s.child = null;

  for (const request of s.pending.values()) {
    clearTimeout(request.timer);
    request.reject(new Error("sidecar-died"));
  }
  s.pending.clear();
}
/**
 * Start the sidecar subprocess and wire its stdio into the shared state.
 *
 * Refuses to spawn while the circuit breaker is tripped, and marks the
 * sidecar unavailable when no interpreter/entry pair resolves.
 *
 * Wiring details:
 *   - stdout is decoded with setEncoding("utf8") so a multi-byte
 *     character split across two chunks is not corrupted;
 *   - stderr is drained and discarded — it is piped, so if nobody reads it
 *     the child blocks once the pipe buffer fills;
 *   - stdin gets an 'error' listener, because a write to a dead child
 *     surfaces as an asynchronous EPIPE event, which is an uncaught
 *     exception when unhandled;
 *   - handlers only tear down state while this child is still the current
 *     one, so a late 'exit' from an old child cannot kill its replacement;
 *   - the line buffer is cleared so a partial line left by a previous
 *     child is not glued onto this child's first response.
 *
 * @returns `true` when a child was spawned, `false` otherwise.
 */
function spawnSidecar(): boolean {
  const s = getState();
  if (s.brokenCircuit) return false;
  const location = findSecuritySidecar();
  if (!location) {
    s.available = false;
    return false;
  }
  try {
    const child = spawn(location.node, [location.entry], {
      stdio: ["pipe", "pipe", "pipe"],
      detached: false,
    });
    const isCurrent = (): boolean => s.child === child;

    s.buffer = "";
    child.stdout.setEncoding("utf8");
    child.stdout.on("data", (chunk: string) => {
      if (!isCurrent()) return;
      s.buffer += chunk;
      processBuffer();
    });
    // Discard stderr so the child can never block on a full pipe.
    child.stderr.resume();
    child.stdin.on("error", () => {
      if (!isCurrent()) return;
      recordFailure();
      shutdownChild();
    });
    child.on("exit", () => {
      if (isCurrent()) shutdownChild();
    });
    child.on("error", () => {
      recordFailure();
      if (isCurrent()) shutdownChild();
    });

    s.child = child;
    s.available = true;
    return true;
  } catch {
    recordFailure();
    return false;
  }
}
// Best-effort parent-exit cleanup. Node's "exit" event blocks async work, so
// we send SIGTERM synchronously and let the OS reap the child.
// shutdownChild() also rejects any still-pending requests with "sidecar-died".
process.on("exit", () => shutdownChild());
/** Result of isSidecarAvailable(). */
export interface SidecarAvailability {
// Whether a scan can be attempted right now.
available: boolean;
// Set only when unavailable: "circuit-broken" or "no-node-or-entry".
reason?: string;
}
/**
 * Cheap availability probe that never spawns the sidecar.
 *
 * Checks, in order: a tripped circuit breaker → unavailable
 * ("circuit-broken"); a running child → available; otherwise the
 * resolver decides — when it finds no node on PATH or no entry on disk
 * the sidecar is unavailable ("no-node-or-entry") until a setup re-run.
 */
export function isSidecarAvailable(): SidecarAvailability {
  const { brokenCircuit, child } = getState();
  if (brokenCircuit) return { available: false, reason: "circuit-broken" };
  if (child) return { available: true };
  return findSecuritySidecar()
    ? { available: true }
    : { available: false, reason: "no-node-or-entry" };
}
/**
 * Send `text` to the sidecar for an L4 scan and wait for its verdict.
 *
 * The sidecar is spawned on first use. The request is one NDJSON line
 * `{ id, op: "scan-page-content", text }` written to the child's stdin;
 * the matching response is routed back by processBuffer().
 *
 * @param text Content to scan; at most REQUEST_CAP_BYTES of UTF-8.
 * @param opts Optional per-call timeout override (default DEFAULT_TIMEOUT_MS).
 * @returns The `verdict` field of the sidecar's response.
 * @throws Error "sidecar-circuit-broken" when the breaker is tripped,
 *         "payload-too-large" when `text` exceeds the cap,
 *         "sidecar-spawn-failed" when no child could be started,
 *         "sidecar-timeout" when no response arrives in time,
 *         or the error reported by the sidecar / the stdin write.
 */
export async function scanWithSidecar(text: string, opts?: { timeoutMs?: number }): Promise<{ verdict: unknown }> {
  const s = getState();
  if (s.brokenCircuit) throw new Error("sidecar-circuit-broken");

  const tooLarge = Buffer.byteLength(text, "utf-8") > REQUEST_CAP_BYTES;
  if (tooLarge) throw new Error("payload-too-large");

  if (!s.child && !spawnSidecar()) throw new Error("sidecar-spawn-failed");

  const id = String(s.nextId++);
  const timeoutMs = opts?.timeoutMs ?? DEFAULT_TIMEOUT_MS;

  return new Promise((resolve, reject) => {
    // A timeout counts as a failure toward the circuit breaker.
    const onTimeout = (): void => {
      s.pending.delete(id);
      recordFailure();
      reject(new Error("sidecar-timeout"));
    };
    const timer = setTimeout(onTimeout, timeoutMs);

    const entry: PendingRequest = {
      resolve: (response: unknown) => {
        const { verdict } = response as { verdict?: unknown };
        resolve({ verdict });
      },
      reject,
      timer,
    };
    s.pending.set(id, entry);

    const payload = JSON.stringify({ id, op: "scan-page-content", text }) + "\n";
    try {
      s.child!.stdin.write(payload);
    } catch (err) {
      clearTimeout(timer);
      s.pending.delete(id);
      recordFailure();
      reject(err instanceof Error ? err : new Error(String(err)));
    }
  });
}
/**
 * Test-only escape hatch. Calls shutdownChild() and then discards the cached
 * module state by setting `state` to null.
 * NOTE(review): presumably getState() rebuilds a fresh state (circuit breaker
 * reset) on next use — confirm against getState().
 */
export function resetSidecarForTests(): void {
  shutdownChild();
  state = null;
}

View File

@ -1,120 +0,0 @@
/**
* Security sidecar entry — a Node script that hosts the L4 ML classifier on
* behalf of the compiled browse server.
*
* Why a sidecar:
* - browse/src/security-classifier.ts depends on @huggingface/transformers
* which loads onnxruntime-node, a native module that fails to `dlopen`
* from Bun's compile-binary temp extraction dir (CLAUDE.md "Sidebar
* security stack" section). Importing the classifier into server.ts
* would brick the compiled binary at startup.
* - sidebar-agent.ts (the previous host of the classifier) was removed
* when the PTY proved out. The classifier file still ships but had no
* caller — exactly the gap codex flagged in #1370.
*
* This entry runs under plain Node (resolved by find-security-sidecar.ts).
* It reads NDJSON requests from stdin and writes NDJSON responses to stdout.
*
* Protocol (one JSON object per line, both directions):
* request: { id: string, op: "scan-page-content" | "ping" | "status", text?: string }
* response: { id: string, ok: true, verdict?: LayerSignal, status?: unknown } |
* { id: string, ok: false, error: string }
*
* Lifecycle:
* - Spawned lazily by security-sidecar-client.ts on first /pty-inject-scan
* - Exits when stdin closes (parent gone) — the readline "close" handler calls process.exit(0)
* - Exits cleanly (code 0) on SIGTERM or SIGINT
*
* Failure modes:
* - Model download fails → reply { ok: false, error: "model-load" } and
* keep the loop alive for the next request (caller decides whether to
* retry or fail-safe to L1-L3-only)
*/
import * as readline from "readline";
import { scanPageContent, getClassifierStatus, loadTestsavant } from "./security-classifier";
/** One NDJSON request line read from stdin. */
interface Request {
  /** Correlation id echoed back on the response; requests without a string id are dropped. */
  id: string;
  /** Operation selector: classify text, liveness ping, or classifier status. */
  op: "scan-page-content" | "ping" | "status";
  /** Content to classify; required for "scan-page-content". */
  text?: string;
}
/** Success response line written to stdout. */
interface OkResponse {
  /** Id of the request being answered. */
  id: string;
  ok: true;
  /** Set for "scan-page-content" and "ping" replies. */
  verdict?: unknown;
  /** Set for "status" replies (value of getClassifierStatus()). */
  status?: unknown;
}
/** Failure response line written to stdout. */
interface ErrResponse {
  /** Id of the request being answered, or "<malformed>" when the line was not valid JSON. */
  id: string;
  ok: false;
  /** Short error code or the thrown error's message. */
  error: string;
}
/** Emit one NDJSON response: the object serialized as JSON plus a trailing newline on stdout. */
function write(obj: OkResponse | ErrResponse): void {
  const serialized = JSON.stringify(obj);
  process.stdout.write(serialized + "\n");
}
/**
 * Dispatch a single parsed request and write exactly one response for it.
 *
 * Requests without a string `id` are ignored, because a reply could not be
 * correlated. Any exception raised while handling is reported to the caller as
 * `{ ok: false, error: <message> }` instead of propagating.
 */
async function handle(req: Request): Promise<void> {
  if (!req || typeof req.id !== "string") return;
  const { id } = req;
  try {
    switch (req.op) {
      case "ping":
        write({ id, ok: true, verdict: { layer: "ping", verdict: "alive", score: 0 } });
        return;
      case "status":
        write({ id, ok: true, status: getClassifierStatus() });
        return;
      case "scan-page-content": {
        if (typeof req.text !== "string") {
          write({ id, ok: false, error: "missing-text" });
          return;
        }
        // A model-load failure is deliberately ignored here; the scan below
        // still runs and produces the verdict.
        await loadTestsavant().catch(() => {});
        const verdict = await scanPageContent(req.text);
        write({ id, ok: true, verdict });
        return;
      }
      default:
        write({ id, ok: false, error: `unknown-op:${(req as { op?: unknown }).op}` });
    }
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    write({ id, ok: false, error: reason });
  }
}
/**
 * Wire up the NDJSON loop: one request per stdin line, responses correlated by
 * id. The process exits with code 0 when stdin closes or on SIGTERM/SIGINT.
 */
function main(): void {
  const lines = readline.createInterface({ input: process.stdin });

  lines.on("line", (raw) => {
    if (raw.trim() === "") return;
    let parsed: Request;
    try {
      parsed = JSON.parse(raw) as Request;
    } catch {
      // The line could not be parsed, so no real id is known. Reply with the
      // placeholder id "<malformed>" so the caller can recognise the failure.
      write({ id: "<malformed>", ok: false, error: "malformed-json" });
      return;
    }
    // Not awaited: requests may overlap, and each response carries its own id.
    void handle(parsed);
  });

  lines.on("close", () => {
    process.exit(0);
  });

  for (const signal of ["SIGTERM", "SIGINT"] as const) {
    process.on(signal, () => process.exit(0));
  }
}
main();

File diff suppressed because it is too large Load Diff

View File

@ -23,7 +23,6 @@ import * as Diff from 'diff';
import { TEMP_DIR, isPathWithin } from './platform';
import { escapeEnvelopeSentinels } from './content-security';
import { stripLoneSurrogates } from './sanitize';
import { guardScreenshotPath } from './screenshot-size-guard';
// Roles considered "interactive" for the -i flag
const INTERACTIVE_ROLES = new Set([
@ -419,7 +418,6 @@ export async function handleSnapshot(
}, boxes);
await page.screenshot({ path: screenshotPath, fullPage: true });
await guardScreenshotPath(screenshotPath);
// Always remove overlays
await page.evaluate(() => {
@ -540,7 +538,6 @@ export async function handleSnapshot(
}, boxes);
await page.screenshot({ path: heatmapPath, fullPage: true });
await guardScreenshotPath(heatmapPath);
// Remove heatmap overlays
await page.evaluate(() => {

View File

@ -1,154 +0,0 @@
// SSE endpoint helper — shared cleanup contract for stream endpoints.
//
// Pre-helper, /activity/stream and /inspector/events implemented the same
// pattern in parallel and both leaked subscribers when enqueue failed
// without a corresponding abort signal (e.g. Chromium MV3 service-worker
// suspend dropped the TCP without an abort edge). The subscriber closure
// stayed in the Set, capturing the ReadableStreamDefaultController plus
// any payloads queued behind it. Over a multi-day sidebar session this
// compounded into multi-MB of retained controllers per dead connection.
//
// Centralizing the cleanup contract here means any future SSE endpoint
// inherits the invariant — cleanup runs on abort, enqueue failure, AND
// heartbeat failure, exactly once, regardless of which edge fires first.
import { stripLoneSurrogates } from './sanitize';
/**
 * JSON.stringify replacer: string values are passed through
 * stripLoneSurrogates before being escape-encoded; every other value is
 * returned untouched. Use it wherever the consumer will JSON.parse the payload
 * back into JS strings (SSE clients do). Required at every SSE egress that
 * ships page-content-derived fields; see CLAUDE.md "Unicode sanitization at
 * server egress".
 */
function sanitizeReplacer(_key: string, value: unknown): unknown {
  if (typeof value !== 'string') return value;
  return stripLoneSurrogates(value);
}
/**
 * Send one SSE event named `event` whose data is `data` serialized as JSON.
 * The implementation handles JSON encoding and lone-surrogate sanitization.
 */
export type SseSender = (event: string, data: unknown) => void;
/**
 * Configuration for createSseEndpoint. `T` is the type of the entries the live
 * event source delivers.
 */
export interface SseEndpointConfig<T> {
  /**
   * Optional. Runs once after the stream opens, before subscribing for live
   * events. Use for initial event replay (activity gap detection, history
   * burst) or a current-state snapshot (inspector). The `send` helper
   * handles JSON encoding with sanitizeReplacer and SSE framing; pass
   * any event name and any payload object.
   */
  initialReplay?: (send: SseSender) => void;
  /**
   * Subscribe to the live event source. Receives a `notify` callback and
   * returns an unsubscribe function. Each notified entry is sent through the
   * same guarded path as `send`, so a failed enqueue tears the endpoint down
   * and unsubscribes instead of waiting for an abort that may never fire.
   */
  subscribe: (notify: (entry: T) => void) => () => void;
  /**
   * SSE event name for live events. Each entry is framed as
   * `event: <liveEventName>` followed by `data: <JSON.stringify(entry)>`.
   * /activity/stream uses 'activity'; /inspector/events uses 'inspector'.
   */
  liveEventName: string;
  /** Heartbeat interval in ms. Default: 15000. */
  heartbeatMs?: number;
}
/**
 * Build a streaming SSE Response that owns the cleanup contract for one
 * consumer.
 *
 * Cleanup (clear heartbeat, unsubscribe, close the controller, drop the abort
 * listener) runs exactly once, triggered by whichever of these happens first:
 *   - `req.signal` is already aborted when the stream starts
 *   - `req.signal` fires "abort"
 *   - an enqueue throws while sending an event (consumer gone)
 *   - an enqueue throws while sending a heartbeat comment
 *   - `config.initialReplay` throws
 *
 * @param req Incoming request; only `req.signal` is used.
 * @param config Replay/subscribe hooks, live event name and heartbeat interval.
 * @returns A `text/event-stream` Response backed by the managed stream.
 */
export function createSseEndpoint<T>(
  req: Request,
  config: SseEndpointConfig<T>,
): Response {
  const heartbeatMs = config.heartbeatMs ?? 15000;
  const encoder = new TextEncoder();
  const stream = new ReadableStream({
    start(controller) {
      let cleanedUp = false;
      let heartbeat: ReturnType<typeof setInterval> | null = null;
      let unsubscribe: (() => void) | null = null;

      const cleanup = (): void => {
        if (cleanedUp) return;
        cleanedUp = true;
        // Harmless when the listener was never attached.
        req.signal.removeEventListener('abort', cleanup);
        if (heartbeat !== null) {
          clearInterval(heartbeat);
          heartbeat = null;
        }
        if (unsubscribe !== null) {
          unsubscribe();
          unsubscribe = null;
        }
        try {
          controller.close();
        } catch {
          // Expected: stream already closed by the consumer.
        }
      };

      const send: SseSender = (event, data) => {
        if (cleanedUp) return;
        try {
          controller.enqueue(
            encoder.encode(
              `event: ${event}\ndata: ${JSON.stringify(data, sanitizeReplacer)}\n\n`,
            ),
          );
        } catch {
          // Consumer disconnected mid-write. Tear down so this subscriber
          // doesn't sit in the set forever.
          cleanup();
        }
      };

      // An "abort" listener added after the signal has already aborted never
      // fires, so the already-aborted case must be handled up front.
      if (req.signal.aborted) {
        cleanup();
        return;
      }

      // Initial replay (caller-provided).
      if (config.initialReplay) {
        try {
          config.initialReplay(send);
        } catch {
          cleanup();
          return;
        }
        if (cleanedUp) return;
      }

      // Subscribe for live events.
      const release = config.subscribe((entry) => {
        send(config.liveEventName, entry);
      });
      // If the source notified synchronously inside subscribe() and that send
      // failed, cleanup already ran before the unsubscribe handle existed.
      // Release the subscription now and do not start a heartbeat.
      if (cleanedUp) {
        release();
        return;
      }
      unsubscribe = release;

      // Heartbeat keeps idle connections open and doubles as a liveness probe:
      // an enqueue failure here reveals a dead consumer without waiting for an
      // abort signal that may never arrive.
      heartbeat = setInterval(() => {
        if (cleanedUp) return;
        try {
          controller.enqueue(encoder.encode(`: heartbeat\n\n`));
        } catch {
          cleanup();
        }
      }, heartbeatMs);

      req.signal.addEventListener('abort', cleanup, { once: true });
    },
  });
  return new Response(stream, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
    },
  });
}

View File

@ -1,200 +1,39 @@
/**
* Stealth init scripts anti-bot detection countermeasures.
* Stealth init script webdriver-mask only (D7, codex narrowed).
*
* Two modes:
* Modern anti-bot fingerprinters check consistency between navigator
* properties (plugins.length, languages, userAgent, platform). Faking those
* to fixed values (the wintermute approach) can flag MORE bot-like, not
* less, and breaks legitimate sites that reflect on these properties.
*
* 1. DEFAULT (consistency-first, always on): masks navigator.webdriver
* and adds --disable-blink-features=AutomationControlled. This is
* the original "codex narrowed" minimum that preserves fingerprint
* consistency letting plugins/languages/chrome.runtime surface
* native Chromium values keeps the fingerprint internally coherent.
*
* 2. EXTENDED (opt-in via GSTACK_STEALTH=extended): six additional
* detection-vector patches on top of the default. Closes the
* SannySoft test corpus to a 100% pass rate. Originally proposed in
* PR #1112 (garrytan, Apr 2026).
*
* Vectors patched in extended mode:
* - navigator.webdriver property fully deleted from prototype
* (not just `false` detectors check `"webdriver" in navigator`)
* - WebGL renderer spoofed to a plausible Apple M1 Pro string
* (SwiftShader was the #1 software-GPU giveaway in containers)
* - navigator.plugins returns a real PluginArray with proper
* MimeType objects and namedItem() `instanceof PluginArray`
* passes
* - window.chrome populated with chrome.app, chrome.runtime,
* chrome.loadTimes(), chrome.csi() with correct shapes
* - navigator.mediaDevices present (some headless builds drop it)
* - CDP cdc_* property names cleared from window
*
* Trade-off: extended mode actively LIES about the browser
* environment. Sites that reflect on these properties can break or
* misbehave. Use only when the default mode triggers detection AND
* the target is anti-bot-protected. Not recommended as a global
* default.
* The honest minimum is masking navigator.webdriver, which Chromium exposes
* as a known automation tell. Letting plugins/languages/chrome.runtime
* surface their native Chromium values keeps the fingerprint internally
* consistent.
*/
import type { BrowserContext } from 'playwright';
import type { Browser, BrowserContext } from 'playwright';
/**
* Always-on default mask: navigator.webdriver returns false. Modern
* fingerprinters check the property accessor, so a one-line getter is
* sufficient when consistency with the rest of the navigator surface is
* preserved.
* Init script applied to every page in a context. Runs in the page's main
* world before any other scripts. Idempotent defining the same property
* twice in different contexts is fine.
*/
export const WEBDRIVER_MASK_SCRIPT = `Object.defineProperty(navigator, 'webdriver', { get: () => false });`;
/**
* Extended-mode init script six detection-vector patches. Applied
* AFTER the default mask, so the property-getter version remains in
* place if any of the deletion paths fail.
*
* Self-contained string so it can be passed to addInitScript({ content })
* without bundling concerns.
*/
export const EXTENDED_STEALTH_SCRIPT = `
(() => {
try {
// 1. Fully delete navigator.webdriver from the prototype so
// \`"webdriver" in navigator\` returns false (not just falsy).
delete Object.getPrototypeOf(navigator).webdriver;
} catch {}
try {
// 2. WebGL renderer spoof — SwiftShader is the canonical software-GPU
// tell. Spoof to a plausible Apple M1 Pro string.
const getParameter = WebGLRenderingContext.prototype.getParameter;
WebGLRenderingContext.prototype.getParameter = function (parameter) {
// UNMASKED_VENDOR_WEBGL (37445) → 'Apple Inc.'
if (parameter === 37445) return 'Apple Inc.';
// UNMASKED_RENDERER_WEBGL (37446) → realistic Apple silicon string
if (parameter === 37446) return 'Apple M1 Pro, OpenGL 4.1';
return getParameter.call(this, parameter);
};
} catch {}
try {
// 3. navigator.plugins: real PluginArray with MimeType objects.
const makePlugin = (name, filename, desc, mimes) => {
const p = Object.create(Plugin.prototype);
Object.defineProperties(p, {
name: { get: () => name },
filename: { get: () => filename },
description: { get: () => desc },
length: { get: () => mimes.length },
});
mimes.forEach((m, i) => { p[i] = m; });
p.item = (i) => mimes[i];
p.namedItem = (n) => mimes.find((m) => m.type === n);
return p;
};
const makeMime = (type, suffixes, desc) => {
const m = Object.create(MimeType.prototype);
Object.defineProperties(m, {
type: { get: () => type },
suffixes: { get: () => suffixes },
description: { get: () => desc },
});
return m;
};
const pdfMime = makeMime('application/pdf', 'pdf', '');
const cpdfMime = makeMime('application/x-google-chrome-pdf', 'pdf', 'Portable Document Format');
const plugins = [
makePlugin('PDF Viewer', 'internal-pdf-viewer', '', [pdfMime]),
makePlugin('Chrome PDF Viewer', 'internal-pdf-viewer', '', [cpdfMime]),
makePlugin('Chromium PDF Viewer', 'internal-pdf-viewer', '', [cpdfMime]),
];
Object.defineProperty(navigator, 'plugins', {
get: () => {
const arr = Object.create(PluginArray.prototype);
Object.defineProperty(arr, 'length', { get: () => plugins.length });
plugins.forEach((p, i) => { arr[i] = p; });
arr.item = (i) => plugins[i];
arr.namedItem = (n) => plugins.find((p) => p.name === n);
arr.refresh = () => {};
return arr;
},
});
} catch {}
try {
// 4. window.chrome shape — chrome.app + chrome.runtime + loadTimes/csi.
if (!window.chrome) {
window.chrome = {};
}
if (!window.chrome.runtime) {
window.chrome.runtime = { OnInstalledReason: {}, OnRestartRequiredReason: {} };
}
if (!window.chrome.app) {
window.chrome.app = {
isInstalled: false,
InstallState: { DISABLED: 'disabled', INSTALLED: 'installed', NOT_INSTALLED: 'not_installed' },
RunningState: { CANNOT_RUN: 'cannot_run', READY_TO_RUN: 'ready_to_run', RUNNING: 'running' },
};
}
if (!window.chrome.loadTimes) {
window.chrome.loadTimes = function () {
return { commitLoadTime: Date.now() / 1000, finishLoadTime: Date.now() / 1000 };
};
}
if (!window.chrome.csi) {
window.chrome.csi = function () {
return { startE: Date.now(), onloadT: Date.now(), pageT: 0, tran: 15 };
};
}
} catch {}
try {
// 5. mediaDevices — some headless builds drop it entirely.
if (!navigator.mediaDevices) {
Object.defineProperty(navigator, 'mediaDevices', {
get: () => ({ enumerateDevices: () => Promise.resolve([]) }),
});
}
} catch {}
try {
// 6. CDP cdc_* property cleanup. Chromium under CDP sets cdc_*-prefixed
// globals (driver injection markers); a bot detector finds them by
// iterating window keys. Strip all matching keys.
for (const k of Object.keys(window)) {
if (k.startsWith('cdc_')) {
try { delete window[k]; } catch {}
}
}
} catch {}
})();
`;
// Extended stealth is opt-in: GSTACK_STEALTH must be exactly "extended", "1"
// or "true" (case-sensitive). Anything else, including unset, disables it.
function extendedModeEnabled(): boolean {
  switch (process.env.GSTACK_STEALTH) {
    case 'extended':
    case '1':
    case 'true':
      return true;
    default:
      return false;
  }
}
/**
* Apply stealth patches to a fresh BrowserContext (or persistent
* context). Called by browser-manager.launch() and launchHeaded().
* Always applies the WEBDRIVER_MASK_SCRIPT; only applies the
* EXTENDED_STEALTH_SCRIPT when GSTACK_STEALTH=extended.
* Apply stealth patches to a fresh BrowserContext (or persistent context).
* Called by browser-manager.launch() and launchHeaded().
*/
export async function applyStealth(context: BrowserContext): Promise<void> {
  // The webdriver mask is installed unconditionally and first.
  await context.addInitScript({ content: WEBDRIVER_MASK_SCRIPT });
  // The extended patch set is layered on top only when the opt-in is active.
  if (!extendedModeEnabled()) return;
  await context.addInitScript({ content: EXTENDED_STEALTH_SCRIPT });
}
/**
* Args added to chromium.launch's `args` to suppress the
* AutomationControlled blink feature. This is independent of the init
* script it changes how Chromium identifies itself in the protocol
* layer.
* script it changes how Chromium identifies itself in the protocol layer.
*/
export const STEALTH_LAUNCH_ARGS = [
'--disable-blink-features=AutomationControlled',
];
/**
 * Test-only helper: report whether extended mode is currently active.
 * Thin exported wrapper around extendedModeEnabled(); evaluated on every call.
 */
export function isExtendedStealthEnabled(): boolean {
  return extendedModeEnabled();
}

View File

@ -1,143 +0,0 @@
/**
* terminal-agent process-control primitives — shared by the cli.ts spawn site,
* the server.ts shutdown teardown, and the v1.44 watchdog/respawn loop.
*
* Why this exists: pre-v1.44 used `pkill -f terminal-agent\.ts`, which
* matches any process whose argv contains the string and would kill
* sibling gstack sessions on the same host. The agent now writes a
* structured `terminal-agent-pid` record (`{pid, gen, startedAt}`) and
* every kill site routes through `killAgentByRecord` here — identity-based,
* no regex.
*
* The `gen` field is a per-boot generation counter. Loopback /internal/*
* calls from the parent server include `X-Browse-Gen` so a slow agent that
* the watchdog respawned around can't accidentally service a stale grant
* from the old generation.
*/
import * as fs from 'fs';
import * as path from 'path';
import { safeUnlink, safeKill, isProcessAlive } from './error-handling';
import { writeSecureFile, mkdirSecure } from './file-permissions';
/**
 * Locate the terminal-agent script on disk.
 *
 * Two locations are probed in order, and the first that exists wins:
 *   1. `terminal-agent.ts` inside `metaDir` (defaults to this module's directory)
 *   2. `../src/terminal-agent.ts` relative to the directory of `execPath`
 *      (defaults to process.execPath)
 *
 * @param searchHints Optional overrides for the two base locations.
 * @returns The resolved absolute path, or null when neither candidate exists.
 */
export function resolveTerminalAgentScript(searchHints: { metaDir?: string; execPath?: string } = {}): string | null {
  const sourceDir = searchHints.metaDir || __dirname;
  const binaryPath = searchHints.execPath || process.execPath;

  const besideSource = path.resolve(sourceDir, 'terminal-agent.ts');
  if (fs.existsSync(besideSource)) return besideSource;

  const besideBinary = path.resolve(path.dirname(binaryPath), '..', 'src', 'terminal-agent.ts');
  return fs.existsSync(besideBinary) ? besideBinary : null;
}
/**
 * Replace whatever terminal-agent is on record with a freshly spawned one.
 *
 * Steps, in order:
 *   1. Read the record next to `stateFile`; if present, SIGTERM that agent and
 *      clear the record.
 *   2. Resolve the agent script (`scriptPath` override or
 *      resolveTerminalAgentScript()); give up with null when it is missing.
 *   3. `Bun.spawn(['bun', 'run', script])` with stdio ignored, the parent env
 *      plus BROWSE_STATE_FILE / BROWSE_SERVER_PORT / `extraEnv`, then unref
 *      the child so it does not keep the parent alive.
 *
 * Note that step 1 runs before step 2, so a prior agent is terminated even
 * when no script can be found afterwards.
 *
 * @returns PID of the new agent, or null when the script cannot be located
 *   (or the spawned process reports no pid).
 */
export function spawnTerminalAgent(opts: {
  stateFile: string;
  serverPort: number;
  cwd?: string;
  /** Optional extra env vars to add to the agent's process env. */
  extraEnv?: Record<string, string>;
  /** Override script lookup for tests. */
  scriptPath?: string;
}): number | null {
  const stateDir = path.dirname(opts.stateFile);

  const previous = readAgentRecord(stateDir);
  if (previous !== null) {
    killAgentByRecord(previous, 'SIGTERM');
    clearAgentRecord(stateDir);
  }

  const script = opts.scriptPath || resolveTerminalAgentScript();
  if (!script) return null;
  if (!fs.existsSync(script)) return null;

  const child = (Bun as any).spawn(['bun', 'run', script], {
    cwd: opts.cwd || process.cwd(),
    env: {
      ...process.env,
      BROWSE_STATE_FILE: opts.stateFile,
      BROWSE_SERVER_PORT: String(opts.serverPort),
      // Caller-supplied values are spread last so they win over the defaults.
      ...(opts.extraEnv || {}),
    },
    stdio: ['ignore', 'ignore', 'ignore'],
  });
  child.unref?.();
  return child.pid ?? null;
}
/** Persisted identity of a running terminal-agent (contents of `terminal-agent-pid`). */
export interface AgentRecord {
  /** OS process id of the agent. Always a positive integer in a valid record. */
  pid: number;
  /** Random per-boot identifier. Loopback /internal/* sees X-Browse-Gen: <gen>. */
  gen: string;
  /** ms since epoch. Reserved for future PID-reuse guards. */
  startedAt: number;
}

/** Absolute or relative path of the record file inside `stateDir`. */
export function agentRecordPath(stateDir: string): string {
  return path.join(stateDir, 'terminal-agent-pid');
}

/**
 * Read the current record.
 *
 * Returns null when the file is missing, unreadable, not valid JSON, or does
 * not have the `{pid: number, gen: string, startedAt: number}` shape. The pid
 * must additionally be a positive integer: kill(2) treats pid 0 and negative
 * pids as process-group targets, so a corrupted record must never reach a kill
 * site.
 */
export function readAgentRecord(stateDir: string): AgentRecord | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(fs.readFileSync(agentRecordPath(stateDir), 'utf-8'));
  } catch {
    return null;
  }
  if (typeof parsed !== 'object' || parsed === null) return null;
  const { pid, gen, startedAt } = parsed as Record<string, unknown>;
  if (typeof pid !== 'number' || !Number.isInteger(pid) || pid <= 0) return null;
  if (typeof gen !== 'string' || typeof startedAt !== 'number') return null;
  return parsed as AgentRecord;
}
/**
 * Write `record` to `<stateDir>/terminal-agent-pid` via a temp file plus
 * rename, so readers never observe a partially written record.
 *
 * The state directory is created on a best-effort basis first; a failure there
 * is ignored and surfaces through the write below instead. If the write or the
 * rename fails, the temp file is removed before the error is rethrown, so
 * failed attempts do not leave `terminal-agent-pid.tmp-<pid>` files behind.
 *
 * @throws Whatever writeSecureFile or fs.renameSync throws.
 */
export function writeAgentRecord(stateDir: string, record: AgentRecord): void {
  try { mkdirSecure(stateDir); } catch {}
  const target = agentRecordPath(stateDir);
  const tmp = `${target}.tmp-${process.pid}`;
  try {
    writeSecureFile(tmp, JSON.stringify(record));
    fs.renameSync(tmp, target);
  } catch (err) {
    // NOTE(review): assumes safeUnlink ignores a missing file, as its use in
    // clearAgentRecord suggests — confirm in ./error-handling.
    safeUnlink(tmp);
    throw err;
  }
}
/** Remove the record file for `stateDir` by passing its path to safeUnlink. */
export function clearAgentRecord(stateDir: string): void {
  safeUnlink(agentRecordPath(stateDir));
}
/**
 * Signal the agent identified by `record`.
 *
 * @param record Identity of the agent to signal.
 * @param signal Defaults to SIGTERM, giving the agent a chance to run its own
 *   SIGTERM cleanup.
 * @returns true if a signal was sent to a PID that was alive at check time;
 *   false if the PID was invalid or already dead (no-op).
 *
 * A pid that is not a positive integer is refused outright: kill(2) interprets
 * pid 0 and negative pids as process-group targets, so signalling one could
 * take down this process or unrelated ones.
 *
 * The liveness check before signalling is best effort only. It cannot detect
 * that the recorded PID was reaped and reused by an unrelated process; the
 * original rationale is that the gap between record-write and watchdog-tick is
 * small (60s max).
 * NOTE(review): the original doc says ESRCH is swallowed by safeKill, so this
 * never throws — confirm in ./error-handling.
 */
export function killAgentByRecord(
  record: AgentRecord,
  signal: NodeJS.Signals = 'SIGTERM',
): boolean {
  if (!Number.isInteger(record.pid) || record.pid <= 0) return false;
  if (!isProcessAlive(record.pid)) return false;
  safeKill(record.pid, signal);
  return true;
}

View File

@ -25,47 +25,16 @@ import * as path from 'path';
import * as crypto from 'crypto';
import { writeSecureFile, mkdirSecure } from './file-permissions';
import { safeUnlink } from './error-handling';
import { writeAgentRecord, clearAgentRecord } from './terminal-agent-control';
// Browse state file path. BROWSE_STATE_FILE overrides the default of
// <HOME>/.gstack/browse.json (with /tmp standing in when HOME is unset).
const STATE_FILE = process.env.BROWSE_STATE_FILE || path.join(process.env.HOME || '/tmp', '.gstack', 'browse.json');
// `terminal-port` file in the same directory as STATE_FILE.
const PORT_FILE = path.join(path.dirname(STATE_FILE), 'terminal-port');
// Parsed from BROWSE_SERVER_PORT; 0 when unset or empty.
// NOTE(review): a non-numeric value parses to NaN — confirm callers tolerate that.
const BROWSE_SERVER_PORT = parseInt(process.env.BROWSE_SERVER_PORT || '0', 10);
const EXTENSION_ID = process.env.BROWSE_EXTENSION_ID || ''; // optional: tighten Origin check
// Fresh 32-byte random token generated at module load.
// NOTE(review): the original comment says it is shared with the parent via env
// at spawn, but no env read is visible on this line — confirm how it is shared.
const INTERNAL_TOKEN = crypto.randomBytes(32).toString('base64url'); // shared with parent server via env at spawn
/**
* Per-boot generation identifier. Loopback /internal/* callers include
* `X-Browse-Gen: <CURRENT_GEN>` so a slow agent the watchdog respawned
* around can't service a stale grant from the prior generation. Absent
* header means "legacy caller" and is accepted (backward compat); a
* present-but-mismatched header returns 409 stale generation.
*/
const CURRENT_GEN = crypto.randomBytes(16).toString('base64url');
// In-memory attach-token registry. Parent posts /internal/grant after
// /pty-session; we validate WS upgrades against this map.
//
// v1.44+: each token is bound to a v1.44 sessionId (the stable, non-secret
// identifier from browse/src/pty-session-lease.ts). The token grants ONE
// attach for ONE session — re-attach within the lease window comes through
// /pty-session/reattach, which mints a fresh token for the same sessionId.
//
// Legacy callers can still pass `{token}` without sessionId (the value
// stays null and the WS upgrade still works); those callers don't get
// re-attach because there's no stable identifier to match against.
const validTokens = new Map<string, string | null>(); // token → sessionId
/**
* Reverse index for re-attach lookups: sessionId live PtySession.
* Populated when a WS first attaches with a known sessionId; cleared when
* the session is disposed or the lease expires. Used by:
* - /ws upgrade: if the incoming attachToken maps to a sessionId that
* already has a live session, REPLACE its ws ref instead of spawning.
* - /internal/restart: enumerate by sessionId, dispose that one session.
*
* Kept separate from the WeakMap<ws,PtySession> so re-attach can find the
* session by id even after the original ws has gone.
*/
const sessionsById = new Map<string, PtySession>();
// In-memory cookie token registry. Parent posts /internal/grant after
// /pty-session; we validate WS cookies against this set.
const validTokens = new Set<string>();
// Active PTY session per WS. One terminal per connection. Codex finding #4:
// uncaught handlers below catch bugs in framing/cleanup so they don't kill
@ -77,154 +46,12 @@ process.on('unhandledRejection', (reason) => {
console.error('[terminal-agent] unhandledRejection:', reason);
});
export interface PtySession {
interface PtySession {
proc: any | null; // Bun.Subprocess once spawned
cols: number;
rows: number;
cookie: string;
/**
* Current attached websocket. Swapped on re-attach (Commit 3): when a new
* WS upgrade matches this session's sessionId, the old liveWs is gone
* and the new ws takes its place. The PTY on-data callback closes over
* `session`, not the original `ws`, so it always writes to the current
* liveWs (or skips the write when detached and liveWs is null).
*/
liveWs: any | null;
/**
* v1.44+ stable session identifier (from pty-session-lease). Null for
* legacy /internal/grant callers that didn't pass one. Used for
* targeted /internal/restart and Commit 3 re-attach lookups.
*/
sessionId: string | null;
spawned: boolean;
/**
* 25s server-side WS keepalive interval (v1.44+). Set in the WS `open`
* handler, cleared in `close`. We send `{type:"ping",ts}` text frames so
* NAT boxes, proxies, and Chrome's MV3 panel-suspend heuristics see the
* connection as active; the client either replies with `{type:"pong"}`
* or fires its own 25s `{type:"keepalive"}` cycle. Either path keeps
* the underlying TCP from being silently dropped.
*/
pingInterval: ReturnType<typeof setInterval> | null;
/**
* Commit 3 scrollback ring buffer. Each PTY write appends a frame; the
* total byte count is capped at RING_BUFFER_MAX_BYTES with oldest frames
* evicted first. On re-attach, the surviving frames are replayed as a
* single binary frame (prefixed with the v1.44 reset sequence) so the
* user sees their last screen of output. Frame boundaries preserve UTF-8
* + ANSI-CSI boundaries because each frame is the exact buffer that
* spawnClaude's on-data callback emitted.
*/
ringBuffer: Buffer[];
ringBufferBytes: number;
/**
* Tracks whether the PTY is currently in xterm alt-screen mode. claude's
* TUI enters alt-screen (CSI ?1049h) during tool calls and exits (CSI
* ?1049l) when returning to the main prompt. On re-attach, the replay
* prelude must re-enter alt-screen if the original PTY left it active,
* otherwise the replay renders against the main screen and the cursor
* + colors end up in the wrong place.
*/
altScreenActive: boolean;
/**
* Detach state machine (Commit 3). When the WS closes for a reason OTHER
* than the v1.44 intentional-restart code (4001), we keep the PtySession
* alive for the detach window (default 60s) so a re-attach within the
* window can resume the same PTY and replay the ring buffer. The timer
* disposes the session if no re-attach arrives in time.
*/
detached: boolean;
detachTimer: ReturnType<typeof setTimeout> | null;
}
/**
 * WS keepalive interval in ms. The 25s default is comfortably under the lowest
 * common NAT idle timeout (typically 30-60s) and shorter than Chromium's
 * WebSocket dead-peer threshold. Test-overridable via
 * GSTACK_PTY_KEEPALIVE_INTERVAL_MS so the v1.44 e2e tests can compress
 * idle-window assertions to <1s.
 *
 * A malformed or negative override falls back to the default: parseInt yields
 * NaN for garbage input, and timers coerce a NaN delay to roughly 1 ms, which
 * would turn the keepalive into a ping flood.
 */
const KEEPALIVE_INTERVAL_MS = ((): number => {
  const fallback = 25000;
  const parsed = parseInt(process.env.GSTACK_PTY_KEEPALIVE_INTERVAL_MS || '25000', 10);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
})();
/**
 * Commit 3 scrollback ring buffer cap, in bytes. 1 MB is enough for a full
 * screen of dense claude output (including a recent tool result), small enough
 * that a worst-case 10 detached sessions only cost ~10 MB of RSS.
 * Env-overridable via GSTACK_PTY_RING_BUFFER_BYTES so e2e tests can verify
 * eviction without writing 1 MB of fixture data per assertion.
 *
 * A malformed or negative override falls back to the default: with a NaN cap
 * the `bytes > cap` eviction test is always false and the buffer would grow
 * without bound.
 */
const RING_BUFFER_MAX_BYTES = ((): number => {
  const fallback = 1024 * 1024;
  const parsed = parseInt(process.env.GSTACK_PTY_RING_BUFFER_BYTES || `${1024 * 1024}`, 10);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
})();
/**
 * Commit 3 detach window — how long, in ms, to keep a session alive after WS
 * close (with any code other than 4001 intentional-restart) so a re-attach can
 * resume the same PTY. 60s is long enough to cover a Chrome MV3 service-worker
 * suspend cycle, a wifi blip, or a brief laptop sleep; short enough that
 * genuinely-closed sessions don't stack up unbounded. Env-overridable via
 * GSTACK_PTY_DETACH_WINDOW_MS.
 *
 * A malformed or negative override falls back to the default: timers coerce a
 * NaN or negative delay to roughly 1 ms, which would dispose detached sessions
 * almost immediately.
 */
const DETACH_WINDOW_MS = ((): number => {
  const fallback = 60000;
  const parsed = parseInt(process.env.GSTACK_PTY_DETACH_WINDOW_MS || '60000', 10);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
})();
/**
 * Append one PTY output frame to the session's scrollback ring buffer.
 *
 * After the append, whole frames are evicted from the front while the byte
 * total exceeds RING_BUFFER_MAX_BYTES, always leaving at least one frame.
 * Eviction never splits a frame.
 *
 * Side effect: the appended frame is scanned for the xterm alt-screen enter
 * (CSI ?1049h) and exit (CSI ?1049l) sequences. Whichever occurs last in the
 * frame decides `session.altScreenActive`; a frame containing neither leaves
 * the flag unchanged. Only sequences wholly inside a single frame are seen.
 */
export function appendToRingBuffer(session: PtySession, frame: Buffer): void {
  const frames = session.ringBuffer;
  frames.push(frame);
  session.ringBufferBytes += frame.length;
  while (frames.length > 1 && session.ringBufferBytes > RING_BUFFER_MAX_BYTES) {
    const oldest = frames.shift()!;
    session.ringBufferBytes -= oldest.length;
  }

  // latin1 maps each byte to one char, so string indices equal byte offsets;
  // the sequences being searched are pure 7-bit ASCII.
  const text = frame.toString('latin1');
  const lastEnter = text.lastIndexOf('\x1b[?1049h');
  const lastExit = text.lastIndexOf('\x1b[?1049l');
  // The two indices are equal only when both are -1 (neither sequence present).
  if (lastEnter === lastExit) return;
  session.altScreenActive = lastEnter > lastExit;
}
/**
 * Build the re-attach replay payload: a reset prelude followed by the
 * accumulated ring buffer, concatenated into one Buffer.
 *
 * Byte layout produced here:
 *   1. `\x1b[!p` (DECSTR soft reset), always
 *   2. `\x1b[?1049h` (enter alt-screen), only when `session.altScreenActive`
 *   3. every ring-buffer frame, in append order
 *
 * Per the original design notes, the client writes RIS (`\x1bc`) to xterm
 * before feeding this payload in, and waits for a `{type:"reattach-begin"}`
 * text frame before treating the next binary frame as replay. That client
 * side is not part of this function.
 */
export function buildReplayPayload(session: PtySession): Buffer {
  const prelude: Buffer[] = [Buffer.from('\x1b[!p')];
  if (session.altScreenActive) {
    prelude.push(Buffer.from('\x1b[?1049h'));
  }
  return Buffer.concat([...prelude, ...session.ringBuffer]);
}
const sessions = new WeakMap<any, PtySession>(); // ws -> session
@ -374,118 +201,6 @@ function disposeSession(session: PtySession): void {
*
* Everything else returns 404. The listener binds 127.0.0.1 only.
*/
/**
 * Gatekeeper shared by the loopback /internal/* routes: bearer auth plus
 * the v1.44 X-Browse-Gen generation check, in one place so a new
 * /internal/* route only needs a single call.
 *
 * @param req Incoming request; only its headers are inspected.
 * @returns `null` when the request may proceed. Otherwise the Response to
 *   send back: 403 "forbidden" when the Authorization header is not exactly
 *   `Bearer <INTERNAL_TOKEN>`, or 409 "stale generation" when a non-empty
 *   X-Browse-Gen header differs from CURRENT_GEN. A missing or empty
 *   X-Browse-Gen header skips the generation check.
 */
function checkInternalAuth(req: Request): Response | null {
  const expectedAuth = `Bearer ${INTERNAL_TOKEN}`;
  if (req.headers.get('authorization') !== expectedAuth) {
    return new Response('forbidden', { status: 403 });
  }

  const callerGen = req.headers.get('x-browse-gen');
  const isStale = Boolean(callerGen) && callerGen !== CURRENT_GEN;
  return isStale ? new Response('stale generation', { status: 409 }) : null;
}
/**
 * Shared wrapper for JSON-bodied /internal/* handlers. It runs, in order:
 * the checkInternalAuth gate, JSON body parsing, the handler itself, and
 * response shaping — the boilerplate every such route would otherwise
 * repeat, so a new route becomes a single call to internalHandler.
 *
 * Response shaping:
 *   - the gate's Response is returned as-is when the request is denied;
 *   - an unparseable body yields 400 "bad";
 *   - a Response returned by `fn` is passed through untouched, letting the
 *     handler customize status / headers;
 *   - `undefined` / `null` from `fn` yields a plain "ok";
 *   - any other value is JSON-stringified into a 200 application/json reply;
 *   - anything thrown by `fn` (or while serializing its result) collapses
 *     to 400 "bad".
 *
 * @param req Incoming request.
 * @param fn Route logic, called with the parsed JSON body.
 * @returns The Response to send.
 */
async function internalHandler<T>(
  req: Request,
  fn: (body: any) => T | Promise<T> | Response | Promise<Response>,
): Promise<Response> {
  const badRequest = (): Response => new Response('bad', { status: 400 });

  const rejection = checkInternalAuth(req);
  if (rejection) return rejection;

  let payload: any;
  try {
    payload = await req.json();
  } catch {
    return badRequest();
  }

  try {
    const outcome = await fn(payload);
    if (outcome instanceof Response) return outcome;
    if (outcome == null) return new Response('ok');
    const json = JSON.stringify(outcome);
    return new Response(json, {
      status: 200,
      headers: { 'Content-Type': 'application/json' },
    });
  } catch {
    return badRequest();
  }
}
/**
 * Start the claude PTY for `session` unless a spawn was already attempted.
 * Shared by the legacy binary-frame spawn trigger and the v1.44 explicit
 * `{type:"start"}` text-frame trigger. Idempotent on `session.spawned`.
 *
 * PTY output is re-chunked on UTF-8 codepoint boundaries (issue #1272),
 * appended to the session ring buffer, and forwarded to whichever socket is
 * currently in `session.liveWs` (skipped while that is null).
 *
 * When spawnClaude yields no process, a CLAUDE_NOT_FOUND error frame is sent
 * on `ws` and the socket is closed with code 4404. Note that
 * `session.spawned` stays true after such a failure, so later calls return
 * true without retrying.
 *
 * @param ws Socket that triggered the spawn; used only for error reporting.
 * @param session Session that will own the process.
 * @returns true if the session was already marked spawned or the spawn
 *   succeeded; false if this call's spawn attempt failed.
 */
function maybeSpawnPty(ws: any, session: PtySession): boolean {
  if (session.spawned) return true;
  session.spawned = true;

  // Length of the prefix of `buf` that ends on a complete UTF-8 codepoint.
  // Only the last 3 bytes are inspected, which is enough to find the lead
  // byte of any sequence that could still be incomplete.
  const completePrefixLength = (buf: Buffer): number => {
    const floor = Math.max(0, buf.length - 3);
    for (let idx = buf.length - 1; idx >= floor; idx--) {
      const byte = buf[idx];
      if ((byte & 0x80) === 0) return idx + 1; // ASCII byte: cut right after it
      if ((byte & 0xC0) === 0x80) continue; // continuation byte: keep looking for the lead
      // Lead byte: the sequence is complete only if all its bytes have arrived.
      const needed = (byte & 0xE0) === 0xC0 ? 2 : (byte & 0xF0) === 0xE0 ? 3 : 4;
      return buf.length - idx >= needed ? buf.length : idx;
    }
    return buf.length;
  };

  // Tail bytes of an unfinished multibyte sequence, held for the next chunk.
  let pending = Buffer.alloc(0);

  const child = spawnClaude(session.cols, session.rows, (chunk) => {
    const merged = Buffer.concat([pending, Buffer.from(chunk)]);
    const cut = completePrefixLength(merged);
    const ready = merged.slice(0, cut);
    pending = merged.slice(cut);
    if (ready.length === 0) return;

    // Always record into the ring buffer so a re-attach can replay. The
    // callback closes over `session`, not the original `ws`, so output goes
    // to whichever socket is attached right now — or nowhere while detached.
    appendToRingBuffer(session, ready);
    if (session.liveWs) {
      try { session.liveWs.sendBinary(ready); } catch {}
    }
  });

  if (!child) {
    // Best-effort error report; the socket may already be gone.
    try {
      const notFound = JSON.stringify({
        type: 'error',
        code: 'CLAUDE_NOT_FOUND',
        message: 'claude CLI not on PATH. Install: https://docs.anthropic.com/en/docs/claude-code',
      });
      ws.send(notFound);
      ws.close(4404, 'claude not found');
    } catch {}
    return false;
  }

  session.proc = child;
  // Close the currently attached socket once the child process exits.
  child.exited?.then?.(() => {
    try { session.liveWs?.close(1000, 'pty exited'); } catch {}
  });
  return true;
}
function buildServer() {
return Bun.serve({
hostname: '127.0.0.1',
@ -496,66 +211,29 @@ function buildServer() {
const url = new URL(req.url);
// /internal/grant — loopback-only handshake from parent server.
// v1.44+: accepts `{token, sessionId?}`. The sessionId binding lets
// the agent route re-attach attempts (same sessionId, fresh token)
// back to the same PtySession. Legacy callers passing just `{token}`
// still work — sessionId becomes null and re-attach is unavailable
// for that grant.
if (url.pathname === '/internal/grant' && req.method === 'POST') {
return internalHandler(req, (body) => {
const auth = req.headers.get('authorization');
if (auth !== `Bearer ${INTERNAL_TOKEN}`) {
return new Response('forbidden', { status: 403 });
}
return req.json().then((body: any) => {
if (typeof body?.token === 'string' && body.token.length > 16) {
const sid = typeof body?.sessionId === 'string' && body.sessionId.length > 0
? body.sessionId
: null;
validTokens.set(body.token, sid);
validTokens.add(body.token);
}
});
return new Response('ok');
}).catch(() => new Response('bad', { status: 400 }));
}
// /internal/revoke — drop a token (called on WS close or bootstrap reload)
if (url.pathname === '/internal/revoke' && req.method === 'POST') {
return internalHandler(req, (body) => {
const auth = req.headers.get('authorization');
if (auth !== `Bearer ${INTERNAL_TOKEN}`) {
return new Response('forbidden', { status: 403 });
}
return req.json().then((body: any) => {
if (typeof body?.token === 'string') validTokens.delete(body.token);
});
}
// /internal/restart — dispose the PtySession for a specific sessionId.
// Scoped to one caller (not enumerate-all). Server.ts /pty-restart
// posts here with the caller's sessionId; we kill ONLY that PTY,
// leaving any other live sidebar tabs untouched. Codex T2 of the
// eng review caught this gap — pre-spec the route would have
// disposed all sessions.
if (url.pathname === '/internal/restart' && req.method === 'POST') {
return internalHandler(req, (body) => {
const sid = typeof body?.sessionId === 'string' ? body.sessionId : null;
if (!sid) return { killed: 0 };
const session = sessionsById.get(sid);
if (!session) return { killed: 0 };
// Cancel any pending detach timer before disposal — otherwise it
// would fire later against an already-disposed session.
if (session.detachTimer) {
clearTimeout(session.detachTimer);
session.detachTimer = null;
}
disposeSession(session);
sessionsById.delete(sid);
return { killed: 1 };
});
}
// /internal/healthz — liveness probe used by the v1.44 watchdog.
// Returns this agent's pid + gen + active session count without
// touching claude binary lookup (which can fail for non-process
// reasons and isn't a useful liveness signal). GET — no body to parse,
// so it stays on the bare checkInternalAuth gate.
if (url.pathname === '/internal/healthz' && req.method === 'GET') {
const denied = checkInternalAuth(req);
if (denied) return denied;
return new Response(JSON.stringify({
pid: process.pid,
gen: CURRENT_GEN,
sessions: validTokens.size,
}), { status: 200, headers: { 'Content-Type': 'application/json' } });
return new Response('ok');
}).catch(() => new Response('bad', { status: 400 }));
}
// /claude-available — bootstrap card hits this when user clicks "I installed it".
@ -627,13 +305,8 @@ function buildServer() {
return new Response('unauthorized', { status: 401 });
}
// v1.44+: surface the token's sessionId binding to the upgraded ws.
// open() reads it via ws.data and registers the session in
// sessionsById so /internal/restart and (Commit 3) re-attach
// lookups can find it.
const sessionId = validTokens.get(token) ?? null;
const upgraded = server.upgrade(req, {
data: { cookie: token, sessionId },
data: { cookie: token },
// Echo the protocol back so the browser accepts the upgrade.
// Required when the client sends Sec-WebSocket-Protocol — the
// server MUST select one of the offered protocols, otherwise
@ -647,105 +320,22 @@ function buildServer() {
},
websocket: {
/**
* Spawn the claude PTY for `session` if it hasn't been spawned yet.
* Called from both message paths: the legacy binary-frame trigger
* (any keystroke) AND the v1.44 explicit `{type:"start"}` trigger
* (forceRestart sends this on every fresh WS to get an eager prompt
* without requiring the user to type). Idempotent: a second call
* after `spawned: true` is a no-op.
*/
open(ws) {
const sessionId = (ws.data as any)?.sessionId ?? null;
const cookie = (ws.data as any)?.cookie || '';
// Commit 3 re-attach: if this sessionId already has a detached
// PtySession in sessionsById, REPLACE its liveWs ref and replay
// the ring buffer. The PTY process is unchanged — claude keeps
// running through the wifi blip / panel-suspend cycle.
if (sessionId) {
const existing = sessionsById.get(sessionId);
if (existing) {
if (existing.detachTimer) {
clearTimeout(existing.detachTimer);
existing.detachTimer = null;
}
existing.detached = false;
existing.liveWs = ws;
existing.cookie = cookie;
// Re-bind the WS-keyed map so resize/close/message handlers
// can still find this session via the new ws.
sessions.set(ws, existing);
// Restart keepalive on the new ws.
if (existing.pingInterval) clearInterval(existing.pingInterval);
existing.pingInterval = setInterval(() => {
try { ws.send(JSON.stringify({ type: 'ping', ts: Date.now() })); } catch {}
}, KEEPALIVE_INTERVAL_MS);
// Tell the client to prep its xterm (write RIS) before the
// replay binary arrives. Order matters — the binary frame
// immediately after this text frame IS the replay.
try { ws.send(JSON.stringify({ type: 'reattach-begin', sessionId })); } catch {}
try { ws.sendBinary(buildReplayPayload(existing)); } catch {}
return;
}
}
const session: PtySession = {
proc: null,
cols: 80,
rows: 24,
cookie,
liveWs: ws,
sessionId,
spawned: false,
pingInterval: null,
ringBuffer: [],
ringBufferBytes: 0,
altScreenActive: false,
detached: false,
detachTimer: null,
};
session.pingInterval = setInterval(() => {
try {
ws.send(JSON.stringify({ type: 'ping', ts: Date.now() }));
} catch {
// ws likely closed mid-tick; close handler clears the interval.
}
}, KEEPALIVE_INTERVAL_MS);
sessions.set(ws, session);
// Index by sessionId for /internal/restart + Commit 3 re-attach.
if (sessionId) sessionsById.set(sessionId, session);
},
message(ws, raw) {
let session = sessions.get(ws);
if (!session) {
// Fallback for any path where open() didn't fire (shouldn't happen
// in Bun.serve but keeps the spawn path safe). No keepalive on
// this branch — open() is the supported entry point.
session = {
proc: null,
cols: 80,
rows: 24,
cookie: (ws.data as any)?.cookie || '',
liveWs: ws,
sessionId: (ws.data as any)?.sessionId ?? null,
spawned: false,
pingInterval: null,
ringBuffer: [],
ringBufferBytes: 0,
altScreenActive: false,
detached: false,
detachTimer: null,
};
sessions.set(ws, session);
if (session.sessionId) sessionsById.set(session.sessionId, session);
}
// Text frames are control messages: {type: "resize", cols, rows},
// {type: "tabSwitch", tabId, url, title}, {type: "tabState", ...},
// or v1.44 keepalive frames: {type: "pong", ts}, {type: "keepalive"}.
// Binary frames are raw input bytes destined for the PTY stdin.
// Text frames are control messages: {type: "resize", cols, rows} or
// {type: "tabSwitch", tabId, url, title}. Binary frames are raw input
// bytes destined for the PTY stdin.
if (typeof raw === 'string') {
let msg: any;
try { msg = JSON.parse(raw); } catch { return; }
@ -765,32 +355,50 @@ function buildServer() {
handleTabState(msg);
return;
}
if (msg?.type === 'pong' || msg?.type === 'keepalive' || msg?.type === 'ping') {
// Keepalive frames — accepted and silently dropped. The mere
// fact that the WS carried this frame is the liveness signal;
// there's no application-level state to update at this layer.
// `ping` is acknowledged here too in case the client (or a
// future agent peer) mirrors our server-side ping shape.
return;
}
if (msg?.type === 'start') {
// v1.44 explicit spawn trigger. forceRestart sends this
// immediately on every fresh WS so claude boots without the
// user having to type a keystroke (pre-v1.44, the lazy-binary
// spawn made restart look stuck until the user typed). No-op
// if already spawned.
maybeSpawnPty(ws, session);
return;
}
// Unknown text frame — ignore.
return;
}
// Binary input. Lazy-spawn claude on the first byte if `start`
// wasn't sent first. Both paths land in the same maybeSpawnPty
// helper for behavior parity.
// Binary input. Lazy-spawn claude on the first byte.
if (!session.spawned) {
if (!maybeSpawnPty(ws, session)) return;
session.spawned = true;
// UTF-8 boundary detection to prevent splitting multi-byte characters (issue #1272).
// Buffer incomplete UTF-8 sequences until the next chunk completes them.
let leftover = Buffer.alloc(0);
const proc = spawnClaude(session.cols, session.rows, (chunk) => {
const combined = Buffer.concat([leftover, Buffer.from(chunk)]);
// Find the last index where a UTF-8 codepoint ends. Look back at most 3 bytes.
let safeEnd = combined.length;
for (let i = combined.length - 1; i >= Math.max(0, combined.length - 3); i--) {
const b = combined[i];
if ((b & 0x80) === 0) { safeEnd = i + 1; break; } // ASCII
if ((b & 0xC0) === 0x80) continue; // continuation byte
const expected = (b & 0xE0) === 0xC0 ? 2 : (b & 0xF0) === 0xE0 ? 3 : 4;
safeEnd = (combined.length - i >= expected) ? combined.length : i;
break;
}
const flush = combined.slice(0, safeEnd);
leftover = combined.slice(safeEnd);
if (flush.length) {
try { ws.sendBinary(flush); } catch {}
}
});
if (!proc) {
try {
ws.send(JSON.stringify({
type: 'error',
code: 'CLAUDE_NOT_FOUND',
message: 'claude CLI not on PATH. Install: https://docs.anthropic.com/en/docs/claude-code',
}));
ws.close(4404, 'claude not found');
} catch {}
return;
}
session.proc = proc;
// Watch for child exit so the WS closes cleanly when claude exits.
proc.exited?.then?.(() => {
try { ws.close(1000, 'pty exited'); } catch {}
});
}
try {
// raw is a Uint8Array; Bun.Terminal.write accepts string|Buffer.
@ -801,49 +409,16 @@ function buildServer() {
}
},
close(ws, code, _reason) {
close(ws) {
const session = sessions.get(ws);
if (!session) return;
// Always drop the WS-keyed map entry and the per-attach
// attachToken — the attach grant was single-use.
sessions.delete(ws);
if (session.cookie) validTokens.delete(session.cookie);
// Keepalive lives with the WS — every attach starts a fresh one.
if (session.pingInterval) {
clearInterval(session.pingInterval);
session.pingInterval = null;
}
// Commit 3 detach state machine. If the close was intentional
// (code 4001 = restart, 4404 = no-claude error), dispose
// immediately — there's no value in keeping the PTY alive.
// Otherwise enter the detach window: claude keeps running, the
// ring buffer keeps accumulating, and a re-attach with the same
// sessionId within DETACH_WINDOW_MS picks back up. If the timer
// fires without a re-attach, the session is disposed normally.
//
// Sessions without a sessionId (legacy single-shot grants) can't
// re-attach by definition — fall through to immediate dispose.
const intentional = code === 4001 || code === 4404 || code === 1000;
if (intentional || !session.sessionId) {
if (session) {
disposeSession(session);
if (session.sessionId) sessionsById.delete(session.sessionId);
return;
if (session.cookie) {
// Drop the cookie so it can't be replayed against a new PTY.
validTokens.delete(session.cookie);
}
sessions.delete(ws);
}
// Mark detached and start the disposal timer. The session stays
// in sessionsById so the next /ws upgrade with the same
// sessionId can find and reattach to it.
session.detached = true;
session.liveWs = null;
session.detachTimer = setTimeout(() => {
if (!session.detached) return; // re-attached in the meantime
disposeSession(session);
if (session.sessionId) sessionsById.delete(session.sessionId);
}, DETACH_WINDOW_MS);
// setTimeout returns a Bun Timer; unref so the detach window
// doesn't keep the process alive past natural shutdown.
(session.detachTimer as any)?.unref?.();
},
},
});
@ -973,25 +548,14 @@ function main() {
writeSecureFile(tmp, String(port));
fs.renameSync(tmp, PORT_FILE);
// Write identity-based agent record (pid + per-boot gen). Replaces the
// v1.43- `pkill -f terminal-agent\.ts` regex teardown that could kill
// sibling gstack sessions. Callers (cli.ts spawn site, server.ts
// shutdown, the v1.44 watchdog) now route through killAgentByRecord in
// terminal-agent-control.ts.
writeAgentRecord(dir, { pid: process.pid, gen: CURRENT_GEN, startedAt: Date.now() });
// Hand the parent the internal token so it can call /internal/grant.
// Parent learns INTERNAL_TOKEN via env (TERMINAL_AGENT_INTERNAL_TOKEN below).
// We just print it on stdout for the supervising process to pick up if it's
// not already in env. Defense against env races at spawn time.
console.log(`[terminal-agent] listening on 127.0.0.1:${port} pid=${process.pid} gen=${CURRENT_GEN}`);
console.log(`[terminal-agent] listening on 127.0.0.1:${port} pid=${process.pid}`);
// Cleanup port file + agent record on exit.
const cleanup = () => {
safeUnlink(PORT_FILE);
clearAgentRecord(dir);
process.exit(0);
};
// Cleanup port file on exit.
const cleanup = () => { safeUnlink(PORT_FILE); process.exit(0); };
process.on('SIGTERM', cleanup);
process.on('SIGINT', cleanup);
}

View File

@ -11,14 +11,12 @@ import { findInstalledBrowsers, importCookies, importCookiesViaCdp, hasV20Cookie
import { generatePickerCode } from './cookie-picker-routes';
import { validateNavigationUrl } from './url-validation';
import { validateOutputPath, validateReadPath } from './path-security';
import { guardScreenshotPath } from './screenshot-size-guard';
import * as fs from 'fs';
import * as path from 'path';
import type { SetContentWaitUntil } from './tab-session';
import { TEMP_DIR, isPathWithin } from './platform';
import { SAFE_DIRECTORIES } from './path-security';
import { modifyStyle, undoModification, resetModifications, getModificationHistory } from './cdp-inspector';
import { withCdpSession } from './cdp-bridge';
/**
* Aggressive page cleanup selectors and heuristics.
@ -1125,10 +1123,6 @@ export async function handleWriteCommand(
// Take screenshot
await page.screenshot({ path: outputPath, fullPage: !scrollTo });
// Guard against Anthropic vision API >2000px brick (#1214). Only
// applies to fullPage captures; scrollTo viewport-bound shots are
// already capped by the viewport size.
if (!scrollTo) await guardScreenshotPath(outputPath);
// Restore viewport
if (viewportWidth && originalViewport) {
@ -1410,10 +1404,9 @@ export async function handleWriteCommand(
validateOutputPath(outputPath);
try {
const data = await withCdpSession(page, async (cdp) => {
const result = await cdp.send('Page.captureSnapshot', { format: 'mhtml' });
return (result as { data: string }).data;
});
const cdp = await page.context().newCDPSession(page);
const { data } = await cdp.send('Page.captureSnapshot', { format: 'mhtml' });
await cdp.detach();
fs.writeFileSync(outputPath, data);
return `Archive saved: ${outputPath} (${Math.round(data.length / 1024)}KB, MHTML)`;
} catch (err: any) {

View File

@ -1,5 +1,4 @@
import { EventEmitter } from 'node:events';
import { afterEach, beforeEach, describe, it, expect } from 'bun:test';
import { describe, it, expect } from 'bun:test';
// ─── BrowserManager basic unit tests ─────────────────────────────
@ -16,214 +15,3 @@ describe('BrowserManager defaults', () => {
expect(bm.getRefMap()).toEqual([]);
});
});
// ─── shouldEnableChromiumSandbox ─────────────────────────────────
//
// Pinning this is what prevents the "--no-sandbox" yellow infobar from
// regressing on headed launches. Playwright auto-adds --no-sandbox when
// chromiumSandbox !== true (playwright-core chromium.js:291-292), so all
// three launch sites in browser-manager.ts must pass the policy this
// helper computes.
describe('shouldEnableChromiumSandbox', () => {
  // Snapshot of every piece of process state the cases below mutate.
  const saved = {
    platform: process.platform,
    ci: process.env.CI,
    container: process.env.CONTAINER,
    noSandbox: process.env.GSTACK_CHROMIUM_NO_SANDBOX,
    getuid: process.getuid,
  };

  // Put one env var back exactly as it was (unset stays unset).
  const restoreEnv = (key: string, value: string | undefined): void => {
    if (value === undefined) delete process.env[key];
    else process.env[key] = value;
  };

  function setPlatform(p: NodeJS.Platform) {
    Object.defineProperty(process, 'platform', { value: p });
  }

  beforeEach(() => {
    // Every case starts from a clean slate for the three policy env vars.
    delete process.env.CI;
    delete process.env.CONTAINER;
    delete process.env.GSTACK_CHROMIUM_NO_SANDBOX;
  });

  afterEach(() => {
    Object.defineProperty(process, 'platform', { value: saved.platform });
    restoreEnv('CI', saved.ci);
    restoreEnv('CONTAINER', saved.container);
    restoreEnv('GSTACK_CHROMIUM_NO_SANDBOX', saved.noSandbox);
    process.getuid = saved.getuid;
  });

  interface SandboxCase {
    title: string;
    platform: NodeJS.Platform;
    uid: number;
    env?: Record<string, string>;
    expected: boolean;
  }

  const cases: SandboxCase[] = [
    { title: 'darwin, no CI/CONTAINER/root → true', platform: 'darwin', uid: 501, expected: true },
    { title: 'linux, no CI/CONTAINER/root → true', platform: 'linux', uid: 1000, expected: true },
    { title: 'win32 → false (sandbox fails in Bun→Node→Chromium chain)', platform: 'win32', uid: 1000, expected: false },
    { title: 'linux + CI=1 → false', platform: 'linux', uid: 1000, env: { CI: '1' }, expected: false },
    { title: 'linux + CONTAINER=1 → false', platform: 'linux', uid: 1000, env: { CONTAINER: '1' }, expected: false },
    { title: 'linux + root (uid 0) → false', platform: 'linux', uid: 0, expected: false },
    // #1562 — Ubuntu/AppArmor opt-in override
    {
      title: 'linux + GSTACK_CHROMIUM_NO_SANDBOX=1 → false (Ubuntu/AppArmor opt-out)',
      platform: 'linux', uid: 1000, env: { GSTACK_CHROMIUM_NO_SANDBOX: '1' }, expected: false,
    },
    {
      title: 'darwin + GSTACK_CHROMIUM_NO_SANDBOX=1 → false (env override wins on any platform)',
      platform: 'darwin', uid: 501, env: { GSTACK_CHROMIUM_NO_SANDBOX: '1' }, expected: false,
    },
    {
      title: 'GSTACK_CHROMIUM_NO_SANDBOX=0 → does NOT trigger override (must be exactly "1")',
      platform: 'linux', uid: 1000, env: { GSTACK_CHROMIUM_NO_SANDBOX: '0' }, expected: true,
    },
  ];

  for (const { title, platform, uid, env, expected } of cases) {
    it(title, async () => {
      setPlatform(platform);
      Object.assign(process.env, env ?? {});
      process.getuid = (() => uid) as typeof process.getuid;
      const { shouldEnableChromiumSandbox } = await import('../src/browser-manager');
      expect(shouldEnableChromiumSandbox()).toBe(expected);
    });
  }
});
// ─── resolveDisconnectCause ──────────────────────────────────────
//
// Pinning the clean-vs-crash distinction matters because gbd's
// HealthMonitor consumes our exit code (0 = don't restart, !=0 =
// restart). A regression here brings back the "Cmd+Q makes the browser
// keep coming back" UX bug.
/**
 * Build a minimal stand-in for a browser handle whose `process()` returns an
 * object exposing `exitCode`, `signalCode`, and an EventEmitter-style `once`.
 *
 * Without `exitDelay` the fake process looks already exited with the given
 * code/signal. With `exitDelay` it starts out with both fields null, then
 * after that many milliseconds fills them in and emits `'exit'`.
 */
function makeFakeBrowser(opts: {
  exitCode: number | null;
  signalCode: NodeJS.Signals | null;
  /** ms before emitting 'exit'; default = already exited at construction */
  exitDelay?: number;
}): { process(): { exitCode: number | null; signalCode: NodeJS.Signals | null; once: EventEmitter['once'] } } {
  const emitter = new EventEmitter();
  const exitsLater = opts.exitDelay != null;

  const fakeProcess = {
    exitCode: exitsLater ? null : opts.exitCode,
    signalCode: exitsLater ? null : opts.signalCode,
    once: emitter.once.bind(emitter),
  };

  if (exitsLater) {
    // Update the fields before emitting so 'exit' listeners observe them.
    const finish = (): void => {
      fakeProcess.exitCode = opts.exitCode;
      fakeProcess.signalCode = opts.signalCode;
      emitter.emit('exit', opts.exitCode, opts.signalCode);
    };
    setTimeout(finish, opts.exitDelay);
  }

  return { process: () => fakeProcess };
}
describe('resolveDisconnectCause', () => {
  type FakeOpts = Parameters<typeof makeFakeBrowser>[0];

  // Each row: test title, fake-process configuration, expected verdict.
  const scenarios: Array<[string, FakeOpts, 'clean' | 'crash']> = [
    ['clean: process already exited with code 0', { exitCode: 0, signalCode: null }, 'clean'],
    ['crash: non-zero exit code', { exitCode: 1, signalCode: null }, 'crash'],
    ['crash: SIGSEGV', { exitCode: null, signalCode: 'SIGSEGV' }, 'crash'],
    ['crash: SIGKILL', { exitCode: null, signalCode: 'SIGKILL' }, 'crash'],
    [
      'clean: process exits asynchronously with code 0 within timeout',
      { exitCode: 0, signalCode: null, exitDelay: 50 },
      'clean',
    ],
    [
      'crash: process exits asynchronously with non-zero code',
      { exitCode: 137, signalCode: null, exitDelay: 50 },
      'crash',
    ],
  ];

  for (const [title, fakeOpts, verdict] of scenarios) {
    it(title, async () => {
      const { resolveDisconnectCause } = await import('../src/browser-manager');
      // Build the fake inside the test so any exitDelay timer starts here.
      const fake = makeFakeBrowser(fakeOpts);
      expect(await resolveDisconnectCause(fake as never)).toBe(verdict);
    });
  }

  it('crash: null browser returns crash (defensive default)', async () => {
    const { resolveDisconnectCause } = await import('../src/browser-manager');
    const cause = await resolveDisconnectCause(null);
    expect(cause).toBe('crash');
  });
});
// ─── onDisconnect exit-code propagation (regression test) ──────────
//
// The contract: BrowserManager.onDisconnect is called with the resolved
// exit code (0 for clean Cmd+Q, 2 for crash). server.ts then forwards
// that code to activeShutdown(), which exits the process.
//
// Without this propagation, the headed-mode user-visible Cmd+Q respawn
// bug returns: server.ts hardcoded `activeShutdown?.(2)` ignores the
// resolved 0 and gbrowser's gbd HealthMonitor treats the clean quit as
// a crash, restarting the window.
describe('BrowserManager.onDisconnect exit-code propagation', () => {
  // Codes fed through the callback in both tests, in this order.
  const inputs: Array<number | undefined> = [0, 2, undefined];

  it('signature accepts an optional exitCode argument', async () => {
    const { BrowserManager } = await import('../src/browser-manager');
    const manager = new BrowserManager();
    const received: Array<number | undefined> = [];
    manager.onDisconnect = (code?: number) => { received.push(code); };
    for (const code of inputs) manager.onDisconnect(code);
    expect(received).toEqual([0, 2, undefined]);
  });

  it('server.ts callback forwards exitCode when provided, falls back to 2', async () => {
    // Mirror the production wiring in browse/src/server.ts so a refactor
    // that drops the forward (e.g. reverting to `() => activeShutdown?.(2)`)
    // fails CI before the user-visible bug returns.
    const forwarded: number[] = [];
    const activeShutdown = (code: number) => { forwarded.push(code); };
    const onDisconnect = (code?: number) => activeShutdown(code ?? 2);
    for (const code of inputs) onDisconnect(code);
    expect(forwarded).toEqual([0, 2, 2]);
  });
});

View File

@ -178,17 +178,7 @@ describe('buildSpawnEnv', () => {
process.env.LANG = 'en_US.UTF-8';
});
afterEach(() => {
// process.env = origEnv replaces only the reference; the underlying
// env stays mutated and leaks to later test files in the same Bun
// process (e.g., breaks Bun.which('bash') in security.test.ts and
// bun-spawn in pair-agent-tunnel-eval.test.ts). Delete every current
// key then re-assign from the snapshot — restores the actual env.
for (const k of Object.keys(process.env)) {
if (!(k in origEnv)) delete process.env[k];
}
for (const [k, v] of Object.entries(origEnv)) {
if (v !== undefined) process.env[k] = v;
}
process.env = origEnv;
});
it('untrusted: drops $HOME and secrets', () => {
@ -303,15 +293,7 @@ describe.skipIf(SKIP_SPAWN)('spawnSkill: lifecycle', () => {
expect(parsed.gh).toBeNull();
expect(parsed.gstack).toBeNull();
} finally {
// See afterEach comment in `buildSpawnEnv` describe — direct
// reassignment of process.env doesn't actually restore the
// underlying env in Bun. Delete + re-assign instead.
for (const k of Object.keys(process.env)) {
if (!(k in origEnv)) delete process.env[k];
}
for (const [k, v] of Object.entries(origEnv)) {
if (v !== undefined) process.env[k] = v;
}
process.env = origEnv;
}
});
@ -330,12 +312,7 @@ describe.skipIf(SKIP_SPAWN)('spawnSkill: lifecycle', () => {
const parsed = JSON.parse(result.stdout);
expect(parsed.home).toBe('/Users/test-user');
} finally {
for (const k of Object.keys(process.env)) {
if (!(k in origEnv)) delete process.env[k];
}
for (const [k, v] of Object.entries(origEnv)) {
if (v !== undefined) process.env[k] = v;
}
process.env = origEnv;
}
});

View File

@ -1,95 +0,0 @@
import { describe, test, expect, beforeEach } from 'bun:test';
import type { Page } from 'playwright';
import {
__testInternals,
undoModification,
} from '../src/cdp-inspector';
// Regression tests for the modificationHistory cap (D6 / smoking gun #2).
// Pre-cap, the module-scoped array grew unbounded across the session. Cap is
// 200 entries, oldest evicted on push past the cap. undoModification reports
// "evicted at the cap" in the error message so a user who asks for a
// no-longer-available index understands what happened (instead of seeing the
// pre-cap "No modification at index 500" with no context).
const { pushModification, MOD_HISTORY_CAP, getRawHistory, getTotalPushed, resetForTest } = __testInternals;
/**
 * Produce a throwaway modification record for history tests. `id` is used
 * both in the selector (`#node-<id>`) and as the timestamp, so assertions
 * can tell entries apart and check their order.
 */
function fakeMod(id: number) {
  const selector = `#node-${id}`;
  return {
    selector,
    property: 'color',
    oldValue: 'red',
    newValue: 'blue',
    source: 'inline' as const,
    timestamp: id,
    method: 'setProperty' as const,
  };
}
beforeEach(() => {
resetForTest();
});
describe('modificationHistory cap', () => {
  // Push `count` fake modifications whose timestamps run 0..count-1.
  const fill = (count: number): void => {
    for (let id = 0; id < count; id++) pushModification(fakeMod(id));
  };

  test('1. push under cap keeps every entry', () => {
    fill(50);
    const history = getRawHistory();
    expect(history.length).toBe(50);
    expect(getTotalPushed()).toBe(50);
    expect(history[0].timestamp).toBe(0);
    expect(history[49].timestamp).toBe(49);
  });

  test('2. push exactly cap keeps every entry', () => {
    fill(MOD_HISTORY_CAP);
    const history = getRawHistory();
    expect(history.length).toBe(MOD_HISTORY_CAP);
    expect(getTotalPushed()).toBe(MOD_HISTORY_CAP);
    expect(history[0].timestamp).toBe(0);
  });

  test('3. push past cap evicts oldest, keeps length at cap', () => {
    const total = MOD_HISTORY_CAP + 50;
    fill(total);
    const history = getRawHistory();
    expect(history.length).toBe(MOD_HISTORY_CAP);
    expect(getTotalPushed()).toBe(total);
    // The 50 oldest entries are gone, so the surviving oldest is #50.
    expect(history[0].timestamp).toBe(50);
    expect(history[MOD_HISTORY_CAP - 1].timestamp).toBe(total - 1);
  });

  test('4. resetForTest clears both buffer and totalPushed', () => {
    fill(10);
    resetForTest();
    expect(getRawHistory().length).toBe(0);
    expect(getTotalPushed()).toBe(0);
  });
});
describe('undoModification eviction-aware error', () => {
  // An empty object stands in for the Page: these cases only exercise the
  // out-of-range rejection, which is expected to fire before the page is used.
  const stubPage = {} as unknown as Page;

  // Fills the history with `count` fake entries (timestamps 0..count-1).
  const seedHistory = (count: number): void => {
    for (let id = 0; id < count; id += 1) pushModification(fakeMod(id));
  };

  test('5. out-of-range BEFORE any eviction → no evicted note', async () => {
    seedHistory(5);
    const pending = undoModification(stubPage, 99);
    await expect(pending).rejects.toThrow(
      'No modification at index 99. History has 5 entries.',
    );
  });

  test('6. out-of-range AFTER eviction → message names the evicted count', async () => {
    // Push 73 entries beyond the cap so exactly 73 are evicted, then request
    // index 400, which lies above anything the buffer can hold.
    const evictedMessage =
      `No modification at index 400. History has ${MOD_HISTORY_CAP} entries ` +
      `(most recent ${MOD_HISTORY_CAP} only — 73 earlier entries evicted at the cap).`;
    seedHistory(MOD_HISTORY_CAP + 73);
    const pending = undoModification(stubPage, 400);
    await expect(pending).rejects.toThrow(evictedMessage);
  });

  test('7. negative explicit index throws cleanly (no NaN propagation)', async () => {
    seedHistory(10);
    const pending = undoModification(stubPage, -1);
    await expect(pending).rejects.toThrow('No modification at index -1.');
  });
});

View File

@ -1,171 +0,0 @@
import { describe, test, expect } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import type { Page } from 'playwright';
import { withCdpSession, getOrCreateCdpSession } from '../src/cdp-bridge';
// Static-grep tripwire + behavior tests for the CDP session lifecycle
// helpers introduced as part of the D11 EXPAND_SCOPE memory-leak fix.
//
// Direct calls to `page.context().newCDPSession(page)` are the leak class
// the helpers exist to close — every direct call needs a matching
// `session.detach()` and forgetting it leaves the Chromium-side target
// attached until the underlying transport drops. The tripwire fails CI
// if any source file calls `newCDPSession(` outside `cdp-bridge.ts`
// (the file that owns the helpers).
//
// Pattern mirrors browse/test/terminal-agent-pid-identity.test.ts and
// browse/test/server-sanitize-surrogates.test.ts: read source files
// directly, assert an invariant on their contents.
// Absolute path of the sibling `src` directory, resolved from this test
// file's own directory. Bun's `import.meta.dir` is a real filesystem path;
// the previous `new URL(import.meta.url).pathname` stays percent-encoded
// (a space in the checkout path becomes `%20`) and carries a leading slash
// before the drive letter on Windows, so the directory lookup could fail.
const SRC_DIR = path.resolve(import.meta.dir, '..', 'src');
/**
 * Loads every `.ts` entry found directly inside SRC_DIR (no recursion).
 *
 * @returns One `{ file, content }` pair per entry, where `file` is the bare
 *   entry name and `content` is its UTF-8 text, in directory-listing order.
 */
function readAllSourceFiles(): Array<{ file: string; content: string }> {
  return fs
    .readdirSync(SRC_DIR)
    .filter((name) => name.endsWith('.ts'))
    .map((name) => ({
      file: name,
      content: fs.readFileSync(path.join(SRC_DIR, name), 'utf-8'),
    }));
}
describe('CDP session cleanup invariant', () => {
  test('1. no source file calls `newCDPSession(` outside cdp-bridge.ts', () => {
    const directCall = /newCDPSession\s*\(/;
    const offenders: Array<{ file: string; line: number; text: string }> = [];

    for (const { file, content } of readAllSourceFiles()) {
      // cdp-bridge.ts owns the helpers and is the only file allowed to call
      // newCDPSession directly.
      if (file === 'cdp-bridge.ts') continue;

      content.split('\n').forEach((raw, index) => {
        if (!directCall.test(raw)) return;
        const trimmed = raw.trim();
        // Lines that begin as `//` or `*` comments are documentation mentions.
        const isCommentLine = trimmed.startsWith('//') || trimmed.startsWith('*');
        if (isCommentLine) return;
        offenders.push({ file, line: index + 1, text: trimmed });
      });
    }

    if (offenders.length > 0) {
      const formatted = offenders
        .map((o) => ` ${o.file}:${o.line} ${o.text}`)
        .join('\n');
      // Throw with the full listing so the failure names each offending line.
      throw new Error(
        `Direct newCDPSession(...) calls found outside cdp-bridge.ts. ` +
          `Route through withCdpSession() (one-shot, finally-detach) or ` +
          `getOrCreateCdpSession() (cached, close-detach) instead:\n${formatted}`,
      );
    }
    expect(offenders).toEqual([]);
  });

  test('2. helper file exports the two documented entry points', () => {
    // The tripwire above only makes sense while both helpers still exist.
    const helpers = [withCdpSession, getOrCreateCdpSession];
    expect(typeof helpers[0]).toBe('function');
    expect(typeof helpers[1]).toBe('function');
  });
});
describe('withCdpSession finally-detach', () => {
  // Minimal Page stand-in: exposes only context().newCDPSession(page) and a
  // session whose detach() bumps the spy and optionally rejects.
  function makeFakePage(detachSpy: { called: number; rejected?: Error }) {
    const session = {
      async detach() {
        detachSpy.called += 1;
        if (detachSpy.rejected) throw detachSpy.rejected;
      },
    };
    const context = () => ({
      newCDPSession: async (_p: unknown) => session,
    });
    return { context } as unknown as Page;
  }

  test('3. detaches on the success path', async () => {
    const spy = { called: 0 };
    const fakePage = makeFakePage(spy);
    const value = await withCdpSession(fakePage, async (session) => {
      expect(session).toBeDefined();
      return 42;
    });
    expect(value).toBe(42);
    expect(spy.called).toBe(1);
  });

  test('4. detaches even when fn throws (the actual leak fix)', async () => {
    const spy = { called: 0 };
    const fakePage = makeFakePage(spy);
    const failing = withCdpSession(fakePage, async () => {
      throw new Error('boom');
    });
    await expect(failing).rejects.toThrow('boom');
    expect(spy.called).toBe(1);
  });

  test('5. swallows detach errors so they do not mask fn errors', async () => {
    const spy = { called: 0, rejected: new Error('already detached') };
    const fakePage = makeFakePage(spy);
    const failing = withCdpSession(fakePage, async () => {
      throw new Error('original');
    });
    // The callback's own error must surface, not the detach rejection.
    await expect(failing).rejects.toThrow('original');
    expect(spy.called).toBe(1);
  });

  test('6. swallows detach errors on the success path too', async () => {
    const spy = { called: 0, rejected: new Error('target closed') };
    const fakePage = makeFakePage(spy);
    const value = await withCdpSession(fakePage, async () => 'ok');
    expect(value).toBe('ok');
    expect(spy.called).toBe(1);
  });
});
describe('getOrCreateCdpSession close-detach', () => {
  // Fake Page that records 'close' listeners registered via once() and lets
  // the test fire them on demand; the session counts its detach() calls.
  function makeFakePage() {
    const closeListeners: Array<() => void> = [];
    const session = {
      _detachCount: 0,
      detach: async () => {
        session._detachCount += 1;
      },
    };
    const fireClose = () => {
      for (const listener of closeListeners) listener();
    };
    const page = {
      context: () => ({
        newCDPSession: async (_p: unknown) => session,
      }),
      once: (event: string, fn: () => void) => {
        if (event !== 'close') return;
        closeListeners.push(fn);
      },
      _fireClose: fireClose,
    };
    return { page: page as unknown as Page, session, fireClose: page._fireClose };
  }

  test('7. caches the session across calls', async () => {
    const { page } = makeFakePage();
    const cache = new WeakMap<Page, any>();
    const first = await getOrCreateCdpSession(page, cache);
    const second = await getOrCreateCdpSession(page, cache);
    expect(first).toBe(second);
  });

  test('8. close hook detaches the session AND clears the cache', async () => {
    const fake = makeFakePage();
    const cache = new WeakMap<Page, any>();
    await getOrCreateCdpSession(fake.page, cache);
    expect(cache.get(fake.page)).toBeDefined();
    fake.fireClose();
    // Yield one macrotask so any async work started by the close hook settles.
    await new Promise((r) => setTimeout(r, 0));
    expect(cache.get(fake.page)).toBeUndefined();
    expect(fake.session._detachCount).toBe(1);
  });
});

View File

@ -1,75 +0,0 @@
/**
* Coverage for #1612: the macOS/Linux server must survive sandboxed-shell
* harnesses by becoming its own session leader (setsid).
*
* Pre-#1612, Bun.spawn().unref() removed the child from Bun's event loop
* but did NOT call setsid(). When the CLI ran inside Claude Code's
* per-command sandbox, Conductor, or CI step runners, the session leader's
* exit sent SIGHUP to every PID in the session, killing the bun server.
*
* The fix routes macOS/Linux spawn through Node's child_process.spawn with
* detached:true, which calls setsid() so the server becomes its own session
* leader (PPID=1 on Linux, similar reparenting on Darwin).
*
* The actual setsid syscall is hard to assert in a unit test without a
* real spawn, so testing here is static: the cli.ts source must use the
* Node spawn path on macOS/Linux, with detached:true and .unref(). If a
* future refactor reverts to Bun.spawn().unref() on the macOS/Linux branch
* the regression returns and these tests fail.
*/
import { describe, expect, test } from "bun:test";
import * as fs from "node:fs";
import * as path from "node:path";
// Directory two levels above this test file's folder (presumably the
// repository root, given the "browse/src" path joined below).
const ROOT = path.resolve(import.meta.dir, "..", "..");
// The CLI source file whose spawn logic the assertions below inspect.
const CLI = path.join(ROOT, "browse", "src", "cli.ts");
/** Returns the contents of cli.ts as UTF-8 text; the file is re-read on every call. */
function read(): string {
return fs.readFileSync(CLI, "utf-8");
}
describe("#1612 macOS/Linux daemonize via Node setsid path", () => {
  test("cli.ts imports nodeSpawn from child_process (Node spawn alias)", () => {
    const cliSource = read();
    // Accept either `spawn as nodeSpawn` or a bare `nodeSpawn` followed by
    // `,` / `}` so the check tolerates different import spellings.
    const aliasPattern = /(spawn as nodeSpawn|nodeSpawn\s*[,}])/;
    const modulePattern = /from\s+['"]child_process['"]/;
    expect(cliSource).toMatch(aliasPattern);
    expect(cliSource).toMatch(modulePattern);
  });

  test("non-Windows branch uses nodeSpawn(...).unref() with detached:true", () => {
    const cliSource = read();
    // Both markers must appear within 500 characters after a `nodeSpawn(`.
    const detachedNearSpawn = /nodeSpawn\([\s\S]{0,500}detached:\s*true/;
    const unrefNearSpawn = /nodeSpawn\([\s\S]{0,500}\.unref\(\)/;
    expect(cliSource).toMatch(detachedNearSpawn);
    expect(cliSource).toMatch(unrefNearSpawn);
  });

  test("non-Windows branch comment documents setsid/SIGHUP root cause", () => {
    const cliSource = read();
    // The source has to mention both terms so the rationale stays next to
    // the spawn call for whoever refactors it later.
    for (const keyword of [/setsid/, /SIGHUP/]) {
      expect(cliSource).toMatch(keyword);
    }
  });

  test("the spawn call on macOS/Linux is nodeSpawn, not Bun.spawn", () => {
    // Drop whole-line `//` comments first so explanatory mentions of
    // "Bun.spawn().unref()" cannot produce a false positive.
    const isLineComment = (line: string): boolean => line.trim().startsWith("//");
    const codeOnly = read()
      .split("\n")
      .filter((line) => !isLineComment(line))
      .join("\n");
    // Anchor on the first `nodeSpawn('bun'` occurrence and inspect the 400
    // characters starting there: they must contain a nodeSpawn( call and no
    // Bun.spawn( call.
    const anchor = codeOnly.indexOf("nodeSpawn('bun'");
    expect(anchor).toBeGreaterThan(-1);
    const excerpt = codeOnly.slice(anchor, anchor + 400);
    expect(excerpt).toMatch(/nodeSpawn\(/);
    expect(excerpt).not.toMatch(/Bun\.spawn\(/);
  });
});

View File

@ -1,81 +0,0 @@
import { describe, test, expect } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
// v1.44 outer supervisor — static-grep invariants.
//
// Pre-v1.44 `$B connect` was fire-and-forget: spawn server detached, CLI
// exits, server runs unsupervised. If the server crashed, the user had to
// re-run `$B connect`. The opt-in supervisor (--supervise or
// BROWSE_SUPERVISE=1) keeps the CLI attached and respawns the server on
// unexpected exit, with the same crash-loop guard shape as the v1.44
// terminal-agent watchdog.
//
// Live respawn tests belong in the e2e tier (real Bun.spawn cycles take
// 3-8s each). These tripwires defend the load-bearing invariants:
// opt-in by default, signal handlers wired, crash-loop guard, env knobs.
// Absolute path of ../src/cli.ts relative to this test file's directory.
// Bun's `import.meta.dir` is a real filesystem path; the previous
// `new URL(import.meta.url).pathname` stays percent-encoded (a space in the
// checkout path becomes `%20`) and carries a leading slash before the drive
// letter on Windows, so the file read could fail.
const CLI_TS = path.resolve(import.meta.dir, '..', 'src', 'cli.ts');
describe('CLI outer supervisor (v1.44+)', () => {
  // Reads cli.ts afresh for each test, as every case did individually.
  const loadCli = (): string => fs.readFileSync(CLI_TS, 'utf-8');

  test('1. supervisor is opt-in via --supervise flag or BROWSE_SUPERVISE env', () => {
    const cli = loadCli();
    expect(cli).toContain("commandArgs.includes('--supervise')");
    expect(cli).toContain("process.env.BROWSE_SUPERVISE === '1'");
    // Without the opt-in the command must still exit 0 right away, so
    // existing callers keep getting a prompt return.
    const promptExit = /if \(!superviseRequested\) \{\s*process\.exit\(0\);\s*\}/;
    expect(cli).toMatch(promptExit);
  });

  test('2. SIGINT and SIGTERM trigger clean teardown', () => {
    const cli = loadCli();
    // Each signal handler has to route to teardownAndExit; otherwise Ctrl-C
    // would leave the supervised server orphaned.
    expect(cli).toMatch(/process\.on\('SIGINT'.*teardownAndExit/);
    expect(cli).toMatch(/process\.on\('SIGTERM'.*teardownAndExit/);
    // Teardown signals the supervised server before the CLI itself exits.
    expect(cli).toContain("safeKill(state.pid, 'SIGTERM')");
  });

  test('3. crash-loop guard with 5-in-5min rolling window', () => {
    const cli = loadCli();
    expect(cli).toContain('SUPERVISOR_GUARD_WINDOW_MS = 5 * 60_000');
    expect(cli).toContain('SUPERVISOR_GUARD_MAX = 5');
    // Old respawn timestamps must be pruned so sporadic crashes over a long
    // uptime never add up to a guard trip.
    expect(cli).toMatch(/respawns\.shift\(\)/);
  });

  test('4. exponential backoff schedule, env-overridable', () => {
    const cli = loadCli();
    expect(cli).toContain('GSTACK_SUPERVISOR_BACKOFF');
    // The default schedule starts with short waits and ends on a long one.
    expect(cli).toContain('1000,2000,4000,8000,30000');
  });

  test('5. tick interval is env-overridable for tests', () => {
    const cli = loadCli();
    expect(cli).toContain('GSTACK_SUPERVISOR_TICK_MS');
  });

  test('6. respawned server gets a fresh terminal-agent too', () => {
    const cli = loadCli();
    // Only the supervisor section is inspected: it must call
    // spawnTerminalAgent again after a respawn, because the previous agent
    // record is stale once the old server is gone.
    const supervisorSection = sliceBetween(cli, 'Supervisor mode:', '// ─── Headed Disconnect');
    expect(supervisorSection).toContain('spawnTerminalAgent({');
  });
});
/**
 * Extracts the part of `source` that begins at the first occurrence of
 * `start` and stops just before the next occurrence of `end` after it.
 *
 * @param source - Text to search.
 * @param start - Opening marker; included in the result.
 * @param end - Closing marker; searched for after `start` and excluded.
 * @returns The text from `start` up to, but not including, `end`.
 * @throws Error when either marker cannot be found.
 */
function sliceBetween(source: string, start: string, end: string): string {
  const from = source.indexOf(start);
  if (from < 0) {
    throw new Error(`marker not found: ${start}`);
  }
  // Begin the end-marker search past the start marker so the two never overlap.
  const to = source.indexOf(end, from + start.length);
  if (to < 0) {
    throw new Error(`end marker not found: ${end}`);
  }
  return source.substring(from, to);
}

View File

@ -47,15 +47,4 @@ describe('locateBinary', () => {
expect(typeof locateBinary).toBe('function');
expect(locateBinary.length).toBe(0);
});
test('source-checkout fallback resolves <git-root>/browse/dist/browse[.exe]', () => {
  // Static pin: binaries built straight into browse/dist/ (the layout the
  // windows-setup-e2e.yml workflow produces, without the
  // .claude/skills/gstack/ install tree) must stay resolvable. If a refactor
  // removes the source-checkout branch from find-browse.ts, this fails.
  const fsModule = require('fs');
  const pathModule = require('path');
  const finderPath = pathModule.join(__dirname, '../src/find-browse.ts');
  const src = fsModule.readFileSync(finderPath, 'utf-8');
  const requiredSnippets = ['Source-checkout fallback', "join(root, 'browse', 'dist', 'browse')"];
  for (const snippet of requiredSnippets) {
    expect(src).toContain(snippet);
  }
});
});

View File

@ -1,7 +1,6 @@
import { describe, test, expect } from 'bun:test';
import * as net from 'net';
import * as path from 'path';
import { __testInternals__ } from '../src/server';
const polyfillPath = path.resolve(import.meta.dir, '../src/bun-polyfill.cjs');
@ -29,47 +28,6 @@ function getFreePort(): Promise<number> {
}
describe('findPort / isPortAvailable', () => {
test('explicit BROWSE_PORT diagnostic distinguishes bind denial from occupied port', () => {
  // Case 1: the bind is refused (EPERM), so the message must say binding is
  // blocked rather than that the port is taken.
  const blockedError = __testInternals__.formatExplicitPortUnavailableError(34567, {
    available: false,
    code: 'EPERM',
    message: 'operation not permitted',
  });
  const blocked = blockedError.message;
  expect(blocked).toContain('Cannot bind BROWSE_PORT=34567');
  expect(blocked).toContain('localhost port binding is blocked');
  expect(blocked).toContain('not that the port is occupied');

  // Case 2: the port really is in use (EADDRINUSE), so the exact
  // in-use message is expected.
  const occupiedError = __testInternals__.formatExplicitPortUnavailableError(34567, {
    available: false,
    code: 'EADDRINUSE',
    message: 'address already in use',
  });
  const occupied = occupiedError.message;
  expect(occupied).toBe('[browse] Port 34567 (from BROWSE_PORT env) is in use');
});
test('random port diagnostic calls out sandbox-style bind denial', () => {
  // Mixed attempts: one genuinely busy port, then one refused with EPERM.
  const error = __testInternals__.formatRandomPortUnavailableError([
    { port: 11001, result: { available: false, code: 'EADDRINUSE', message: 'address already in use' } },
    { port: 12002, result: { available: false, code: 'EPERM', message: 'operation not permitted' } },
  ]);
  const message = error.message;
  // The text reports the attempt count and the last error, and points the
  // user at BROWSE_PORT instead of implying every port was busy.
  expect(message).toContain('Cannot bind localhost ports after 2 attempts');
  expect(message).toContain('Last error: 12002 (EPERM: operation not permitted)');
  expect(message).toContain('not that every sampled port is occupied');
  expect(message).toContain('set BROWSE_PORT to an approved port');
});
test('random port diagnostic preserves old busy-port meaning when all attempts are occupied', () => {
  // Every attempt failed with EADDRINUSE, so the busy-port wording applies.
  // NOTE(review): two attempts are supplied yet the message is expected to
  // say "after 5 attempts"; presumably the formatter reports a fixed attempt
  // budget here. Confirm against the implementation.
  const error = __testInternals__.formatRandomPortUnavailableError([
    { port: 11001, result: { available: false, code: 'EADDRINUSE', message: 'address already in use' } },
    { port: 12002, result: { available: false, code: 'EADDRINUSE', message: 'address already in use' } },
  ]);
  const message = error.message;
  expect(message).toContain('No available port after 5 attempts');
  expect(message).toContain('every sampled port was already in use');
});
test('isPortAvailable returns true for a free port', async () => {
// Use the same isPortAvailable logic from server.ts

View File

@ -1,247 +0,0 @@
import { describe, test, expect } from 'bun:test';
import { formatBytes, type MemorySnapshot, type MemoryStructureStats } from '../src/memory-snapshot';
// Unit coverage for the $B memory diagnostic surface — formatter, byte
// renderer, and the structures-stats aggregator. The integration path
// ($B memory through the BrowserManager → CDP) requires a real headless
// Chromium and is covered indirectly by browse-basic in the eval suite.
// These tests pin the renderer logic in isolation so format regressions
// (rounded GB drift, missing "and N more" tail, snapshot.notes ordering)
// surface immediately.
// ─── formatBytes() ─────────────────────────────────────────────
describe('formatBytes', () => {
  // Binary unit sizes used to build the inputs below.
  const KIB = 1024;
  const MIB = KIB * KIB;
  const GIB = MIB * KIB;

  test('1. < 1 KB renders as bytes', () => {
    const cases: Array<[number, string]> = [
      [0, '0 B'],
      [1, '1 B'],
      [1023, '1023 B'],
    ];
    for (const [input, rendered] of cases) {
      expect(formatBytes(input)).toBe(rendered);
    }
  });

  test('2. KB tier (1024 ... 1024^2-1)', () => {
    expect(formatBytes(KIB)).toBe('1.0 KB');
    expect(formatBytes(1536)).toBe('1.5 KB');
    // One byte below a megabyte: either rounding outcome is accepted.
    expect(formatBytes(MIB - 1)).toMatch(/^1024\.0 KB$|^1023\.\d KB$/);
  });

  test('3. MB tier', () => {
    expect(formatBytes(MIB)).toBe('1.0 MB');
    expect(formatBytes(312 * MIB)).toBe('312.0 MB');
  });

  test('4. GB tier renders with 2 decimals', () => {
    expect(formatBytes(GIB)).toBe('1.00 GB');
    expect(formatBytes(1.4 * GIB)).toMatch(/^1\.40 GB$/);
    // 160.61 GB is the figure from the original OOM report; the renderer
    // must cope with a value of that magnitude.
    const big = 160.61 * GIB;
    expect(formatBytes(big)).toMatch(/^160\.6\d GB$/);
  });

  test('5. negative input behavior — coerces to bytes path (best-effort, do not throw)', () => {
    // Only the absence of an exception is pinned, not the rendered text.
    const renderNegative = () => formatBytes(-1);
    expect(renderNegative).not.toThrow();
  });
});
// ─── handleMemoryCommand text + json output ────────────────────
// Build a minimal MemorySnapshot fixture exercising every render branch.
// This is what bm.getMemorySnapshot would return; we stub the BrowserManager
// so the test never spins up real Chromium.
/**
 * Creates a fresh structure-stats fixture with fixed, easily recognisable
 * values. A new object is returned on every call, so callers may mutate it.
 */
function makeStructureStats(): MemoryStructureStats {
  const modificationHistory = { current: 42, cap: 200, evicted: 0 };
  const stats: MemoryStructureStats = {
    modificationHistory,
    activitySubscribers: 1,
    inspectorSubscribers: 0,
    consoleBufferLen: 1842,
    networkBufferLen: 12000,
    dialogBufferLen: 3,
    captureBufferBytes: 0,
  };
  return stats;
}
/**
 * Creates a MemorySnapshot fixture.
 *
 * @param overrides - Fields that replace the defaults (shallow merge; later
 *   keys win).
 * @returns A snapshot with no tabs, `processes: null`, no notes, a fixed
 *   `capturedAt`, and the default structure stats unless overridden.
 */
function makeSnapshot(overrides: Partial<MemorySnapshot> = {}): MemorySnapshot {
  const MIB = 1024 * 1024;
  const defaults: MemorySnapshot = {
    bunServer: {
      rss: 312 * MIB,
      heapUsed: 84 * MIB,
      heapTotal: 120 * MIB,
      external: 21 * MIB,
    },
    tabs: [],
    processes: null,
    structures: makeStructureStats(),
    capturedAt: 1700000000000,
    notes: [],
  };
  return { ...defaults, ...overrides };
}
// Mock BrowserManager surface for handleMemoryCommand. Only
// getMemorySnapshot is touched.
/**
 * Wraps a snapshot in a stand-in BrowserManager that implements only
 * `getMemorySnapshot`.
 *
 * @param snapshot - Values the fake returns.
 * @returns An object cast to BrowserManager whose `getMemorySnapshot` resolves
 *   to `snapshot` with its `structures` field replaced by the argument it is
 *   called with.
 */
function makeFakeBm(snapshot: MemorySnapshot) {
  const fake = {
    async getMemorySnapshot(structures: MemoryStructureStats) {
      return { ...snapshot, structures };
    },
  };
  return fake as unknown as import('../src/browser-manager').BrowserManager;
}
describe('handleMemoryCommand', () => {
  // Imports the command lazily and runs it against a fake BrowserManager
  // built from `snapshot`.
  const render = async (args: string[], snapshot: MemorySnapshot) => {
    const { handleMemoryCommand } = await import('../src/memory-command');
    return handleMemoryCommand(args, makeFakeBm(snapshot));
  };

  test('6. --json mode emits parseable JSON with bunServer + structures', async () => {
    const output = await render(['--json'], makeSnapshot());
    const parsed = JSON.parse(output);
    expect(parsed.bunServer.rss).toBe(312 * 1024 * 1024);
    expect(parsed.structures).toBeDefined();
    expect(parsed.structures.modificationHistory.cap).toBe(200);
  });

  test('7. text mode renders Bun server line with RSS + heap', async () => {
    const output = await render([], makeSnapshot());
    for (const fragment of ['Bun server:', '312.0 MB', '84.0 MB']) {
      expect(output).toContain(fragment);
    }
  });

  test('8. text mode renders "no tabs tracked" when tabs array is empty', async () => {
    const output = await render([], makeSnapshot({ tabs: [] }));
    expect(output).toContain('Renderers:');
    expect(output).toContain('(no tabs tracked)');
  });

  test('9. text mode shows top 10 tabs + "...and N more" tail when > 10', async () => {
    const MIB = 1024 * 1024;
    // Fifteen tabs with strictly decreasing heap sizes, so tab 0 is largest.
    const tabs = Array.from({ length: 15 }, (_, rank) => {
      const weight = 15 - rank;
      return {
        id: rank,
        url: `https://example.com/tab${rank}`,
        title: `Tab ${rank}`,
        jsHeapUsed: weight * 50 * MIB,
        jsHeapTotal: weight * 60 * MIB,
        documents: 1,
        nodes: 100,
        listeners: 10,
      };
    });
    const output = await render([], makeSnapshot({ tabs }));
    expect(output).toContain('Renderers: 15 tabs');
    expect(output).toContain('and 5 more');
    // Ordering check: the largest tab (#0) is listed ahead of tab #9.
    const firstAt = output.indexOf('tab #0 —');
    const tenthAt = output.indexOf('tab #9 —');
    expect(firstAt).toBeLessThan(tenthAt);
  });

  test('10. text mode renders Chromium processes grouped by type', async () => {
    const processes = [
      { id: 1, type: 'browser', cpuTime: 1.5 },
      { id: 2, type: 'renderer', cpuTime: 3.2 },
      { id: 3, type: 'renderer', cpuTime: 2.1 },
      { id: 4, type: 'gpu', cpuTime: 0.5 },
    ];
    const output = await render([], makeSnapshot({ processes }));
    expect(output).toContain('Chromium processes: 4 total');
    expect(output).toContain('renderer=2');
    expect(output).toContain('browser=1');
    expect(output).toContain('gpu=1');
  });

  test('11. text mode renders "unavailable" line when processes is null', async () => {
    const output = await render([], makeSnapshot({ processes: null }));
    expect(output).toContain('Chromium processes: (unavailable — see notes)');
  });

  test('12. text mode renders modificationHistory with evicted-count when > 0', async () => {
    // NOTE(review): this case never calls the renderer. It rebuilds the
    // expected line locally and compares it with a hard-coded copy, so it
    // pins the documented string shape only. The module is imported (and
    // then discarded) purely to prove it loads.
    const mod = await import('../src/memory-command');
    const stats = makeStructureStats();
    stats.modificationHistory = { current: 200, cap: 200, evicted: 47 };
    const renderedExpected =
      'modificationHistory: 200 / 200 entries (47 evicted since reset)';
    const { current, cap, evicted } = stats.modificationHistory;
    const evictedSuffix = evicted > 0 ? ` (${evicted} evicted since reset)` : '';
    const expected = `modificationHistory: ${current} / ${cap} entries` + evictedSuffix;
    expect(expected).toBe(renderedExpected);
    void mod;
  });

  test('13. text mode renders modificationHistory line shape', async () => {
    const output = await render([], makeSnapshot());
    // The fake manager echoes back whatever structure stats the command
    // hands it, so the numbers are not fixture-controlled; only the line
    // shape is asserted.
    expect(output).toMatch(/modificationHistory:\s+\d+ \/ \d+ entries/);
  });

  test('14. text mode prints notes section when notes are present', async () => {
    const notes = ['Per-Chromium-process RSS not collected — CDP limitation.'];
    const output = await render([], makeSnapshot({ notes }));
    expect(output).toContain('Notes:');
    expect(output).toContain('CDP limitation.');
  });

  test('15. text mode omits notes section when notes is empty', async () => {
    const output = await render([], makeSnapshot({ notes: [] }));
    expect(output).not.toContain('Notes:');
  });

  test('16. text mode truncates long tab URLs with ellipsis', async () => {
    const longUrl = 'https://example.com/' + 'a'.repeat(120);
    const oversizedTab = {
      id: 1,
      url: longUrl,
      title: 'long',
      jsHeapUsed: 1024,
      jsHeapTotal: 2048,
      documents: 1,
      nodes: 10,
      listeners: 1,
    };
    const output = await render([], makeSnapshot({ tabs: [oversizedTab] }));
    expect(output).toContain('...');
    // An ellipsis is present and the untruncated URL is not.
    expect(output.includes(longUrl)).toBe(false);
  });
});
// ─── buildMemorySnapshotJson — server-endpoint entry ──────────
describe('buildMemorySnapshotJson', () => {
  test('17. returns the snapshot with structures populated', async () => {
    const { buildMemorySnapshotJson } = await import('../src/memory-command');
    const fixture = makeSnapshot();
    const built = await buildMemorySnapshotJson(makeFakeBm(fixture));
    expect(built.bunServer.rss).toBe(fixture.bunServer.rss);
    expect(built.structures.modificationHistory.cap).toBe(200);
    // The fake manager returns whichever structure stats it is handed, so
    // only the field types are checked here, not specific values.
    const { consoleBufferLen, networkBufferLen } = built.structures;
    expect(typeof consoleBufferLen).toBe('number');
    expect(typeof networkBufferLen).toBe('number');
  });
});

View File

@ -1,132 +0,0 @@
import { describe, test, expect } from 'bun:test';
import { BrowserManager } from '../src/browser-manager';
import { networkBuffer } from '../src/buffers';
// Reproducer for the body-materialization leak fixed in the D10
// USE_CDP_EVENT_BATCHED commit. Pre-fix, the wirePageEvents
// `requestfinished` listener called `await res.body()` just to read
// `.length`, allocating the full response body into a Bun Buffer on
// every request — multi-GB/hour of churn on long-lived headed
// Chromium with media-heavy pages.
//
// What this test pins:
// - The handler calls Playwright's structured req.sizes() API
// (which pulls from Network.loadingFinished without
// materializing the body).
// - The handler NEVER calls res.body(), even though a fake response
// exposes the method.
// - networkBuffer entries are still populated with the right size.
//
// What this test does NOT cover:
// - A real Chromium burst measuring peak Bun RSS during concurrent
// fetches. That's a periodic-tier test (browse/test/
// memory-leak-reproducer-e2e.test.ts, deferred — see TODOS).
// - Per-tab JS heap growth on the Chromium side. Outside Bun's
// visibility entirely.
//
// Wall clock target: < 1 second. Gate tier.
/** Tallies how often the fake request/response methods were invoked. */
interface CallCounters {
// Incremented by each sizes() call on a fake request.
sizes: number;
// Incremented by each body() call on a fake response; the test expects 0.
body: number;
}
/**
 * Builds a fake request object exposing `url`, `sizes`, `method` and
 * `response`.
 *
 * @param url - Value returned by `url()` on both the request and its response.
 * @param responseBodySize - Reported as `responseBodySize` by `sizes()`, and
 *   the length of the Buffer that `body()` would allocate.
 * @param counters - Mutated in place: `sizes` is bumped by `sizes()`, `body`
 *   by the response's `body()`.
 */
function makeFakeReq(url: string, responseBodySize: number, counters: CallCounters) {
  // A new response object is created for every response() call.
  const buildResponse = () => ({
    url: () => url,
    status: () => 200,
    body: async () => {
      // Reaching this point means the body is being materialized again.
      // A real Buffer is allocated so the cost of doing so on every request
      // is visible to whoever investigates the failing assertion.
      counters.body += 1;
      return Buffer.alloc(responseBodySize);
    },
  });

  const readSizes = async () => {
    counters.sizes += 1;
    return {
      requestBodySize: 0,
      requestHeadersSize: 100,
      responseBodySize,
      responseHeadersSize: 200,
    };
  };

  return {
    url: () => url,
    sizes: readSizes,
    method: () => 'GET',
    response: async () => buildResponse(),
  };
}
/** Maps an event name to the handlers registered for it on the fake page. */
interface ListenerMap {
[event: string]: Array<(arg: unknown) => void>;
}
/**
 * Creates a tiny in-memory event emitter standing in for a page: `on`
 * registers a handler, `emit` calls every handler for an event in
 * registration order, and `listenerCount` reports how many are registered.
 */
function makeFakePage() {
  const listeners: ListenerMap = {};
  // Handlers for `event`, or an empty list when none were registered.
  const handlersFor = (event: string) => listeners[event] || [];

  return {
    on(event: string, fn: (arg: unknown) => void): void {
      const existing = listeners[event];
      if (existing) {
        existing.push(fn);
      } else {
        listeners[event] = [fn];
      }
    },
    emit(event: string, arg: unknown): void {
      for (const handler of handlersFor(event)) handler(arg);
    },
    listenerCount(event: string): number {
      return handlersFor(event).length;
    },
  };
}
describe('memory-leak reproducer: requestfinished does not materialize bodies', () => {
  test('burst of 200 requestfinished events calls req.sizes() but never res.body()', async () => {
    const BURST = 200;
    const ASSET_PREFIX = 'https://example.invalid/asset/';
    const pause = (ms: number) => new Promise((r) => setTimeout(r, ms));

    const bm = new BrowserManager();
    const page = makeFakePage();
    // wirePageEvents is private, so it is reached through a structural cast.
    type PrivateSurface = { wirePageEvents: (p: unknown) => void };
    (bm as unknown as PrivateSurface).wirePageEvents(page);

    // Emit a 'request' for every asset first so the requestfinished handler
    // has matching networkBuffer entries to update.
    const startLen = networkBuffer.length;
    for (let i = 0; i < BURST; i++) {
      page.emit('request', {
        url: () => `https://example.invalid/asset/${i}`,
        method: () => 'GET',
      });
    }

    // Then fire the whole burst of requestfinished events back to back.
    // Each fake response claims 1 MB; body() would allocate that much.
    const counters: CallCounters = { sizes: 0, body: 0 };
    const reqs: Array<ReturnType<typeof makeFakeReq>> = [];
    for (let i = 0; i < BURST; i++) {
      reqs.push(makeFakeReq(`https://example.invalid/asset/${i}`, 1024 * 1024, counters));
    }
    for (const req of reqs) page.emit('requestfinished', req);

    // Give the async handlers time to finish, plus one extra tick for any
    // follow-up work they schedule.
    await pause(50);
    await pause(0);

    // sizes() ran for every event...
    expect(counters.sizes).toBeGreaterThanOrEqual(200);
    // ...and body() was never invoked.
    expect(counters.body).toBe(0);

    // Count buffer entries for our assets that carry a positive numeric size.
    let populated = 0;
    for (let i = 0; i < networkBuffer.length; i++) {
      const entry = networkBuffer.get(i);
      if (!entry) continue;
      if (!entry.url?.startsWith(ASSET_PREFIX)) continue;
      if (typeof entry.size === 'number' && entry.size > 0) populated += 1;
    }
    expect(populated).toBeGreaterThanOrEqual(200);

    // The buffer grew relative to its length before seeding.
    expect(networkBuffer.length).toBeGreaterThan(startLen);
  });
});

View File

@ -1,76 +0,0 @@
/**
* Tests for the /pty-inject-scan endpoint (#1370).
*
* Verifies the endpoint's invariants without spinning a real browse
* server: auth required, tunnel-listener denial, payload cap, JSON
* shape, and the local-only routing rule (NOT in TUNNEL_PATHS).
*
* Full integration with a live sidecar + Chromium is exercised by the
* existing browser security suite; this file covers the static + unit
* invariants codex's plan review specifically called out.
*/
import { describe, test, expect } from 'bun:test';
import { readFileSync } from 'fs';
import { join } from 'path';
const SERVER_SRC = readFileSync(
join(import.meta.dir, '..', 'src', 'server.ts'),
'utf-8',
);
/**
 * Static invariants for the /pty-inject-scan handler. Every check runs
 * against the text of server.ts (SERVER_SRC); no server is started.
 */
describe('/pty-inject-scan — server.ts static invariants', () => {
  const ENDPOINT_LITERAL = "'/pty-inject-scan'";
  // How much source after the first mention of the endpoint is inspected
  // when no section divider is used to bound the block.
  const BLOCK_WINDOW = 5000;

  /**
   * Returns the slice of server.ts starting at the first mention of the
   * endpoint literal. Fails the calling test immediately when the literal is
   * missing, instead of silently slicing from index -1.
   *
   * @param stopAtSectionDivider when true, the slice ends at the next
   *   section-divider comment if one follows; otherwise (or when no divider
   *   is found) it spans BLOCK_WINDOW characters.
   * @returns the source text to run `toContain` checks against.
   */
  function endpointBlock(stopAtSectionDivider = false): string {
    const start = SERVER_SRC.indexOf(ENDPOINT_LITERAL);
    expect(start).toBeGreaterThan(-1);
    if (stopAtSectionDivider) {
      const blockEnd = SERVER_SRC.indexOf("\n // ─", start);
      if (blockEnd > start) return SERVER_SRC.slice(start, blockEnd);
    }
    return SERVER_SRC.slice(start, start + BLOCK_WINDOW);
  }

  test('endpoint is defined as a POST handler', () => {
    expect(SERVER_SRC).toContain(
      "url.pathname === '/pty-inject-scan' && req.method === 'POST'",
    );
  });

  test('endpoint requires auth (validateAuth gate)', () => {
    // Bound the block at the next section divider so a validateAuth call
    // belonging to a later endpoint cannot satisfy this check.
    const block = endpointBlock(true);
    expect(block).toContain('validateAuth(req)');
    expect(block).toContain('401');
  });

  test('endpoint caps payload at 64KB', () => {
    const block = endpointBlock();
    expect(block).toContain('64 * 1024');
    expect(block).toContain('payload-too-large');
    expect(block).toContain('413');
  });

  test('endpoint is NOT in the tunnel listener allowlist', () => {
    const tunnelBlockStart = SERVER_SRC.indexOf('const TUNNEL_PATHS = new Set<string>([');
    expect(tunnelBlockStart).toBeGreaterThan(-1);
    const tunnelBlockEnd = SERVER_SRC.indexOf(']);', tunnelBlockStart);
    // Without a closing bracket the slice below would run to the end of the
    // file and the negative assertion would be checking unrelated code.
    expect(tunnelBlockEnd).toBeGreaterThan(tunnelBlockStart);
    const tunnelAllowlist = SERVER_SRC.slice(tunnelBlockStart, tunnelBlockEnd);
    expect(tunnelAllowlist).not.toContain('/pty-inject-scan');
  });

  test('response goes through sanitizeReplacer (Unicode egress hardening)', () => {
    expect(endpointBlock()).toContain('sanitizeReplacer');
  });

  test('endpoint surfaces l4 availability shape for D7 degrade-to-WARN path', () => {
    const block = endpointBlock();
    expect(block).toContain('isSidecarAvailable');
    expect(block).toContain('available');
  });

  test('endpoint uses the sidecar client, not direct security-classifier import', () => {
    // Static check that server.ts imports from security-sidecar-client.ts,
    // NOT from security-classifier.ts directly (would brick the compiled
    // binary per CLAUDE.md).
    expect(SERVER_SRC).toContain("from './security-sidecar-client'");
    expect(SERVER_SRC).not.toContain("from './security-classifier'");
  });
});

View File

@ -1,98 +0,0 @@
import { describe, test, expect, beforeEach } from 'bun:test';
// pty-session-lease registers a sessionId space distinct from the pre-v1.44
// attach-token space (browse/src/pty-session-cookie.ts). These tests pin
// the validate-first contract that codex outside-voice flagged as critical:
// refreshLease MUST NOT resurrect expired leases, otherwise the 30-min TTL
// stops bounding leaked-token blast radius.
import {
mintLease,
validateLease,
refreshLease,
revokeLease,
leaseCount,
__resetLeases,
} from '../src/pty-session-lease';
// Every test starts from an empty lease registry.
beforeEach(() => {
  __resetLeases();
});

/** Covers minting, validation and revocation of PTY session leases. */
describe('pty-session-lease: mint/validate/revoke', () => {
  test('mintLease returns a fresh non-secret sessionId + future expiresAt', () => {
    const first = mintLease();
    const second = mintLease();

    expect(first.sessionId).toBeTruthy();
    expect(second.sessionId).toBeTruthy();
    // Two mints must never hand out the same id.
    expect(first.sessionId).not.toBe(second.sessionId);
    expect(first.expiresAt).toBeGreaterThan(Date.now());
    // base64url alphabet: characters in [A-Za-z0-9_-].
    expect(first.sessionId).toMatch(/^[A-Za-z0-9_-]+$/);
    expect(leaseCount()).toBe(2);
  });

  test('validateLease ok for fresh lease, false for unknown', () => {
    const minted = mintLease();
    const verdict = validateLease(minted.sessionId);

    expect(verdict.ok).toBe(true);
    if (verdict.ok) {
      expect(verdict.expiresAt).toBeGreaterThan(Date.now());
    }

    // Unknown and missing ids are all rejected.
    expect(validateLease('not-a-real-session-id').ok).toBe(false);
    expect(validateLease(null).ok).toBe(false);
    expect(validateLease(undefined).ok).toBe(false);
  });

  test('revokeLease removes the lease; subsequent validate returns false', () => {
    const minted = mintLease();
    expect(validateLease(minted.sessionId).ok).toBe(true);

    revokeLease(minted.sessionId);

    expect(validateLease(minted.sessionId).ok).toBe(false);
    expect(leaseCount()).toBe(0);
  });

  test('revokeLease tolerates unknown sessionId without throwing', () => {
    const revokePhantom = () => revokeLease('phantom');
    const revokeNull = () => revokeLease(null);

    expect(revokePhantom).not.toThrow();
    expect(revokeNull).not.toThrow();
  });
});
/**
 * Pins the validate-first refresh contract: refreshLease may only extend a
 * lease that is currently valid; it must never create or revive one.
 */
describe('pty-session-lease: refresh contract (validate-first)', () => {
  test('refreshLease extends expiresAt for a valid lease', async () => {
    const { sessionId, expiresAt: initial } = mintLease();
    // Date.now() is ms-grain, so a synchronous refresh may not move the
    // integer. Wait a few ms first. Awaiting (rather than asserting inside
    // the timer callback) makes a failed expectation fail the test directly
    // instead of leaving the promise unsettled until the runner times out.
    await new Promise<void>((resolve) => setTimeout(resolve, 5));
    const r = refreshLease(sessionId);
    expect(r.ok).toBe(true);
    if (r.ok) expect(r.expiresAt).toBeGreaterThan(initial);
  });

  test('refreshLease rejects unknown sessionId (validate-first invariant)', () => {
    const r = refreshLease('never-minted');
    expect(r.ok).toBe(false);
  });

  test('refreshLease never resurrects an expired lease', () => {
    // Expired path: the lease registry reads Date.now(), so moving the clock
    // just past the lease's own expiresAt expires it without touching module
    // internals.
    // NOTE(review): assumes expiry is decided purely by comparing Date.now()
    // with expiresAt — confirm against pty-session-lease.ts.
    const { sessionId, expiresAt } = mintLease();
    const realNow = Date.now;
    Date.now = () => expiresAt + 1;
    try {
      expect(validateLease(sessionId).ok).toBe(false);
      expect(refreshLease(sessionId).ok).toBe(false);
      // A refused refresh must not have re-armed the lease.
      expect(validateLease(sessionId).ok).toBe(false);
    } finally {
      // Always restore the real clock so later tests are unaffected.
      Date.now = realNow;
    }

    // Revoked path: refresh after an explicit revoke is refused as well.
    const revoked = mintLease();
    revokeLease(revoked.sessionId);
    expect(refreshLease(revoked.sessionId).ok).toBe(false);
  });

  test('refreshLease tolerates null / undefined sessionId', () => {
    expect(refreshLease(null).ok).toBe(false);
    expect(refreshLease(undefined).ok).toBe(false);
  });
});

View File

@ -1,83 +0,0 @@
/**
* Regression test for PR #1169 bug #7 — `pdf --from-file` ran JSON.parse on
* user-supplied file contents with no try/catch. A malformed payload crashed
* the pdf handler with a raw SyntaxError. Codex flagged that JSON.parse
* accepts primitives too (numbers, strings, null) and Array.isArray must be
* checked separately, so the fix added an explicit object-shape gate.
*
* Test surface: parsePdfFromFile, exported for tests at meta-commands.ts:139.
* All fixtures land in process.cwd() (SAFE_DIRECTORIES allows TEMP_DIR or cwd;
* cwd is universally safe on every platform our CI runs on).
*/
import { describe, expect, test, beforeAll, afterAll } from "bun:test";
import * as fs from "node:fs";
import * as path from "node:path";
import { parsePdfFromFile } from "../src/meta-commands";
// Scratch directory under cwd, created eagerly when this module is loaded.
const FIXTURE_DIR = fs.mkdtempSync(path.join(process.cwd(), "pr1169-pdf-"));

beforeAll(() => {
  // Nothing to do here: mkdtempSync above has already created the directory.
});

// Remove the scratch directory and everything written into it.
afterAll(() => {
  fs.rmSync(FIXTURE_DIR, { force: true, recursive: true });
});

/**
 * Writes a fixture file into FIXTURE_DIR.
 *
 * @param name file name, relative to FIXTURE_DIR
 * @param body exact file contents
 * @returns the full path of the written file
 */
function writeFixture(name: string, body: string): string {
  const target = path.join(FIXTURE_DIR, name);
  fs.writeFileSync(target, body);
  return target;
}
/**
 * parsePdfFromFile must reject malformed or non-object JSON with a
 * descriptive error, and still accept a plain options object.
 */
describe("parsePdfFromFile — invalid JSON regression (PR #1169 bug #7)", () => {
  // Asserts, once per matcher and in order, that parsing `file` throws an
  // error matching it.
  const expectParseFailure = (file: string, ...matchers: Array<RegExp | string>): void => {
    for (const matcher of matchers) {
      expect(() => parsePdfFromFile(file)).toThrow(matcher);
    }
  };

  test("invalid JSON: throws with file path AND parser detail", () => {
    const file = writeFixture("invalid.json", "{ not-json");
    expectParseFailure(file, /not valid JSON/, file);
  });

  test("empty file: throws JSON-parse style error", () => {
    // Empty string is invalid JSON per ECMA-404.
    const file = writeFixture("empty.json", "");
    expectParseFailure(file, /not valid JSON/);
  });

  test("top-level array: throws 'must be a JSON object' with type", () => {
    const file = writeFixture("array.json", JSON.stringify(["a", "b"]));
    expectParseFailure(file, /must be a JSON object/, /array/);
  });

  test("top-level number: throws with 'number' type label", () => {
    const file = writeFixture("number.json", "42");
    expectParseFailure(file, /must be a JSON object/, /number/);
  });

  test("top-level string: throws with 'string' type label", () => {
    const file = writeFixture("string.json", JSON.stringify("hello"));
    expectParseFailure(file, /must be a JSON object/, /string/);
  });

  test("top-level null: throws with 'object' type label (JS null typeof === object)", () => {
    // null passes typeof === 'object' but the fix's `=== null` branch catches it.
    const file = writeFixture("null.json", "null");
    expectParseFailure(file, /must be a JSON object/);
  });

  test("top-level boolean: throws with 'boolean' type label", () => {
    const file = writeFixture("bool.json", "true");
    expectParseFailure(file, /must be a JSON object/, /boolean/);
  });

  test("valid object: parses successfully (happy-path regression)", () => {
    const payload = JSON.stringify({ format: "A4", pageNumbers: true });
    const parsed = parsePdfFromFile(writeFixture("valid.json", payload));
    expect(parsed.format).toBe("A4");
    expect(parsed.pageNumbers).toBe(true);
  });
});

View File

@ -1,39 +0,0 @@
import { describe, test, expect } from "bun:test";
import { buildRestartEnv } from "../src/cli";
// #1781: an auto-restart triggered by a plain command (no --headed flag) must
// NOT silently downgrade a headed session to headless. buildRestartEnv reapplies
// headed/proxy/configHash from this invocation OR the persisted server state.
/**
 * buildRestartEnv must carry headed / proxy / configHash settings into the
 * restart environment, taking them from the current flags or from the
 * persisted server state.
 */
describe("buildRestartEnv (#1781 headed persistence)", () => {
  // Fields shared by both persisted-state fixtures; only `mode` differs.
  const baseState = { pid: 1, port: 9, token: "t", startedAt: "", serverPath: "" };
  const headedState = { ...baseState, mode: "headed" as const };
  const launchedState = { ...baseState, mode: "launched" as const };

  test("headed flag on this invocation → BROWSE_HEADED=1", () => {
    const restartEnv = buildRestartEnv({ headed: true } as any, null);
    expect(restartEnv.BROWSE_HEADED).toBe("1");
  });

  test("plain command + persisted headed state → still BROWSE_HEADED=1 (the regression)", () => {
    const restartEnv = buildRestartEnv({} as any, headedState as any);
    expect(restartEnv.BROWSE_HEADED).toBe("1");
  });

  test("plain command + headless state → no BROWSE_HEADED (no spurious headed)", () => {
    const restartEnv = buildRestartEnv({} as any, launchedState as any);
    expect(restartEnv.BROWSE_HEADED).toBeUndefined();
  });

  test("nothing set → empty env", () => {
    const restartEnv = buildRestartEnv(null, null);
    expect(restartEnv).toEqual({});
  });

  test("proxy + configHash reapplied from flags", () => {
    const flags = { proxyUrl: "socks5://x", configHash: "abc" };
    const restartEnv = buildRestartEnv(flags as any, null);
    expect(restartEnv.BROWSE_PROXY_URL).toBe("socks5://x");
    expect(restartEnv.BROWSE_CONFIG_HASH).toBe("abc");
  });

  test("configHash falls back to persisted state", () => {
    const persisted = { ...launchedState, configHash: "fromstate" };
    const restartEnv = buildRestartEnv({} as any, persisted as any);
    expect(restartEnv.BROWSE_CONFIG_HASH).toBe("fromstate");
  });
});

View File

@ -1,118 +0,0 @@
/**
* Unit tests for the screenshot size guard (#1214).
*
* Verifies that images exceeding 2000px on the longest dimension get
* downscaled to fit the Anthropic vision API cap, while images already
* inside the cap pass through untouched.
*
* Integration with the three callsites (snapshot.ts, meta-commands.ts,
* write-commands.ts) is exercised by the existing browse E2E suite — we
* don't need to spin up Chromium just to verify the helper. The static
* invariant test below pins that all three callsites import the guard.
*/
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'fs';
import { tmpdir } from 'os';
import { join } from 'path';
import sharp from 'sharp';
import {
SCREENSHOT_MAX_DIMENSION_PX,
guardScreenshotBuffer,
guardScreenshotPath,
} from '../src/screenshot-size-guard';
// Per-test scratch directory: created fresh before each test and removed
// (recursively, ignoring a missing path) after it.
let tmp: string;

beforeEach(() => {
  const prefix = join(tmpdir(), 'screenshot-guard-');
  tmp = mkdtempSync(prefix);
});

afterEach(() => {
  rmSync(tmp, { force: true, recursive: true });
});
/**
 * Builds a solid-colour, 3-channel PNG of the requested size with sharp.
 *
 * @param width image width in pixels
 * @param height image height in pixels
 * @returns the encoded PNG bytes
 */
async function makePng(width: number, height: number): Promise<Buffer> {
  const background = { r: 200, g: 50, b: 50 };
  const canvas = sharp({ create: { width, height, channels: 3, background } });
  return canvas.png().toBuffer();
}
/** In-memory guard: oversized buffers are downscaled, the rest pass through. */
describe('guardScreenshotBuffer', () => {
  test('passes through images already within the cap', async () => {
    const source = await makePng(1500, 1800);
    const guarded = await guardScreenshotBuffer(source);

    expect(guarded.result.resized).toBe(false);
    expect(guarded.result.width).toBe(1500);
    expect(guarded.result.height).toBe(1800);
    // identity — no re-encode
    expect(guarded.buffer).toBe(source);
  });

  test('downscales a 5000px-tall image to fit the cap', async () => {
    const source = await makePng(1200, 5000);
    const guarded = await guardScreenshotBuffer(source);
    const outcome = guarded.result;

    expect(outcome.resized).toBe(true);
    expect(outcome.originalHeight).toBe(5000);
    const longestSide = Math.max(outcome.width, outcome.height);
    expect(longestSide).toBeLessThanOrEqual(SCREENSHOT_MAX_DIMENSION_PX);
    // Aspect ratio preserved.
    expect(outcome.height / outcome.width).toBeCloseTo(5000 / 1200, 1);
    // Buffer is a different (smaller) PNG.
    expect(guarded.buffer.length).toBeLessThan(source.length);
  });

  test('downscales a 6000px-wide image', async () => {
    const source = await makePng(6000, 1200);
    const guarded = await guardScreenshotBuffer(source);
    const outcome = guarded.result;

    expect(outcome.resized).toBe(true);
    expect(outcome.originalWidth).toBe(6000);
    const longestSide = Math.max(outcome.width, outcome.height);
    expect(longestSide).toBeLessThanOrEqual(SCREENSHOT_MAX_DIMENSION_PX);
    expect(guarded.buffer.length).toBeGreaterThan(0);
  });

  test('treats exactly-2000px images as in-bounds (no resize)', async () => {
    const source = await makePng(2000, 1000);
    const guarded = await guardScreenshotBuffer(source);
    expect(guarded.result.resized).toBe(false);
  });
});
/** On-disk guard: oversized files are rewritten in place, others left alone. */
describe('guardScreenshotPath', () => {
  test('rewrites the file in place when downscale is needed', async () => {
    const target = join(tmp, 'tall.png');
    const oversized = await makePng(1200, 5000);
    writeFileSync(target, oversized);

    const outcome = await guardScreenshotPath(target);
    expect(outcome.resized).toBe(true);

    // Re-read the file and check its real dimensions, not just the report.
    const onDisk = readFileSync(target);
    const { width, height } = await sharp(onDisk).metadata();
    const longestSide = Math.max(width ?? 0, height ?? 0);
    expect(longestSide).toBeLessThanOrEqual(SCREENSHOT_MAX_DIMENSION_PX);
  });

  test('leaves the file untouched when already within cap', async () => {
    const target = join(tmp, 'short.png');
    const original = await makePng(800, 600);
    writeFileSync(target, original);

    const outcome = await guardScreenshotPath(target);
    expect(outcome.resized).toBe(false);

    // Byte-for-byte identical to what was written.
    const onDisk = readFileSync(target);
    expect(onDisk.equals(original)).toBe(true);
  });
});
/** Source-text check that each full-page screenshot callsite references the guard module. */
describe('static invariant: all three full-page callsites import the guard', () => {
  test('snapshot.ts, meta-commands.ts, and write-commands.ts wire the size guard', () => {
    const srcRoot = join(import.meta.dir, '..', 'src');
    ['snapshot.ts', 'meta-commands.ts', 'write-commands.ts'].forEach((fileName) => {
      const text = readFileSync(join(srcRoot, fileName), 'utf-8');
      expect(text).toContain('screenshot-size-guard');
    });
  });
});

View File

@ -1,138 +0,0 @@
/**
* Regression test for PR #1169 bug #6 — downloadFile opened a WriteStream to
* `<dest>.tmp.<pid>` but never closed it on error paths. If the reader or
* writer threw mid-download, the FD leaked and the half-written tmp could
* be promoted by a retry's renameSync.
*
* The fix wraps the read loop in try/catch and runs `writer.destroy()` +
* `fs.unlinkSync(tmp)` before rethrowing.
*
* Per codex's pushback, this test must exercise BOTH the reader-throws path
* and the non-2xx-response path, and it must NOT assume the specific tmp
* filename — only that no `<dest>.tmp.*` sibling remains.
*/
import { describe, expect, test, beforeAll, afterAll, beforeEach, afterEach } from "bun:test";
import * as fs from "node:fs";
import * as path from "node:path";
import { downloadFile } from "../src/security-classifier";
/**
 * Lists leftover temp files for a download destination.
 *
 * @param destDir directory that would contain the destination file
 * @param destBase base name of the destination file
 * @returns names in destDir that start with `<destBase>.tmp.`; an empty
 *   array when destDir does not exist
 */
function tmpSiblings(destDir: string, destBase: string): string[] {
  const prefix = `${destBase}.tmp.`;
  const leftovers: string[] = [];
  if (fs.existsSync(destDir)) {
    for (const entry of fs.readdirSync(destDir)) {
      if (entry.startsWith(prefix)) leftovers.push(entry);
    }
  }
  return leftovers;
}
// Scratch directory for download targets; assigned in beforeAll.
let FIXTURE_DIR = "";
// The real global fetch, captured before each test and put back after it.
let originalFetch: typeof fetch;

beforeAll(() => {
  const prefix = path.join(process.cwd(), "pr1169-dl-");
  FIXTURE_DIR = fs.mkdtempSync(prefix);
});

afterAll(() => {
  // Nothing to clean up if the directory was never created.
  if (!FIXTURE_DIR) return;
  fs.rmSync(FIXTURE_DIR, { force: true, recursive: true });
});

beforeEach(() => {
  originalFetch = globalThis.fetch;
});

afterEach(() => {
  globalThis.fetch = originalFetch;
});
/**
 * downloadFile must leave neither the destination nor any `<dest>.tmp.*`
 * sibling behind when it fails, and only the destination when it succeeds.
 */
describe("downloadFile error-path cleanup (PR #1169 bug #6)", () => {
  // Destination path inside FIXTURE_DIR plus the pieces tmpSiblings needs.
  const targetFor = (fileName: string) => {
    const dest = path.join(FIXTURE_DIR, fileName);
    return { dest, destDir: path.dirname(dest), destBase: path.basename(dest) };
  };

  // Replaces global fetch with a stub that answers with `respond()`.
  const stubFetch = (respond: () => Response): void => {
    // @ts-expect-error — overwrite global fetch for the test
    globalThis.fetch = async () => respond();
  };

  test("reader rejects mid-stream: throws, no dest, no tmp sibling left", async () => {
    const { dest, destDir, destBase } = targetFor("reader-fail-model.bin");

    // Build a ReadableStream that emits one chunk then errors on second pull.
    const failingBody = new ReadableStream<Uint8Array>({
      start(controller) {
        controller.enqueue(new Uint8Array([1, 2, 3, 4]));
      },
      pull(controller) {
        // Second pull triggers the failure path the fix protects against.
        controller.error(new Error("simulated mid-stream read failure"));
      },
    });
    stubFetch(() => new Response(failingBody, { status: 200, statusText: "OK" }));

    const attempt = downloadFile("https://example.com/model.bin", dest);
    await expect(attempt).rejects.toThrow(/simulated mid-stream read failure/);

    expect(fs.existsSync(dest)).toBe(false);
    expect(tmpSiblings(destDir, destBase)).toEqual([]);
  });

  test("non-2xx response: throws with status, no tmp file created", async () => {
    const { dest, destDir, destBase } = targetFor("http500-model.bin");
    stubFetch(() => new Response("server boom", { status: 500, statusText: "Server Error" }));

    const attempt = downloadFile("https://example.com/model.bin", dest);
    await expect(attempt).rejects.toThrow(/Failed to fetch.*500/);

    expect(fs.existsSync(dest)).toBe(false);
    expect(tmpSiblings(destDir, destBase)).toEqual([]);
  });

  test("missing body: throws, no tmp file created", async () => {
    const { dest, destDir, destBase } = targetFor("nobody-model.bin");
    // Response with null body (some upstreams send this on edge errors).
    stubFetch(() => new Response(null, { status: 200, statusText: "OK" }));

    const attempt = downloadFile("https://example.com/model.bin", dest);
    await expect(attempt).rejects.toThrow(/Failed to fetch/);

    expect(fs.existsSync(dest)).toBe(false);
    expect(tmpSiblings(destDir, destBase)).toEqual([]);
  });

  test("happy path: 2xx body completes, dest exists, no tmp sibling remains", async () => {
    const { dest, destDir, destBase } = targetFor("ok-model.bin");

    // A stream that delivers a single chunk and then closes cleanly.
    const completeBody = new ReadableStream<Uint8Array>({
      start(controller) {
        controller.enqueue(new Uint8Array([9, 9, 9, 9]));
        controller.close();
      },
    });
    stubFetch(() => new Response(completeBody, { status: 200, statusText: "OK" }));

    await downloadFile("https://example.com/model.bin", dest);

    expect(fs.existsSync(dest)).toBe(true);
    expect(tmpSiblings(destDir, destBase)).toEqual([]);
    const bytesOnDisk = Array.from(fs.readFileSync(dest));
    expect(bytesOnDisk).toEqual([9, 9, 9, 9]);
    fs.unlinkSync(dest);
  });
});

View File

@ -1,66 +0,0 @@
/**
* Unit tests for browse/src/security-sidecar-client.ts.
*
* Tests the IPC client's behavior against a fake sidecar (a tiny Node
* script we spawn) — verifies request/response id correlation, timeout,
* payload cap, malformed-response handling, and circuit-breaker tripping.
*
* Does NOT exercise the real classifier — that lives behind the model
* download and is covered by the existing security-classifier tests + the
* E2E browser security suite.
*/
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
import { mkdtempSync, rmSync, writeFileSync } from "fs";
import { tmpdir } from "os";
import { join } from "path";
// Per-test scratch directory.
let tmp: string;

beforeEach(() => {
  const prefix = join(tmpdir(), "sidecar-client-test-");
  tmp = mkdtempSync(prefix);
});

// Reset the sidecar client's module state, then drop the scratch directory.
afterEach(async () => {
  const { resetSidecarForTests } = await import("../src/security-sidecar-client");
  resetSidecarForTests();
  rmSync(tmp, { force: true, recursive: true });
});
/** A payload above 64KB must be rejected with payload-too-large. */
describe("security-sidecar-client — payload cap", () => {
  test("rejects requests over 64KB without spawning", async () => {
    const client = await import("../src/security-sidecar-client");
    // 65KB of filler — just over the cap.
    const tooBig = "a".repeat(65 * 1024);
    const attempt = client.scanWithSidecar(tooBig);
    await expect(attempt).rejects.toThrow(/payload-too-large/);
  });
});
/** isSidecarAvailable always answers with a well-formed status object. */
describe("security-sidecar-client — availability probe", () => {
  test("isSidecarAvailable returns a shape regardless of platform", async () => {
    const client = await import("../src/security-sidecar-client");
    const status = client.isSidecarAvailable();

    expect(typeof status.available).toBe("boolean");
    if (status.available) return;
    // When unavailable, reason must explain why
    expect(typeof status.reason).toBe("string");
  });
});
/**
 * Breaker-adjacent coverage. Actually tripping the breaker needs a fake
 * spawn(), which this file does not provide, so the test below pins the
 * part that can be checked here: payload-cap rejections are repeatable and
 * leave the sidecar's availability untouched.
 */
describe("security-sidecar-client — circuit breaker after repeated failures", () => {
  test("payload-cap rejections are repeatable and never count toward RESPAWN_LIMIT", async () => {
    const { scanWithSidecar, isSidecarAvailable } = await import("../src/security-sidecar-client");
    // NOTE(review): relies on the payload cap short-circuiting before any
    // spawn attempt, so these rejections are not counted as sidecar
    // failures — confirm against security-sidecar-client.ts.
    const availableBefore = isSidecarAvailable().available;
    const oversized = "x".repeat(70 * 1024);
    for (let i = 0; i < 5; i += 1) {
      await expect(scanWithSidecar(oversized)).rejects.toThrow(/payload-too-large/);
    }
    // Five rejected calls must not have flipped the availability verdict.
    expect(isSidecarAvailable().available).toBe(availableBefore);
  });
});

View File

@ -63,13 +63,13 @@ describe('Server auth security', () => {
// Test 4: /activity/history requires auth via validateAuth
test('/activity/history requires authentication', () => {
const historyBlock = sliceBetween(SERVER_SRC, "url.pathname === '/activity/history'", 'Batch endpoint');
const historyBlock = sliceBetween(SERVER_SRC, "url.pathname === '/activity/history'", 'Sidebar endpoints');
expect(historyBlock).toContain('validateAuth');
});
// Test 5: /activity/history has no wildcard CORS header
test('/activity/history has no wildcard CORS header', () => {
const historyBlock = sliceBetween(SERVER_SRC, "url.pathname === '/activity/history'", 'Batch endpoint');
const historyBlock = sliceBetween(SERVER_SRC, "url.pathname === '/activity/history'", 'Sidebar endpoints');
expect(historyBlock).not.toContain("'*'");
});
@ -314,7 +314,7 @@ describe('Server auth security', () => {
// Regression: connect command crashed with "domains is not defined" because
// a stray `domains,` variable was in the status fetch body (cli.ts:852).
test('connect command status fetch body has no undefined variable references', () => {
const connectBlock = sliceBetween(CLI_SRC, 'Launching headed Chromium', 'Terminal agent started');
const connectBlock = sliceBetween(CLI_SRC, 'Launching headed Chromium', 'Sidebar agent started');
// The status fetch should use a clean JSON body
expect(connectBlock).toContain("command: 'status'");
// Must NOT contain a bare `domains` reference in the fetch body
@ -335,15 +335,10 @@ describe('Server auth security', () => {
// The connect subprocess env must override BROWSE_PARENT_PID
expect(pairBlock).toContain("BROWSE_PARENT_PID");
expect(pairBlock).toContain("'0'");
// The connect command must propagate BROWSE_PARENT_PID=0 via the
// serverEnv object literal passed to startServer. The literal text
// `serverEnv.BROWSE_PARENT_PID` is NOT in source — the value is
// assigned via object-literal syntax (`BROWSE_PARENT_PID: '0'`)
// inside the `const serverEnv: Record<string, string> = { ... }`
// declaration. Assert both pieces appear in the connect block.
const connectBlock = sliceBetween(CLI_SRC, 'Launching headed Chromium', 'Terminal agent started');
expect(connectBlock).toContain("const serverEnv");
expect(connectBlock).toContain("BROWSE_PARENT_PID: '0'");
// The connect command must propagate BROWSE_PARENT_PID=0 to serverEnv
const connectBlock = sliceBetween(CLI_SRC, 'Launching headed Chromium', 'Sidebar agent started');
expect(connectBlock).toContain("BROWSE_PARENT_PID");
expect(connectBlock).toContain("serverEnv.BROWSE_PARENT_PID");
});
// Regression: newtab returned 403 for scoped tokens because the tab ownership

View File

@ -1,232 +0,0 @@
import { describe, test, expect, beforeEach, beforeAll, afterAll } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import * as crypto from 'crypto';
import {
buildFetchHandler,
__resetShuttingDown,
type ServerConfig,
} from '../src/server';
import { __resetRegistry } from '../src/token-registry';
import { BrowserManager } from '../src/browser-manager';
import { resolveConfig } from '../src/config';
// Tests for the v1.41+ ownsTerminalAgent flag.
//
// Embedders (gbrowser phoenix overlay) that run their own PTY server and write
// terminal-port / terminal-internal-token / terminal-agent-pid themselves were
// getting those files clobbered by gstack's shutdown(). The flag (default true)
// gates four side effects (v1.44+):
// 1. identity-based kill of the PID in <stateDir>/terminal-agent-pid
// 2. unlink terminal-port
// 3. unlink terminal-internal-token
// 4. unlink terminal-agent-pid
// False = embedder owns them, gstack stays hands-off.
//
// Pre-v1.44 used `pkill -f terminal-agent\.ts` which matched sibling gstack
// sessions on the same host — see browse/src/terminal-agent-control.ts header.
//
// CRITICAL: each test stubs process.exit (so shutdown's exit doesn't kill
// the test runner) and process.kill (so no signal can reach a real process).
// The PID in the test agent-record is SENTINEL_DEAD_PID, a value no live
// process holds; the stubbed liveness probe (signal 0) reports ESRCH for it,
// which drives isProcessAlive's false branch so no termination signal is
// ever sent.
// State directory and the three terminal-agent files that live in it.
const { stateDir } = resolveConfig();
const inStateDir = (name: string): string => path.join(stateDir, name);
const PORT_FILE = inStateDir('terminal-port');
const TOKEN_FILE = inStateDir('terminal-internal-token');
const AGENT_RECORD_FILE = inStateDir('terminal-agent-pid');
// Recognisable contents, so tests can tell "preserved" from "rewritten".
const SENTINEL_PORT = 'sentinel-port-65432';
const SENTINEL_TOKEN = 'sentinel-token-abcdef1234567890';
// 2147483646 (2^31 - 2) is far above any PID a real kernel hands out: Linux
// caps PIDs at PID_MAX_LIMIT (2^22 = 4194304) and macOS at 99998. No real
// process will ever hold this PID on a developer machine. isProcessAlive
// returns false → killAgentByRecord no-ops without sending any signal.
const SENTINEL_DEAD_PID = 2147483646;
/**
 * Builds a ServerConfig with a random auth token and fixed defaults.
 *
 * @param overrides fields that replace the defaults
 * @returns the defaults with `overrides` spread on top
 */
function makeMinimalConfig(overrides: Partial<ServerConfig> = {}): ServerConfig {
  const authToken = `embedder-test-${crypto.randomBytes(16).toString('hex')}`;
  const defaults = {
    authToken,
    browsePort: 34568,
    idleTimeoutMs: 1_800_000,
    config: resolveConfig(),
    browserManager: new BrowserManager(),
    startTime: Date.now(),
  };
  return { ...defaults, ...overrides };
}
/**
 * Creates the state directory if needed and writes the three sentinel
 * files: port, internal token, and an agent record naming SENTINEL_DEAD_PID.
 */
function writeSentinels(): void {
  fs.mkdirSync(stateDir, { recursive: true });
  fs.writeFileSync(PORT_FILE, SENTINEL_PORT);
  fs.writeFileSync(TOKEN_FILE, SENTINEL_TOKEN);
  const agentRecord = { pid: SENTINEL_DEAD_PID, gen: 'sentinel-gen', startedAt: Date.now() };
  fs.writeFileSync(AGENT_RECORD_FILE, JSON.stringify(agentRecord));
}
/**
 * Reads a file as UTF-8 text.
 *
 * @param p path of the file to read
 * @returns the file contents, or null when the read throws for any reason
 */
function readIfExists(p: string): string | null {
  let contents: string | null = null;
  try {
    contents = fs.readFileSync(p, 'utf-8');
  } catch {
    // Unreadable or missing — reported to the caller as null.
  }
  return contents;
}
/**
 * Runs `cb` with process.exit and process.kill replaced by test doubles,
 * restoring the originals afterwards even if `cb` rejects.
 *
 * - The exit double throws `Error("__exit:<code>")` instead of terminating
 *   the test runner, so callers can swallow that marker.
 * - The kill double records every call as `[pid, signal]` (a missing signal
 *   is recorded as 'SIGTERM') and delivers nothing. A signal-0 liveness
 *   probe throws an ESRCH error, i.e. every PID looks dead; any other
 *   signal returns true.
 *
 * @param cb test body; receives the live array of recorded kill calls
 * @returns the same array of recorded kill calls once `cb` has settled
 */
async function withStubs(
  cb: (killCalls: Array<[number, NodeJS.Signals | number]>) => Promise<void>
): Promise<Array<[number, NodeJS.Signals | number]>> {
  const saved = { exit: process.exit, kill: process.kill };
  const recorded: Array<[number, NodeJS.Signals | number]> = [];

  const fakeExit = (code: number) => {
    throw new Error(`__exit:${code}`);
  };
  const fakeKill = (pid: number, signal: NodeJS.Signals | number) => {
    recorded.push([pid, signal ?? 'SIGTERM']);
    if (signal !== 0) return true;
    // signal 0 is a liveness probe — answer "no such process" so that
    // isProcessAlive(SENTINEL_DEAD_PID) returns false.
    const err: any = new Error('No such process');
    err.code = 'ESRCH';
    throw err;
  };

  (process as any).exit = fakeExit as any;
  (process as any).kill = fakeKill as any;
  try {
    await cb(recorded);
  } finally {
    (process as any).exit = saved.exit;
    (process as any).kill = saved.kill;
  }
  return recorded;
}
/**
 * Calls `handle.shutdown(0)` and swallows only the `__exit:` marker error
 * thrown by a stubbed process.exit; every other error is rethrown.
 *
 * @param handle object exposing the shutdown function under test
 */
async function runShutdown(handle: { shutdown: (code?: number) => Promise<void> }): Promise<void> {
  try {
    await handle.shutdown(0);
  } catch (err: any) {
    const message = err?.message;
    const isExitMarker = typeof message === 'string' && message.startsWith('__exit:');
    if (!isExitMarker) throw err;
  }
}
/**
 * Drops signal-0 liveness probes from a list of recorded kill calls.
 *
 * @param calls recorded `[pid, signal]` pairs
 * @returns a new array holding only the calls whose signal is not 0
 */
function terminationCalls(
  calls: Array<[number, NodeJS.Signals | number]>,
): Array<[number, NodeJS.Signals | number]> {
  const realSignals: Array<[number, NodeJS.Signals | number]> = [];
  for (const call of calls) {
    if (call[1] !== 0) realSignals.push(call);
  }
  return realSignals;
}
describe('buildFetchHandler ownsTerminalAgent gate', () => {
// shutdown() reads `path.dirname(config.stateFile)` from module-level config
// (composition gap — see TODOS T9). So unlinks target the real state dir,
// not a per-test temp dir. If a real gstack daemon is running on this host,
// its terminal-port + terminal-internal-token + terminal-agent-pid live
// where this test writes. Save + restore real-daemon file contents around
// the whole suite so the test never clobbers a developer's running session.
let realPortBackup: string | null = null;
let realTokenBackup: string | null = null;
let realAgentRecordBackup: string | null = null;
beforeAll(() => {
realPortBackup = readIfExists(PORT_FILE);
realTokenBackup = readIfExists(TOKEN_FILE);
realAgentRecordBackup = readIfExists(AGENT_RECORD_FILE);
});
afterAll(() => {
if (realPortBackup !== null) {
fs.mkdirSync(stateDir, { recursive: true });
fs.writeFileSync(PORT_FILE, realPortBackup);
} else {
try { fs.unlinkSync(PORT_FILE); } catch {}
}
if (realTokenBackup !== null) {
fs.mkdirSync(stateDir, { recursive: true });
fs.writeFileSync(TOKEN_FILE, realTokenBackup);
} else {
try { fs.unlinkSync(TOKEN_FILE); } catch {}
}
if (realAgentRecordBackup !== null) {
fs.mkdirSync(stateDir, { recursive: true });
fs.writeFileSync(AGENT_RECORD_FILE, realAgentRecordBackup);
} else {
try { fs.unlinkSync(AGENT_RECORD_FILE); } catch {}
}
});
beforeEach(() => {
__resetRegistry();
__resetShuttingDown();
// Clean any leftover sentinels from a prior failed run so the "preserved"
// assertion can't pass spuriously off a stale file.
try { fs.unlinkSync(PORT_FILE); } catch {}
try { fs.unlinkSync(TOKEN_FILE); } catch {}
try { fs.unlinkSync(AGENT_RECORD_FILE); } catch {}
});
test('1. ownsTerminalAgent:false preserves all three files and sends no signal', async () => {
writeSentinels();
const handle = buildFetchHandler(makeMinimalConfig({ ownsTerminalAgent: false }));
const calls = await withStubs(async () => {
await runShutdown(handle);
});
expect(readIfExists(PORT_FILE)).toBe(SENTINEL_PORT);
expect(readIfExists(TOKEN_FILE)).toBe(SENTINEL_TOKEN);
expect(readIfExists(AGENT_RECORD_FILE)).not.toBeNull();
expect(terminationCalls(calls).length).toBe(0);
});
test('2. ownsTerminalAgent:true deletes all three files; identity-based kill probes the recorded PID', async () => {
  writeSentinels();
  const owningConfig = makeMinimalConfig({ ownsTerminalAgent: true });
  const serverHandle = buildFetchHandler(owningConfig);
  const recordedCalls = await withStubs(async () => {
    await runShutdown(serverHandle);
  });
  // All three state files must be gone after an owning shutdown.
  expect(readIfExists(PORT_FILE)).toBeNull();
  expect(readIfExists(TOKEN_FILE)).toBeNull();
  expect(readIfExists(AGENT_RECORD_FILE)).toBeNull();
  // isProcessAlive sends signal 0; the recorded PID is the sentinel-dead PID,
  // so the probe returns false and no SIGTERM is sent.
  const livenessProbes = recordedCalls.filter(
    ([targetPid, signal]) => targetPid === SENTINEL_DEAD_PID && signal === 0,
  );
  expect(livenessProbes.length).toBeGreaterThan(0);
  expect(terminationCalls(recordedCalls).length).toBe(0);
});
test('3. ownsTerminalAgent unset defaults to true (deletes all three; probes recorded PID)', async () => {
  writeSentinels();
  // Deliberately pass no overrides: ownsTerminalAgent is left unset so the
  // `?? true` default is what gets exercised.
  const defaultConfig = makeMinimalConfig();
  const serverHandle = buildFetchHandler(defaultConfig);
  const recordedCalls = await withStubs(async () => {
    await runShutdown(serverHandle);
  });
  // Same outcome as the explicit-true case: every state file removed.
  expect(readIfExists(PORT_FILE)).toBeNull();
  expect(readIfExists(TOKEN_FILE)).toBeNull();
  expect(readIfExists(AGENT_RECORD_FILE)).toBeNull();
  // At least one signal-0 probe must have targeted the recorded PID.
  const livenessProbes = recordedCalls.filter(
    ([targetPid, signal]) => targetPid === SENTINEL_DEAD_PID && signal === 0,
  );
  expect(livenessProbes.length).toBeGreaterThan(0);
});
test('4. CLI start() call site passes ownsTerminalAgent: true literally (static grep)', () => {
  // Locate browse/src/server.ts from this test file's own URL so the lookup
  // does not depend on the current working directory.
  // NOTE(review): URL.pathname stays percent-encoded, so a checkout path
  // containing spaces may not resolve — confirm whether fileURLToPath is
  // wanted here.
  const thisTestFile = new URL(import.meta.url).pathname;
  const serverTsPath = path.resolve(thisTestFile, '..', '..', 'src', 'server.ts');
  const serverSource = fs.readFileSync(serverTsPath, 'utf-8');
  // Pin the literal inside start()'s buildFetchHandler({...}) call. Requiring
  // the trailing comma plus the trailing comment text keeps the JSDoc mention
  // earlier in the file from satisfying the match.
  const cliCallSite = /ownsTerminalAgent:\s*true,\s*\/\/\s*CLI spawns terminal-agent\.ts/;
  expect(serverSource).toMatch(cliCallSite);
});
});

View File

@ -1,8 +1,7 @@
import { describe, test, expect, beforeEach, mock } from 'bun:test';
import { describe, test, expect, beforeEach } from 'bun:test';
import {
resolveConfigFromEnv,
buildFetchHandler,
__testInternals__,
type ServerConfig,
type ServerHandle,
type Surface,
@ -12,8 +11,6 @@ import { __resetRegistry, initRegistry } from '../src/token-registry';
import { BrowserManager } from '../src/browser-manager';
import { resolveConfig } from '../src/config';
import * as crypto from 'crypto';
import * as fs from 'node:fs';
import * as path from 'node:path';
/**
* Tests for the factory-export API surface added so gbrowser (phoenix) can
@ -384,141 +381,3 @@ describe('buildFetchHandler factory contract', () => {
expect(() => initRegistry('second-token-pad-to-16-chars')).toThrow(/already initialized/i);
});
});
// ─── Idle timer + onDisconnect dual-instance fix (v1.42.3.0) ──────────
//
// Before this fix, module-level handlers (idleCheckTick, parent watchdog,
// SIGTERM, onDisconnect default wire) all read the module-level
// BrowserManager directly. For embedders (gbrowser) that pass their own
// BrowserManager into buildFetchHandler, the module-level instance never
// has launchHeaded() called on it — so connectionMode stays 'launched'
// forever and headed mode never short-circuits idle-shutdown. Result:
// 30-min auto-shutdown of overlay sessions.
//
// Fix: introduce `let activeBrowserManager` indirection (symmetric with
// the existing `let activeShutdown` pattern). buildFetchHandler retargets
// it at cfg.browserManager AND chains cfg.browserManager.onDisconnect to
// activeShutdown (without clobbering any caller-provided handler).
/**
 * Builds a minimal stand-in for a BrowserManager for the lifecycle tests.
 *
 * @param mode Value that `getConnectionMode()` reports for the mock's lifetime.
 * @returns An object with no-op `stopWatch`/`close`, an `isWatching` that is
 *   always false, and an `onDisconnect` slot that starts out as null and can
 *   be assigned a handler by the caller.
 */
function makeMockBrowserManager(mode: 'launched' | 'headed') {
  // Typed as "handler or null" so tests can assign a callback later.
  const noHandlerYet = null as ((code?: number) => void | Promise<void>) | null;
  const mockManager = {
    getConnectionMode() {
      return mode;
    },
    isWatching() {
      return false;
    },
    stopWatch() {},
    async close() {},
    onDisconnect: noHandlerYet,
  };
  return mockManager;
}
// Behavioral tests for idleCheckTick and for the onDisconnect chaining done
// by buildFetchHandler. Each test passes a mock BrowserManager into
// buildFetchHandler, drives module state through the __testInternals__ seam,
// and swaps process.exit for a mock so a shutdown can be observed without
// ending the test process.
describe('idle timer + onDisconnect dual-instance fix', () => {
beforeEach(() => {
__resetRegistry();
// Reset module state every test. Bun memoizes the server.ts module
// import for the whole test process, so `lastActivity`, `tunnelActive`,
// `activeShutdown`, `activeBrowserManager`, and `isShuttingDown` leak
// between tests. We reset what we touch here; the rest is fresh
// because each test calls buildFetchHandler with a new mock instance.
__testInternals__.setTunnelActive(false);
__testInternals__.setLastActivity(Date.now());
__testInternals__.resetShutdownState();
});
test('CRITICAL — REGRESSION: headed embedder does not auto-shutdown at idle', () => {
// Throwing mock: any call to process.exit in this test surfaces as an error.
const exitMock = mock((_code?: number) => { throw new Error('process.exit called'); });
const originalExit = process.exit;
(process as any).exit = exitMock;
try {
const mockBM = makeMockBrowserManager('headed');
buildFetchHandler(makeMinimalConfig({ browserManager: mockBM as any }));
// Drive lastActivity past the idle threshold via the test seam instead
// of mutating Date.now — the leaked module-level setInterval would
// see fake-time and could fire shutdown if the timing aligned.
__testInternals__.setLastActivity(Date.now() - (31 * 60 * 1000));
__testInternals__.idleCheckTick();
expect(exitMock).not.toHaveBeenCalled();
} finally {
// Always put the real process.exit back, even when an assertion failed.
(process as any).exit = originalExit;
}
});
test('headless still auto-shuts down at idle (paired defensive)', async () => {
// Non-throwing mock: idleCheckTick fires shutdown as a fire-and-forget
// async call. Throwing from process.exit becomes an unhandled rejection
// that the test runner catches. Recording the call is enough.
const exitMock = mock((_code?: number) => {});
const originalExit = process.exit;
(process as any).exit = exitMock;
try {
const mockBM = makeMockBrowserManager('launched');
buildFetchHandler(makeMinimalConfig({ browserManager: mockBM as any }));
// Backdate the last activity by 31 minutes, then run one idle check.
__testInternals__.setLastActivity(Date.now() - (31 * 60 * 1000));
__testInternals__.idleCheckTick();
// Drain microtasks: shutdown awaits flushBuffers + cfgBrowserManager.close
// before reaching process.exit.
await Promise.resolve();
await Promise.resolve();
await new Promise<void>(r => setImmediate(r));
await new Promise<void>(r => setImmediate(r));
expect(exitMock).toHaveBeenCalled();
} finally {
// Always put the real process.exit back, even when an assertion failed.
(process as any).exit = originalExit;
}
});
test('buildFetchHandler chains cfgBrowserManager.onDisconnect, preserving caller-set handler', async () => {
const mockBM = makeMockBrowserManager('headed');
// Install a caller-owned handler BEFORE the factory runs so the test can
// tell whether the factory wrapped it or overwrote it.
const callerCb = mock(async (_code?: number) => {});
mockBM.onDisconnect = callerCb;
buildFetchHandler(makeMinimalConfig({ browserManager: mockBM as any }));
// gstack should have wrapped the caller-installed handler instead of
// clobbering it (Codex finding: BrowserManager.onDisconnect is a public
// field; gbrowser may set it before calling buildFetchHandler).
expect(typeof mockBM.onDisconnect).toBe('function');
expect(mockBM.onDisconnect).not.toBe(callerCb);
// Verify the chain: invoking the wrapped handler runs the caller
// callback AND reaches activeShutdown (which calls process.exit at the
// very end of its async path). Stubbing process.exit to throw aborts
// the chain before isShuttingDown can leak into later tests.
const exitMock = mock((_code?: number) => { throw new Error('process.exit called'); });
const originalExit = process.exit;
(process as any).exit = exitMock;
try {
await expect((mockBM.onDisconnect as any)(0)).rejects.toThrow('process.exit called');
expect(callerCb).toHaveBeenCalledWith(0);
expect(exitMock).toHaveBeenCalledWith(0);
} finally {
// Always put the real process.exit back, even when an assertion failed.
(process as any).exit = originalExit;
}
});
test('tunnelActive blocks idle-shutdown even in headless mode', () => {
// Throwing mock: any call to process.exit in this test surfaces as an error.
const exitMock = mock((_code?: number) => { throw new Error('process.exit called'); });
const originalExit = process.exit;
(process as any).exit = exitMock;
try {
const mockBM = makeMockBrowserManager('launched');
buildFetchHandler(makeMinimalConfig({ browserManager: mockBM as any }));
// Mark the tunnel active, backdate activity by 31 minutes, then tick.
__testInternals__.setTunnelActive(true);
__testInternals__.setLastActivity(Date.now() - (31 * 60 * 1000));
__testInternals__.idleCheckTick();
expect(exitMock).not.toHaveBeenCalled();
} finally {
// Always put the real process.exit back, even when an assertion failed.
(process as any).exit = originalExit;
}
});
test('lifecycle handlers (idleCheckTick + parent watchdog + SIGTERM) read activeBrowserManager, not module-level browserManager', () => {
// Static guard against a future refactor reintroducing a stale read.
// The 3 lifecycle sites this plan fixed all call getConnectionMode via
// the indirection. Other module-level browserManager reads inside
// handleCommandInternalImpl (informational mode reporting in response
// payloads) are out of scope and intentionally untouched.
const src = fs.readFileSync(path.join(__dirname, '..', 'src', 'server.ts'), 'utf-8');
// Only the text that precedes the factory's declaration is inspected.
const factoryStart = src.indexOf('export function buildFetchHandler');
expect(factoryStart).toBeGreaterThan(0);
const moduleLevel = src.slice(0, factoryStart);
const activeCount = (moduleLevel.match(/activeBrowserManager\.getConnectionMode\(\)/g) || []).length;
// Edit 2 (idleCheckTick), Edit 3 (parent watchdog), Edit 6 (SIGTERM).
expect(activeCount).toBe(3);
});
});

View File

@ -1,94 +0,0 @@
import { describe, test, expect } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import { fileURLToPath } from 'url';
// Server-side route shape for the v1.44 lease + restart + dispose +
// lease-refresh wiring. Live route exercises require the terminal-agent
// loopback to be live (e2e-tier); these static-grep tripwires pin the
// load-bearing protocol invariants.
// Absolute path to ../src/server.ts, resolved from this test file's own
// location so the lookup is independent of the current working directory.
// fileURLToPath is used instead of `new URL(...).pathname` because pathname
// stays percent-encoded (a space becomes %20) and carries a leading slash
// before the drive letter on Windows, so it is not a usable filesystem path.
const SERVER_TS = path.resolve(fileURLToPath(import.meta.url), '..', '..', 'src', 'server.ts');
describe('server: PTY lease routes (v1.44+ Commit 2)', () => {
  // Reads server.ts fresh on every call; each test loads its own copy.
  const loadServerSource = (): string => fs.readFileSync(SERVER_TS, 'utf-8');

  // Asserts, in list order, that every needle occurs in the haystack.
  const expectAllContained = (haystack: string, needles: readonly string[]): void => {
    for (const needle of needles) {
      expect(haystack).toContain(needle);
    }
  };

  test('1. /pty-session returns the 4-tuple shape (sessionId, attachToken, leaseExpiresAt)', () => {
    const routeBody = sliceBetween(
      loadServerSource(),
      "url.pathname === '/pty-session' &&",
      "url.pathname === '/pty-session/reattach'",
    );
    expectAllContained(routeBody, [
      'mintLease()',
      'grantPtyToken(minted.token, lease.sessionId)',
      'sessionId: lease.sessionId',
      'attachToken: minted.token',
      'leaseExpiresAt: lease.expiresAt',
      // Backward compat: the legacy ptySessionToken alias is kept for one release.
      'ptySessionToken: minted.token',
    ]);
  });

  test('2. /pty-session/reattach validates lease + mints fresh attachToken', () => {
    const routeBody = sliceBetween(
      loadServerSource(),
      "url.pathname === '/pty-session/reattach'",
      "url.pathname === '/pty-restart'",
    );
    expectAllContained(routeBody, [
      // Validate-first: an unknown/expired sessionId is rejected with 410 Gone
      // so the client knows to fall back to a fresh /pty-session.
      'validateLease(sessionId)',
      'status: 410',
      // The fresh token is bound to the SAME sessionId.
      'grantPtyToken(minted.token, sessionId!)',
    ]);
  });

  test('3. /pty-restart is one transaction — dispose + revoke + fresh mint', () => {
    const routeBody = sliceBetween(
      loadServerSource(),
      "url.pathname === '/pty-restart'",
      "url.pathname === '/pty-dispose'",
    );
    expectAllContained(routeBody, [
      // Old session is disposed first (best-effort — a missing sessionId is non-fatal).
      'restartPtySession(oldSessionId)',
      'revokeLease(oldSessionId)',
      // A fresh sessionId + lease + attachToken is minted in the same handler.
      'mintLease()',
      'grantPtyToken(minted.token, lease.sessionId)',
      // The response has the same 4-tuple shape, so the client needs no
      // separate /pty-session round-trip.
      'attachToken: minted.token',
      'leaseExpiresAt: lease.expiresAt',
    ]);
  });

  test('4. /pty-dispose accepts body-token (sendBeacon-compatible)', () => {
    const routeBody = sliceBetween(
      loadServerSource(),
      "url.pathname === '/pty-dispose'",
      "url.pathname === '/internal/lease-refresh'",
    );
    expectAllContained(routeBody, [
      // sendBeacon cannot set custom headers, so the route MUST accept the
      // auth token in the request body; otherwise pagehide cleanup fails
      // silently every time the user closes the browser.
      'body?.authToken',
      'authedByBody',
      // Both auth paths must compare against authToken — a body-supplied
      // token is never trusted without the equality check.
      'authTokenFromBody === authToken',
    ]);
  });

  test('5. /internal/lease-refresh resets the daemon idle timer (T6)', () => {
    const routeBody = sliceBetween(
      loadServerSource(),
      "url.pathname === '/internal/lease-refresh'",
      '─── /pty-inject-scan',
    );
    expectAllContained(routeBody, [
      'refreshLease(sessionId)',
      'resetIdleTimer()',
      // A failed refresh (unknown / expired) MUST answer 410, not 200, so the
      // agent knows to close the WS and force a clean re-auth.
      'status: 410',
    ]);
  });

  test('6. grantPtyToken loopback carries sessionId binding', () => {
    const serverSource = loadServerSource();
    const grantSignature = /grantPtyToken\(token: string, sessionId\?: string\)/;
    expect(serverSource).toMatch(grantSignature);
    expect(serverSource).toContain('sessionId ? { token, sessionId } : { token }');
  });

  test('7. restartPtySession helper exists and POSTs the agent /internal/restart', () => {
    const serverSource = loadServerSource();
    const restartSignature = /async function restartPtySession\(sessionId: string\)/;
    expect(serverSource).toMatch(restartSignature);
    expectAllContained(serverSource, [
      '/internal/restart',
      'JSON.stringify({ sessionId })',
    ]);
  });
});
/**
 * Extracts the part of `source` that begins at the first occurrence of
 * `start` (the marker itself is included) and stops just before the first
 * occurrence of `end` found after that marker.
 *
 * @param source Text to search.
 * @param start Marker that opens the slice.
 * @param end Marker that closes the slice; only looked for after `start`.
 * @returns The text from `start` up to, but not including, `end`.
 * @throws Error when either marker cannot be found.
 */
function sliceBetween(source: string, start: string, end: string): string {
  const from = source.indexOf(start);
  if (from < 0) {
    throw new Error(`marker not found: ${start}`);
  }
  // Begin the end-marker search past the start marker so the two can never overlap.
  const searchFrom = from + start.length;
  const to = source.indexOf(end, searchFrom);
  if (to < 0) {
    throw new Error(`end marker not found: ${end}`);
  }
  return source.substring(from, to);
}

View File

@ -113,45 +113,17 @@ describe('sanitizeLoneSurrogates — wiring invariants', () => {
expect(SERVER_SRC).toContain('result: sanitizeLoneSurrogates(cr.result)');
});
test('SSE activity feed routes outbound frames through createSseEndpoint', () => {
// v1.51 refactor: /activity/stream no longer inlines its own
// ReadableStream/sanitizer wiring; it routes through createSseEndpoint
// which applies sanitizeReplacer to every JSON.stringify. The grep
// pins both halves of the contract: the endpoint uses the helper,
// and the helper does the sanitization.
const activityBlock = SERVER_SRC.match(
/if \(url\.pathname === '\/activity\/stream'\)[\s\S]*?createSseEndpoint\(/,
);
expect(activityBlock).not.toBeNull();
test('SSE activity feed sanitizes outbound frames via sanitizeReplacer', () => {
// Replacer must run DURING stringify; post-stringify regex is ineffective
// because JSON.stringify converts \uD800 → "\\ud800" before our regex sees it.
expect(SERVER_SRC).toContain('JSON.stringify(entry, sanitizeReplacer)');
});
test('SSE inspector stream routes outbound frames through createSseEndpoint', () => {
// Same v1.51 refactor invariant for /inspector/events.
const inspectorBlock = SERVER_SRC.match(
/if \(url\.pathname === '\/inspector\/events'[\s\S]*?createSseEndpoint\(/,
);
expect(inspectorBlock).not.toBeNull();
test('SSE inspector stream sanitizes outbound frames via sanitizeReplacer', () => {
expect(SERVER_SRC).toContain('JSON.stringify(event, sanitizeReplacer)');
});
test('createSseEndpoint applies sanitizeReplacer to every JSON.stringify', () => {
// The helper is the single source of truth for SSE sanitization now.
// If a future refactor moves stringify off the replacer (e.g. someone
// adds a fast-path encode), this test fails and the surrogate-escape
// class regresses across every SSE endpoint at once.
const helperPath = path.resolve(import.meta.dir, '..', 'src', 'sse-helpers.ts');
const helperSrc = fs.readFileSync(helperPath, 'utf-8');
expect(helperSrc).toContain('JSON.stringify(');
expect(helperSrc).toContain('sanitizeReplacer');
// The sanitizer itself uses stripLoneSurrogates (the shared utility in
// sanitize.ts) — not a private copy. Re-confirms the helper is wired
// to the canonical sanitizer, not a drifted duplicate.
expect(helperSrc).toContain("import { stripLoneSurrogates } from './sanitize'");
});
test('sanitizeReplacer is a function defined in server.ts (for non-SSE egress)', () => {
// server.ts keeps its own sanitizeReplacer for the non-SSE JSON egress
// paths (handleCommandInternal etc.). The SSE path uses sse-helpers.ts's
// own sanitizeReplacer; both must exist independently.
test('sanitizeReplacer is a function defined in server.ts', () => {
expect(SERVER_SRC).toContain('function sanitizeReplacer(');
});
});

View File

@ -1589,17 +1589,19 @@ describe('tool calls collapse into reasoning disclosure', () => {
});
// ─── Idle timeout disabled in headed mode (server.ts) ───────────
//
// The original 'idle check skips in headed mode' string-grep test was deleted
// in v1.42.3.0 — it would have passed even with the dual-instance bug present
// because it only grepped for "=== 'headed'" + 'return' in the same window.
// Behavioral coverage lives in browse/test/server-factory.test.ts under the
// 'idle timer + onDisconnect dual-instance fix' describe block, which
// exercises the headed/headless/tunnel branches of idleCheckTick directly.
describe('idle timeout behavior (server.ts)', () => {
const serverSrc = fs.readFileSync(path.join(ROOT, 'src', 'server.ts'), 'utf-8');
test('idle check skips in headed mode', () => {
const idleCheck = serverSrc.slice(
serverSrc.indexOf('idleCheckInterval'),
serverSrc.indexOf('idleCheckInterval') + 300,
);
expect(idleCheck).toContain("=== 'headed'");
expect(idleCheck).toContain('return');
});
test('sidebar-command resets idle timer', () => {
const sidebarCmd = serverSrc.slice(
serverSrc.indexOf("url.pathname === '/sidebar-command'"),

Some files were not shown because too many files have changed in this diff Show More