mirror of https://github.com/garrytan/gstack.git
Compare commits
1 Commits
133c6cbd98
...
942e049514
| Author | SHA1 | Date |
|---|---|---|
|
|
942e049514 |
|
|
@ -51,15 +51,6 @@ jobs:
|
|||
if: matrix.os == 'ubicloud-standard-8'
|
||||
run: sudo apt-get update && sudo apt-get install -y poppler-utils
|
||||
|
||||
# Install a color-emoji font BEFORE Chromium launches so the emoji render
|
||||
# gate has a fallback font. macOS ships Apple Color Emoji already.
|
||||
- name: Install color-emoji font (Ubuntu)
|
||||
if: matrix.os == 'ubicloud-standard-8'
|
||||
run: |
|
||||
sudo apt-get install -y fonts-noto-color-emoji
|
||||
fc-cache -f || true
|
||||
fc-match -f '%{family[0]}\t%{color}\n' ':lang=und-zsye:charset=1F600' || true
|
||||
|
||||
- name: Install Playwright Chromium
|
||||
run: bunx playwright install chromium
|
||||
|
||||
|
|
@ -83,7 +74,7 @@ jobs:
|
|||
- name: Run make-pdf unit tests
|
||||
run: bun test make-pdf/test/*.test.ts
|
||||
|
||||
- name: Run E2E gates (combined-features copy-paste + emoji render)
|
||||
- name: Run combined-features copy-paste gate (P0)
|
||||
env:
|
||||
BROWSE_BIN: ${{ github.workspace }}/browse/dist/browse
|
||||
run: bun test make-pdf/test/e2e/
|
||||
run: bun test make-pdf/test/e2e/combined-gate.test.ts
|
||||
|
|
|
|||
|
|
@ -116,7 +116,6 @@ jobs:
|
|||
test/setup-windows-fallback.test.ts \
|
||||
test/build-script-shell-compat.test.ts \
|
||||
test/docs-config-keys.test.ts \
|
||||
test/brain-sync-windows-paths.test.ts \
|
||||
make-pdf/test/browseClient.test.ts \
|
||||
make-pdf/test/pdftotext.test.ts
|
||||
shell: bash
|
||||
|
|
|
|||
|
|
@ -1,96 +0,0 @@
|
|||
name: Windows Setup E2E
|
||||
|
||||
# End-to-end fresh-install gate for Windows. Runs `./setup` on a clean
|
||||
# windows-latest checkout and asserts the build completes, binaries
|
||||
# resolve via find-browse, and the gstack-paths state root resolves
|
||||
# cleanly. Catches Bun shell-parser regressions in package.json's build
|
||||
# chain (#1538, #1537, #1530, #1457, #1561) before they reach users.
|
||||
#
|
||||
# Separate from windows-free-tests.yml because that one runs a curated
|
||||
# unit-test subset; this one exercises the install path itself.
|
||||
#
|
||||
# Runner: GitHub-hosted free windows-latest. ~3-5 min total.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'package.json'
|
||||
- 'scripts/build.sh'
|
||||
- 'scripts/write-version-files.sh'
|
||||
- 'setup'
|
||||
- 'browse/src/cli.ts'
|
||||
- 'browse/src/find-browse.ts'
|
||||
- 'bin/gstack-paths'
|
||||
- '.github/workflows/windows-setup-e2e.yml'
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: windows-setup-e2e-${{ github.head_ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
windows-setup:
|
||||
runs-on: windows-latest
|
||||
timeout-minutes: 15
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: oven-sh/setup-bun@v1
|
||||
with:
|
||||
bun-version: latest
|
||||
|
||||
- name: Configure git identity
|
||||
run: |
|
||||
git config --global user.email "windows-setup-e2e@gstack.test"
|
||||
git config --global user.name "Windows Setup E2E"
|
||||
git config --global init.defaultBranch main
|
||||
shell: bash
|
||||
|
||||
- name: Install dependencies
|
||||
run: bun install --frozen-lockfile
|
||||
shell: bash
|
||||
|
||||
- name: Run bun run build (the previously-broken path)
|
||||
# This is the regression gate. Bun's Windows shell parser rejected
|
||||
# multiple constructs the old inline build chain used; the wave
|
||||
# moved the build to scripts/build.sh. If this step fails on
|
||||
# Windows, the build chain regressed.
|
||||
run: bun run build
|
||||
shell: bash
|
||||
env:
|
||||
GSTACK_SKIP_PLAYWRIGHT: '1'
|
||||
|
||||
- name: Verify binaries exist (with .exe extension on Windows)
|
||||
run: |
|
||||
set -e
|
||||
test -f browse/dist/browse.exe || test -f browse/dist/browse || (echo "MISSING: browse" && exit 1)
|
||||
test -f browse/dist/find-browse.exe || test -f browse/dist/find-browse || (echo "MISSING: find-browse" && exit 1)
|
||||
test -f design/dist/design.exe || test -f design/dist/design || (echo "MISSING: design" && exit 1)
|
||||
test -f bin/gstack-global-discover.exe || test -f bin/gstack-global-discover || (echo "MISSING: gstack-global-discover" && exit 1)
|
||||
echo "All binaries present"
|
||||
shell: bash
|
||||
|
||||
- name: Verify find-browse resolves to the .exe variant
|
||||
run: |
|
||||
set -e
|
||||
OUT=$(bun browse/src/find-browse.ts 2>&1) || true
|
||||
echo "find-browse output: $OUT"
|
||||
# On Windows, find-browse should successfully resolve to a binary,
|
||||
# whether or not it has the .exe extension on disk. Empty output
|
||||
# or "not found" means the .exe extension resolver regressed.
|
||||
echo "$OUT" | grep -qE '(browse\.exe|browse)$' || (echo "find-browse failed to resolve binary on Windows" && exit 1)
|
||||
shell: bash
|
||||
|
||||
- name: Verify gstack-paths state root resolves
|
||||
run: |
|
||||
set -e
|
||||
eval "$(bash bin/gstack-paths)"
|
||||
test -n "$GSTACK_STATE_ROOT" || (echo "GSTACK_STATE_ROOT empty" && exit 1)
|
||||
test -n "$PLAN_ROOT" || (echo "PLAN_ROOT empty" && exit 1)
|
||||
test -n "$TMP_ROOT" || (echo "TMP_ROOT empty" && exit 1)
|
||||
echo "GSTACK_STATE_ROOT=$GSTACK_STATE_ROOT"
|
||||
echo "PLAN_ROOT=$PLAN_ROOT"
|
||||
echo "TMP_ROOT=$TMP_ROOT"
|
||||
shell: bash
|
||||
|
|
@ -4,7 +4,7 @@ dist/
|
|||
browse/dist/
|
||||
design/dist/
|
||||
make-pdf/dist/
|
||||
bin/gstack-global-discover*
|
||||
bin/gstack-global-discover
|
||||
.gstack/
|
||||
.claude/skills/
|
||||
.claude/scheduled_tasks.lock
|
||||
|
|
|
|||
20
AGENTS.md
20
AGENTS.md
|
|
@ -21,7 +21,6 @@ Invoke them by name (e.g., `/office-hours`).
|
|||
| `/plan-tune` | Self-tune AskUserQuestion sensitivity per question. |
|
||||
| `/autoplan` | One command runs CEO → design → eng → DX review. |
|
||||
| `/design-consultation` | Build a complete design system from scratch. |
|
||||
| `/spec` | Turn vague intent into a precise, executable spec in five phases. Files a GitHub issue, optionally spawns a Claude Code agent in a fresh worktree, and lets `/ship` close the source issue on merge. |
|
||||
|
||||
### Implementation + review
|
||||
|
||||
|
|
@ -76,25 +75,6 @@ Invoke them by name (e.g., `/office-hours`).
|
|||
| `/setup-browser-cookies` | Import cookies from your real browser for authenticated testing. |
|
||||
| `/pair-agent` | Pair a remote AI agent (OpenClaw, Codex, etc.) with your browser. |
|
||||
|
||||
### iOS QA — drive real iPhones over USB or Tailscale (v1.43.0.0+)
|
||||
|
||||
| Skill | What it does |
|
||||
|-------|-------------|
|
||||
| `/ios-qa` | Live-device iOS QA via USB CoreDevice tunnel + embedded StateServer. Optionally exposes the device over Tailscale so remote agents can drive it. |
|
||||
| `/ios-fix` | Autonomous iOS bug fixer with regression snapshot capture. |
|
||||
| `/ios-design-review` | Designer's-eye QA on a real iPhone — 10-dimension Apple HIG rubric. |
|
||||
| `/ios-clean` | Convenience: strip DebugBridge + #if DEBUG wiring before a Release build. |
|
||||
| `/ios-sync` | Regenerate the iOS debug bridge against the latest upstream templates. |
|
||||
|
||||
Companion CLIs (run on the Mac that's plugged into the device):
|
||||
|
||||
| Command | What it does |
|
||||
|---------|-------------|
|
||||
| `gstack-ios-qa-daemon` | Mac-side broker. Loopback by default; `--tailnet` adds a Tailscale-facing listener with capability tiers and audit logging. |
|
||||
| `gstack-ios-qa-mint` | Owner-grant CLI for the tailnet allowlist (`grant`/`revoke`/`list`). |
|
||||
|
||||
End-to-end walkthrough: [docs/howto-ios-testing-with-gstack.md](docs/howto-ios-testing-with-gstack.md).
|
||||
|
||||
### Safety + scoping
|
||||
|
||||
| Skill | What it does |
|
||||
|
|
|
|||
|
|
@ -317,7 +317,6 @@ from `snapshot`, or `@c` refs from `snapshot -C`. Full table:
|
|||
| `disconnect` | Close headed Chrome, return to headless |
|
||||
| `focus [@ref]` | Bring headed Chrome to foreground (macOS); `@ref` also scrolls into view |
|
||||
| `state save\|load <name>` | Save or load browser state (cookies + URLs) |
|
||||
| `memory [--json]` | Snapshot Bun heap + per-tab JS heap + Chromium process tree + bounded buffer sizes. Use `--json` for programmatic consumers; text mode renders sorted top-10 tabs with "and N more" tail. |
|
||||
|
||||
### Handoff
|
||||
|
||||
|
|
|
|||
1267
CHANGELOG.md
1267
CHANGELOG.md
File diff suppressed because it is too large
Load Diff
110
CLAUDE.md
110
CLAUDE.md
|
|
@ -27,16 +27,25 @@ bun run slop:diff # slop findings in files changed on this branch only
|
|||
`test:evals` requires `ANTHROPIC_API_KEY`. Codex E2E tests (`test/codex-e2e.test.ts`)
|
||||
use Codex's own auth from `~/.codex/` config — no `OPENAI_API_KEY` env var needed.
|
||||
|
||||
**Env keys in Conductor workspaces.** The `GSTACK_*` env-shim (v1.39.2.0+,
|
||||
`lib/conductor-env-shim.ts`) promotes `GSTACK_ANTHROPIC_API_KEY` /
|
||||
`GSTACK_OPENAI_API_KEY` to their canonical names inside gstack's TS binaries.
|
||||
Tests run through gstack entrypoints inherit this promotion automatically.
|
||||
Don't echo the key value to stdout, logs, or shell history. When passing to a
|
||||
test's Agent SDK, do NOT pass `env: {...}` to `runAgentSdkTest` — the SDK's
|
||||
auth pipeline doesn't pick up the key the same way when env is supplied as an
|
||||
object (confirmed failure mode). Mutate `process.env.ANTHROPIC_API_KEY`
|
||||
ambiently before the call and restore in `finally`.
|
||||
**Where the keys live on this machine.** Conductor workspaces don't inherit the
|
||||
user's interactive shell env, so `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` aren't
|
||||
in the default process env. Before running any paid eval / E2E, source them from
|
||||
`~/.zshrc` (that's where Garry keeps them):
|
||||
|
||||
```bash
|
||||
bash -c '
|
||||
eval "$(grep -E "^export (ANTHROPIC_API_KEY|OPENAI_API_KEY)=" ~/.zshrc)"
|
||||
export ANTHROPIC_API_KEY OPENAI_API_KEY
|
||||
EVALS=1 EVALS_TIER=periodic bun test test/skill-e2e-<whatever>.test.ts
|
||||
'
|
||||
```
|
||||
|
||||
Do not echo the key value anywhere (stdout, logs, shell history). The grep+eval
|
||||
pattern keeps it in process env only. When passing to a test's Agent SDK, do NOT
|
||||
pass `env: {...}` to `runAgentSdkTest` — the SDK's auth pipeline doesn't pick up
|
||||
the key the same way when env is supplied as an object (confirmed failure mode).
|
||||
Instead, mutate `process.env.ANTHROPIC_API_KEY` ambiently before the call and
|
||||
restore in `finally`.
|
||||
E2E tests stream progress in real-time (tool-by-tool via `--output-format stream-json
|
||||
--verbose`). Results are persisted to `~/.gstack-dev/evals/` with auto-comparison
|
||||
against the previous run.
|
||||
|
|
@ -111,7 +120,6 @@ gstack/
|
|||
├── land-and-deploy/ # /land-and-deploy skill (merge → deploy → canary verify)
|
||||
├── office-hours/ # /office-hours skill (YC Office Hours — startup diagnostic + builder brainstorm)
|
||||
├── investigate/ # /investigate skill (systematic root-cause debugging)
|
||||
├── spec/ # /spec skill (five-phase spec → GitHub issue, optional agent spawn, /ship auto-closes)
|
||||
├── retro/ # Retrospective skill (includes /retro global cross-project mode)
|
||||
├── bin/ # CLI utilities (gstack-repo-mode, gstack-slug, gstack-config, etc.)
|
||||
├── document-release/ # /document-release skill (post-ship doc updates + Diataxis coverage map)
|
||||
|
|
@ -228,24 +236,6 @@ Activity / Refs / Inspector as debug overlays behind the footer's
|
|||
flow, dual-token model, and threat-model boundary — silent failures
|
||||
here usually trace to not understanding the cross-component flow.
|
||||
|
||||
**Embedder terminal-agent ownership** (v1.42.1.0+, identity-based kill v1.44.0.0+).
|
||||
`buildFetchHandler` in `browse/src/server.ts` accepts `ServerConfig.ownsTerminalAgent?:
|
||||
boolean` (default `true`). When `true`, factory shutdown runs the full teardown:
|
||||
identity-based kill via `killAgentByRecord(readAgentRecord(stateDir))` from
|
||||
`browse/src/terminal-agent-control.ts` plus `safeUnlinkQuiet` on
|
||||
`<stateDir>/terminal-port`, `<stateDir>/terminal-internal-token`, and
|
||||
`<stateDir>/terminal-agent-pid` (the per-boot agent record introduced in v1.44).
|
||||
Embedders (e.g. the gbrowser phoenix overlay) that pre-launch their own PTY
|
||||
server must pass `false` so their discovery files survive gstack teardown cycles.
|
||||
The flag is the third caller-owned teardown gate in `ServerConfig` (alongside
|
||||
`xvfb?` and `proxyBridge?`); polarity is inverted (explicit bool vs presence) and
|
||||
documented in the field's JSDoc. CLI `start()` always passes `true` explicitly —
|
||||
the static-grep test in `browse/test/server-embedder-terminal-port.test.ts` fails
|
||||
CI if a refactor drops it. Pre-v1.44 used `pkill -f terminal-agent\.ts` (regex
|
||||
match) which would kill sibling gstack sessions on the same host; the new
|
||||
`browse/test/terminal-agent-pid-identity.test.ts` static-grep tripwire fails CI
|
||||
if any source file re-introduces `pkill ... terminal-agent` or `spawnSync('pkill', ...)`.
|
||||
|
||||
**WebSocket auth uses Sec-WebSocket-Protocol, not cookies.** Browsers
|
||||
can't set `Authorization` on a WebSocket upgrade, but they CAN set
|
||||
`Sec-WebSocket-Protocol` via `new WebSocket(url, [token])`. The agent
|
||||
|
|
@ -294,26 +284,6 @@ response in `server.ts`, read
|
|||
`browse/test/server-sanitize-surrogates.test.ts` pins the wiring with invariant
|
||||
tests, so bypasses fail CI.
|
||||
|
||||
**SSE endpoint helper** (v1.51.0.0+). New SSE endpoints in `server.ts` MUST route
|
||||
through `createSseEndpoint(req, config)` from `browse/src/sse-helpers.ts`. The
|
||||
helper owns the cleanup contract (abort + enqueue-throw + heartbeat-throw, all
|
||||
idempotent) and bakes in `sanitizeLoneSurrogates` on every JSON.stringify, so
|
||||
new subscribers can't accidentally regress either invariant. Inline
|
||||
`ReadableStream` wiring leaked subscribers when the TCP connection died without
|
||||
firing `req.signal.abort` (Chromium MV3 service-worker suspend, intermediate
|
||||
proxy half-close). `/activity/stream`, `/inspector/events`, and `/memory`
|
||||
(SSE-eligible) all route through it. `browse/test/sse-helpers.test.ts` pins the
|
||||
cleanup contract.
|
||||
|
||||
**CDP session lifecycle** (v1.51.0.0+). Direct `page.context().newCDPSession(page)`
|
||||
calls outside `browse/src/cdp-bridge.ts` fail CI via the static-grep tripwire in
|
||||
`browse/test/cdp-session-cleanup.test.ts`. Use `withCdpSession(page, async (s) => {...})`
|
||||
for one-shot CDP work (try/finally detach) or `getOrCreateCdpSession(page, cache)`
|
||||
for cached sessions tied to a page's lifetime (close-detach via `Map<page, session>`).
|
||||
Three sites migrated: cdp-bridge frame events, write-commands archive capture,
|
||||
cdp-inspector. The helpers prevent the per-session leak class where successful-path
|
||||
detach happened but error-path detach was missed.
|
||||
|
||||
**Setup symlink hardening** (v1.38.0.0+). Every link site in `setup` MUST route
|
||||
through the `_link_or_copy SRC DST` helper near the `IS_WINDOWS` detection. On
|
||||
Windows without Developer Mode, plain `ln -snf` produces frozen file copies that
|
||||
|
|
@ -418,44 +388,6 @@ because they're tracked despite `.gitignore` — ignore them. When staging files
|
|||
always use specific filenames (`git add file1 file2`) — never `git add .` or
|
||||
`git add -A`, which will accidentally include the binaries.
|
||||
|
||||
## Redaction guard (PII / secrets / legal content)
|
||||
|
||||
Shared redaction engine catches credentials, PII, and legal/damaging content
|
||||
before it reaches an external sink (codex dispatch, GitHub issue/PR body, pushed
|
||||
commit). It is a **guardrail, not airtight enforcement** — `git push --no-verify`,
|
||||
direct `gh issue create`, and `GSTACK_REDACT_PREPUSH=skip` all bypass it. It
|
||||
catches accidents and carelessness, the 99% case. Do not claim it stops a
|
||||
determined leaker (a CHANGELOG line claiming that it does would not survive a hostile screenshotter).
|
||||
|
||||
- **Engine + taxonomy:** `lib/redact-patterns.ts` (the single source of truth —
|
||||
3 tiers; HIGH = genuinely-secret credentials that block, MEDIUM = PII/legal/
|
||||
internal + high-FP credential shapes that confirm via AskUserQuestion, LOW =
|
||||
FYI) and `lib/redact-engine.ts` (pure `scan()` + `applyRedactions()`).
|
||||
Calibration matters: a gate that cries wolf gets ignored, so context-variable
|
||||
shapes (Stripe `pk_live_`, Google `AIza`, JWT, env `*_KEY=`) sit at MEDIUM.
|
||||
- **CLI:** `bin/gstack-redact` (exit 0 clean / 2 MEDIUM / 3 HIGH; `--json`,
|
||||
`--auto-redact`, `--repo-visibility`, `--from-file`). `bin/gstack-redact-prepush`
|
||||
is the opt-in git hook.
|
||||
- **Skill docs are generated** from `scripts/resolvers/redact-doc.ts`
|
||||
(`{{REDACT_TAXONOMY_TABLE}}`, `{{REDACT_INVOCATION_BLOCK:<sink>}}`) so /spec,
|
||||
/cso, /ship, /document-release, /document-generate never drift from the engine.
|
||||
- **Scan-at-sink:** always scan the EXACT bytes that will be sent — write to a
|
||||
temp file, scan that file, pass the SAME file to `gh`/`git`. Never scan a string
|
||||
then re-render (that reopens a scan-vs-send gap).
|
||||
- **Visibility (no tier promotion):** resolve once per run, order = local config
|
||||
(`gstack-config get redact_repo_visibility`, ~/.gstack so never committed) → gh
|
||||
→ glab → unknown(=public-strict). Public repos get STERNER per-finding
|
||||
confirmation (no batch-acknowledge, no silent-proceed); MEDIUM is never
|
||||
auto-promoted to HIGH.
|
||||
- **Tool-attributed fences:** wrap Codex/Greptile/eval output in ` ```codex-review `
|
||||
/ ` ```greptile ` fences so example credentials those tools quote WARN-degrade
|
||||
instead of blocking. A live-format credential inside the fence still blocks.
|
||||
- **Config keys:** `redact_repo_visibility` (public|private|unknown, local-only
|
||||
override for repos gh/glab can't read), `redact_prepush_hook` (true|false).
|
||||
There is intentionally NO key to disable HIGH blocking.
|
||||
- **Audit:** the /spec semantic pass appends a content-free record (categories +
|
||||
body sha256, no spec text) to `~/.gstack/security/semantic-reviews.jsonl` (0600).
|
||||
|
||||
## Commit style
|
||||
|
||||
**Always bisect commits.** Every commit should be a single logical change. When
|
||||
|
|
@ -938,10 +870,4 @@ file globs. Run `/sync-gbrain` after meaningful code changes; for ongoing
|
|||
auto-sync across all worktrees, run `gbrain autopilot --install` once per
|
||||
machine — gbrain's daemon handles incremental refresh on a schedule.
|
||||
|
||||
Safety: don't run `/sync-gbrain` while `gbrain autopilot` is active — the
|
||||
orchestrator refuses destructive source ops when it detects a running autopilot
|
||||
to avoid racing it (#1734). Prefer registering user repos with `gbrain sources
|
||||
add --path <dir>` (no `--url`): URL-managed sources can auto-reclone, and the
|
||||
sync code walk for them requires an explicit `--allow-reclone` opt-in.
|
||||
|
||||
<!-- gstack-gbrain-search-guidance:end -->
|
||||
|
|
|
|||
|
|
@ -326,13 +326,11 @@ If you're using [Conductor](https://conductor.build) to run multiple Claude Code
|
|||
|
||||
| Hook | Script | What it does |
|
||||
|------|--------|-------------|
|
||||
| `setup` | `bin/dev-setup` | Copies `.env` from main worktree, installs deps, symlinks skills, runs `./setup` non-interactively |
|
||||
| `setup` | `bin/dev-setup` | Copies `.env` from main worktree, installs deps, symlinks skills |
|
||||
| `archive` | `bin/dev-teardown` | Removes skill symlinks, cleans up `.claude/` directory |
|
||||
|
||||
When Conductor creates a new workspace, `bin/dev-setup` runs automatically. It detects the main worktree (via `git worktree list`), copies your `.env` so API keys carry over, and sets up dev mode — no manual steps needed.
|
||||
|
||||
`bin/dev-setup` runs `./setup` fully non-interactively (it passes `--plan-tune-hooks=prompt` and closes stdin), so a forwarded Conductor TTY can never hang on a hidden setup prompt. It also never installs the plan-tune Claude Code hooks, which means a throwaway workspace can't rewrite your global `~/.claude/settings.json` to point at an ephemeral worktree path. To install the plan-tune hooks deliberately, run `./setup --plan-tune-hooks` outside dev-setup (or `gstack-config set plan_tune_hooks yes`).
|
||||
|
||||
**First-time setup:** Put your `ANTHROPIC_API_KEY` in `.env` in the main repo (see `.env.example`). Every Conductor workspace inherits it automatically.
|
||||
|
||||
**`GSTACK_*` env prefix (Conductor-injected keys).** Conductor explicitly strips `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` from every workspace's process env. The `.env` copy path doesn't restore them either — the strip happens after env inheritance. Users who want paid evals, `/sync-gbrain` embeddings, or `claude-agent-sdk` calls to work in a Conductor workspace must set `GSTACK_ANTHROPIC_API_KEY` and `GSTACK_OPENAI_API_KEY` in Conductor's workspace env config; Conductor passes those through untouched. On the gstack side, TS entry points import `lib/conductor-env-shim.ts` as a side effect, which promotes `GSTACK_FOO_API_KEY` to `FOO_API_KEY` when the canonical name is empty. If you add a new TS entry point that hits a paid API, add `import "../lib/conductor-env-shim";` to the top of the file. Today the shim is imported from `bin/gstack-gbrain-sync.ts`, `bin/gstack-model-benchmark`, `scripts/preflight-agent-sdk.ts`, and `test/helpers/e2e-helpers.ts`.
|
||||
|
|
|
|||
|
|
@ -204,7 +204,6 @@ Each skill feeds into the next. `/office-hours` writes a design doc that `/plan-
|
|||
| `/browse` | **QA Engineer** | Give the agent eyes. Real Chromium browser, real clicks, real screenshots. ~100ms per command. `/open-gstack-browser` launches GStack Browser with sidebar, anti-bot stealth, and auto model routing. |
|
||||
| `/setup-browser-cookies` | **Session Manager** | Import cookies from your real browser (Chrome, Arc, Brave, Edge) into the headless session. Test authenticated pages. |
|
||||
| `/autoplan` | **Review Pipeline** | One command, fully reviewed plan. Runs CEO → design → eng review automatically with encoded decision principles. Surfaces only taste decisions for your approval. |
|
||||
| `/spec` | **Spec Author** | Turn vague intent into a precise, executable spec in five phases (why, scope, technical with mandatory code-reading, draft, file). Codex quality gate before file (blocks below 7/10), fail-closed secret redaction, dedupe against existing issues, archive to `$GSTACK_STATE_ROOT/projects/$SLUG/specs/` for team-corpus recall. `--execute` spawns `claude -p` in a fresh worktree; `/ship` auto-closes the source issue on merge. Plan-mode aware. |
|
||||
| `/learn` | **Memory** | Manage what gstack learned across sessions. Review, search, prune, and export project-specific patterns, pitfalls, and preferences. Learnings compound across sessions so gstack gets smarter on your codebase over time. |
|
||||
|
||||
### Which review should I use?
|
||||
|
|
@ -230,8 +229,6 @@ Each skill feeds into the next. `/office-hours` writes a design doc that `/plan-
|
|||
| `/setup-gbrain` | **GBrain Onboarding** — from zero to running gbrain in under 5 minutes. PGLite local, Supabase existing URL, or auto-provision a new Supabase project via Management API. MCP registration for Claude Code + per-repo trust triad (read-write/read-only/deny). [Full guide](USING_GBRAIN_WITH_GSTACK.md). |
|
||||
| `/sync-gbrain` | **Keep Brain Current** — re-index this repo's code into gbrain via `gbrain sources add` + `gbrain sync --strategy code`, refresh the `## GBrain Search Guidance` block in CLAUDE.md, and auto-remove guidance when the capability check fails. `--incremental` (default), `--full`, `--dry-run`. Idempotent; safe to re-run. |
|
||||
| `/gstack-upgrade` | **Self-Updater** — upgrade gstack to latest. Detects global vs vendored install, syncs both, shows what changed. |
|
||||
| `/ios-qa` | **iOS Live-Device QA (v1.43.0.0+)** — drive a real iPhone over USB CoreDevice via an embedded `StateServer` in the app. Read Swift source, codegen typed `@Observable` accessors, run the agent loop. Optional `--tailnet` flag exposes the device to OpenClaw or any HTTP-capable agent on your Tailscale tailnet so remote agents can run iOS QA without ever touching the hardware. Capability-tier allowlist (observe/interact/mutate/restore), per-device session lock, audit log. |
|
||||
| `/ios-fix`, `/ios-design-review`, `/ios-clean`, `/ios-sync` | iOS bug-fix loop, designer's-eye HIG audit, debug-bridge cleanup, and accessor resync. See `docs/skills.md`. End-to-end walkthrough: [docs/howto-ios-testing-with-gstack.md](docs/howto-ios-testing-with-gstack.md). |
|
||||
|
||||
### New binaries (v0.19)
|
||||
|
||||
|
|
@ -241,8 +238,6 @@ Beyond the slash-command skills, gstack ships standalone CLIs for workflows that
|
|||
|---------|-------------|
|
||||
| `gstack-model-benchmark` | **Cross-model benchmark** — run the same prompt through Claude, GPT (via Codex CLI), and Gemini; compare latency, tokens, cost, and (optionally) LLM-judge quality score. Auth detected per provider, unavailable providers skip cleanly. Output as table, JSON, or markdown. `--dry-run` validates flags + auth without spending API calls. |
|
||||
| `gstack-taste-update` | **Design taste learning** — writes approvals and rejections from `/design-shotgun` into a persistent per-project taste profile. Decays 5%/week. Feeds back into future variant generation so the system learns what you actually pick. |
|
||||
| `gstack-ios-qa-daemon` | **iOS QA daemon** — Mac-side broker between an agent and a connected iPhone over USB CoreDevice. Loopback by default; `--tailnet` opens a Tailscale-facing listener with identity-gated capability tiers. Single-instance via flock on `~/.gstack/ios-qa-daemon.pid`. See [docs/howto-ios-testing-with-gstack.md](docs/howto-ios-testing-with-gstack.md). |
|
||||
| `gstack-ios-qa-mint` | **iOS allowlist manager** — owner-grant CLI for the tailnet allowlist. `grant`/`revoke`/`list` against `~/.gstack/ios-qa-allowlist.json` (mode 0600). Remote agents never auto-allowlist; this is the explicit-intent path. |
|
||||
|
||||
### Continuous checkpoint mode (opt-in, local by default)
|
||||
|
||||
|
|
@ -400,7 +395,7 @@ Four paths, pick one:
|
|||
- **PGLite local** — zero accounts, zero network, ~30 seconds. Isolated brain on this Mac only. Great for try-first; migrate to Supabase later with `/setup-gbrain --switch`.
|
||||
- **Remote gbrain MCP** — your brain runs on another machine (Tailscale, ngrok, internal LAN) or a teammate's server; paste an MCP URL and bearer token. Optionally pair with a local PGLite for symbol-aware code search in split-engine mode. Best for cross-machine memory without standing up a local DB.
|
||||
|
||||
After init, the skill offers to register gbrain as an MCP server for Claude Code (`claude mcp add gbrain -- gbrain serve`) so `gbrain search`, `gbrain put`, etc. show up as first-class typed tools — not bash shell-outs.
|
||||
After init, the skill offers to register gbrain as an MCP server for Claude Code (`claude mcp add gbrain -- gbrain serve`) so `gbrain search`, `gbrain put_page`, etc. show up as first-class typed tools — not bash shell-outs.
|
||||
|
||||
**Keeping the brain current.** Run `/sync-gbrain` from any repo to re-index its code into gbrain (incremental by default, `--full` for a full reindex, `--dry-run` to preview). The skill registers the cwd as a federated source via `gbrain sources add`, runs `gbrain sync --strategy code`, and writes a `## GBrain Search Guidance` block to your project's CLAUDE.md so the agent prefers `gbrain search`/`code-def`/`code-refs` over Grep. The block is removed automatically if the capability check fails — no stale guidance pointing at tools that aren't installed.
|
||||
|
||||
|
|
|
|||
34
SKILL.md
34
SKILL.md
|
|
@ -2,7 +2,11 @@
|
|||
name: gstack
|
||||
preamble-tier: 1
|
||||
version: 1.1.0
|
||||
description: Fast headless browser for QA testing and site dogfooding. (gstack)
|
||||
description: |
|
||||
Fast headless browser for QA testing and site dogfooding. Navigate pages, interact with
|
||||
elements, verify state, diff before/after, take annotated screenshots, test responsive
|
||||
layouts, forms, uploads, dialogs, and capture bug evidence. Use when asked to open or
|
||||
test a site, verify a deployment, dogfood a user flow, or file a bug with screenshots. (gstack)
|
||||
allowed-tools:
|
||||
- Bash
|
||||
- Read
|
||||
|
|
@ -17,14 +21,6 @@ triggers:
|
|||
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
|
||||
<!-- Regenerate: bun run gen:skill-docs -->
|
||||
|
||||
|
||||
## When to invoke this skill
|
||||
|
||||
Navigate pages, interact with
|
||||
elements, verify state, diff before/after, take annotated screenshots, test responsive
|
||||
layouts, forms, uploads, dialogs, and capture bug evidence. Use when asked to open or
|
||||
test a site, verify a deployment, dogfood a user flow, or file a bug with screenshots.
|
||||
|
||||
## Preamble (run first)
|
||||
|
||||
```bash
|
||||
|
|
@ -60,7 +56,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
|
|||
echo "QUESTION_TUNING: $_QUESTION_TUNING"
|
||||
mkdir -p ~/.gstack/analytics
|
||||
if [ "$_TEL" != "off" ]; then
|
||||
echo '{"skill":"gstack","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
echo '{"skill":"gstack","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
fi
|
||||
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
|
||||
if [ -f "$_PF" ]; then
|
||||
|
|
@ -102,19 +98,6 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
|
|||
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
|
||||
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
|
||||
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
|
||||
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
|
||||
# Claude Code exposes plan mode via system reminders; we detect best-effort
|
||||
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
|
||||
# fall back to "inactive". Codex hosts and Claude execution mode both end up
|
||||
# inactive, which is the safe default (defaults to file+execute pipeline).
|
||||
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
else
|
||||
export GSTACK_PLAN_MODE="inactive"
|
||||
fi
|
||||
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
|
||||
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
|
||||
```
|
||||
|
||||
|
|
@ -170,7 +153,7 @@ Only run `open` if yes. Always run `touch`.
|
|||
|
||||
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
|
||||
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
|
||||
|
||||
Options:
|
||||
- A) Help gstack get better! (recommended)
|
||||
|
|
@ -246,7 +229,6 @@ Key routing rules:
|
|||
- Ship/deploy/PR → invoke /ship or /land-and-deploy
|
||||
- Save progress → invoke /context-save
|
||||
- Resume context → invoke /context-restore
|
||||
- Author a backlog-ready spec/issue → invoke /spec
|
||||
```
|
||||
|
||||
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
|
||||
|
|
@ -504,7 +486,6 @@ quality gates that produce better results than answering inline.
|
|||
|
||||
**Routing rules — when you see these patterns, INVOKE the skill via the Skill tool:**
|
||||
- User describes a new idea, asks "is this worth building", brainstorms, pitches a concept → invoke `/office-hours`
|
||||
- User asks to spec something out, file an issue, write up a ticket, "turn this into a GitHub issue", "backlog item" → invoke `/spec`
|
||||
- User asks about strategy, scope, ambition, "think bigger", "what should we build" → invoke `/plan-ceo-review`
|
||||
- User asks to review architecture, lock in the plan, "does this design make sense" → invoke `/plan-eng-review`
|
||||
- User asks about design system, brand, visual identity, "how should this look" → invoke `/design-consultation`
|
||||
|
|
@ -963,7 +944,6 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
|
|||
| `disconnect` | Disconnect headed browser, return to headless mode |
|
||||
| `focus [@ref]` | Bring headed browser window to foreground (macOS) |
|
||||
| `handoff [message]` | Open visible Chrome at current page for user takeover |
|
||||
| `memory [--json]` | Snapshot Bun heap + per-tab JS heap + Chromium process tree + bounded buffer sizes. JSON output with --json. |
|
||||
| `restart` | Restart server |
|
||||
| `resume` | Re-snapshot after user takeover, return control to AI |
|
||||
| `state save|load <name>` | Save/load browser state (cookies + URLs) |
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@ quality gates that produce better results than answering inline.
|
|||
|
||||
**Routing rules — when you see these patterns, INVOKE the skill via the Skill tool:**
|
||||
- User describes a new idea, asks "is this worth building", brainstorms, pitches a concept → invoke `/office-hours`
|
||||
- User asks to spec something out, file an issue, write up a ticket, "turn this into a GitHub issue", "backlog item" → invoke `/spec`
|
||||
- User asks about strategy, scope, ambition, "think bigger", "what should we build" → invoke `/plan-ceo-review`
|
||||
- User asks to review architecture, lock in the plan, "does this design make sense" → invoke `/plan-eng-review`
|
||||
- User asks about design system, brand, visual identity, "how should this look" → invoke `/design-consultation`
|
||||
|
|
|
|||
503
TODOS.md
503
TODOS.md
|
|
@ -1,284 +1,5 @@
|
|||
# TODOS
|
||||
|
||||
## Test infrastructure
|
||||
|
||||
### ✅ DONE (v1.53.1.0): Rebaseline parity-suite (v1.44.1 → v1.53.0.0)
|
||||
|
||||
**What:** `test/parity-suite.test.ts` checked every skill's SKILL.md size against
|
||||
the frozen `test/fixtures/parity-baseline-v1.44.1.json`. Five planning skills had
|
||||
crept past the 1.05x ceiling: `plan-ceo-review` (1.052), `plan-eng-review` (1.062),
|
||||
`plan-design-review` (1.068), `investigate` (1.053), `office-hours` (1.065) — growth
|
||||
from the brain-aware-planning releases (v1.49–v1.52) plus the v1.53 redaction guard.
|
||||
|
||||
**Resolved:** Captured a fresh baseline at HEAD via
|
||||
`bun run scripts/capture-baseline.ts --tag v1.53.0.0` and re-pointed the test at
|
||||
`test/fixtures/parity-baseline-v1.53.0.0.json`. The per-skill 1.05 ratio is kept, so
|
||||
future bloat is still caught — only the stale anchor moved. Mirrors the earlier
|
||||
`skill-size-budget` rebase (v1.44.1 → v1.47.0.0). Historical v1.44.1 / v1.46.0.0 /
|
||||
v1.47.0.0 baselines retained in `test/fixtures/` for the v1→v2 audit trail. The
|
||||
captured skill bytes match `origin/main` exactly (the rebasing branch left every
|
||||
SKILL.md untouched). `bun test` is green again.
|
||||
|
||||
## gbrowser memory follow-ups (filed via /plan-eng-review + /codex on the v1.49 leak-fix PR)
|
||||
|
||||
These four items came out of the memory-leak investigation that shipped
|
||||
the `$B memory` diagnostic + the four leak fixes. They were
|
||||
deliberately deferred from that PR (already 14 commits / ~12 files);
|
||||
each stands alone and any one could ship independently.
|
||||
|
||||
### P2: MV3 extension service worker memory profile
|
||||
|
||||
**What:** The `/memory` endpoint snapshot enumerates pages but does
|
||||
not enumerate the gstack baked-in extension's service-worker target.
|
||||
A long-running MV3 service worker can leak through retained DOM
|
||||
snapshots, message ports that never close, alarms that re-arm, and
|
||||
caches that grow without bound. The diagnostic should call
|
||||
`Target.getTargets` with a filter for `service_worker` and include
|
||||
each one in `tabs[]` (or a sibling `serviceWorkers[]` array) with the
|
||||
same `Performance.getMetrics` data.
|
||||
|
||||
**Why:** Codex's outside-voice review on the eng-review surfaced this
|
||||
class of leak (the extension is part of the gbrowser process tree but
|
||||
invisible to today's snapshot). Until we surface it, a SW leak shows
|
||||
up only in the parent process RSS with no per-target attribution.
|
||||
|
||||
**Pros:** Closes the per-target attribution gap for the
|
||||
single most likely future leak source (our own extension).
|
||||
**Cons:** Extension SW lifecycle is asymmetric vs page lifecycle;
|
||||
auto-attach + filter is one more piece of CDP plumbing.
|
||||
|
||||
**Context:** Codex finding #4 on the eng-review outside voice. Not
|
||||
in scope of the v1.49 PR; deliberately deferred to keep the PR to
|
||||
the four highest-confidence leak fixes.
|
||||
|
||||
**Priority:** P2. **Effort:** M.
|
||||
|
||||
---
|
||||
|
||||
### P2: Native + GPU memory breakdown in `$B memory`
|
||||
|
||||
**What:** `$B memory` shows Bun RSS + per-tab JS heap + Chromium
|
||||
process tree (PIDs + types + CPU time) but the per-process RSS is
|
||||
absent — `SystemInfo.getProcessInfo` doesn't expose RSS and the eng
|
||||
review (D2 USE_CDP) explicitly chose CDP over shelling to `ps`. The
|
||||
honest next step is to surface what CDP DOES give for the other
|
||||
memory categories: `Memory.getDOMCounters` per target (node + listener
|
||||
counts), `SystemInfo.getInfo` for GPU memory, `Memory.getAllTimeSamplingProfile`
|
||||
for a sampled native estimate.
|
||||
|
||||
**Why:** Codex's outside-voice review flagged that
|
||||
`Performance.getMetrics` misses native memory, GPU memory, video
|
||||
buffers, Skia, network cache, extension process RSS, and
|
||||
browser-process RSS — all the categories where a 160 GB leak would
|
||||
actually live. A diagnostic that misses the categories where the
|
||||
leak class lives undersells itself.
|
||||
|
||||
**Pros:** Per-process category breakdown closes the gap between
|
||||
"Activity Monitor says 160 GB" and what the diagnostic shows.
|
||||
**Cons:** Each CDP method has its own quirks; this is a real
|
||||
implementation pass, not a one-line addition.
|
||||
|
||||
**Context:** Codex finding #5 on the eng-review outside voice. Not
|
||||
in scope of the v1.49 PR; deliberately deferred.
|
||||
|
||||
**Priority:** P2. **Effort:** M.
|
||||
|
||||
---
|
||||
|
||||
### P3: Single-context CDP listener for Network.loadingFinished
|
||||
|
||||
**What:** `wirePageEvents` attaches a `page.on('requestfinished')`
|
||||
listener PER PAGE. The D10 fix removed the body-materialization leak
|
||||
inside that listener but kept the per-page listener architecture
|
||||
(7 listeners attached per tab — close, framenavigated, dialog,
|
||||
console, request, response, requestfinished). The stretch goal from
|
||||
D10 was to replace the per-page `requestfinished` listener with a
|
||||
single context-level CDP listener via
|
||||
`Target.setAutoAttach({autoAttach: true, waitForDebuggerOnStart: false,
|
||||
flatten: true})` and a browser-wide `Network.loadingFinished` event
|
||||
handler.
|
||||
|
||||
**Why:** Going from N to 1 listener for the request-size capture is
|
||||
structurally the right architecture and removes one piece of per-tab
|
||||
memory pressure. The body-materialization fix already addressed the
|
||||
acute leak; this is the architectural cleanup that prevents similar
|
||||
leaks in the same class.
|
||||
|
||||
**Pros:** One listener per browser instead of one per tab.
|
||||
**Cons:** `Target.setAutoAttach` plumbing is more code than the
|
||||
straight per-page listener; the marginal memory win is small on top
|
||||
of the body-fetch fix that already landed.
|
||||
|
||||
**Context:** D10 stretch goal on the eng-review. The minimal-risk
|
||||
fix shipped in v1.49 (replaces `await res.body()` with
|
||||
`await req.sizes()`, preserving the per-page listener); this is the
|
||||
architectural follow-up.
|
||||
|
||||
**Priority:** P3. **Effort:** M-L.
|
||||
|
||||
---
|
||||
|
||||
### P3: Real-Chromium peak-RSS reproducer (periodic tier)
|
||||
|
||||
**What:** The gate-tier reproducer
|
||||
(`browse/test/memory-leak-reproducer.test.ts`) pins the invariant
|
||||
that `res.body()` is never called during a burst of
|
||||
`requestfinished` events. It uses a fake page; it does NOT spin up a
|
||||
real Chromium nor measure peak Bun RSS during a real concurrent fetch
|
||||
burst. A periodic-tier follow-up should: spin up a real headless
|
||||
Chromium, navigate to a fixture page that concurrently fetches 500
|
||||
mixed responses (small JSON, 100 KB images, 10 MB chunked,
|
||||
gzip-compressed 2 MB), sample `process.memoryUsage().heapUsed` every
|
||||
100 ms during the burst, assert `peak_heap < 200 MB above baseline`
|
||||
AND `post-gc_heap < 30 MB above baseline`. Also include a single-tab
|
||||
WebGL canvas variant that grows to >4 GB and asserts the per-tab RSS
|
||||
toast fires.
|
||||
|
||||
**Why:** Codex flagged that the leak's real failure mode is transient
|
||||
amplification under a concurrent burst, not a retained leak — a steady-state
|
||||
heap test misses it. The fake-page gate-tier test catches the
|
||||
listener-architecture regression; the periodic real-browser test
|
||||
catches the actual peak-RSS class.
|
||||
|
||||
**Pros:** Closes the "did we actually demonstrate the OOM is fixed"
|
||||
question with hard numbers. Feeds the ANGLE_B_NUMBERS CHANGELOG
|
||||
release-summary table.
|
||||
**Cons:** Periodic tier costs minutes of CI time and money per run;
|
||||
real-browser memory tests are inherently flaky.
|
||||
|
||||
**Context:** Codex outside-voice finding on the eng-review; D7
|
||||
ANGLE_B_NUMBERS CHANGELOG framing needs this reproducer's numbers
|
||||
before /ship time.
|
||||
|
||||
**Priority:** P3. **Effort:** M.
|
||||
|
||||
---
|
||||
|
||||
## design daemon: follow-ups (filed v1.45.0.0 via /ship review army)
|
||||
|
||||
### ✅ DONE (v1.45.0.0): Tighten daemon test coverage
|
||||
|
||||
**Resolved in commit `6b037c55` (same PR):** All 5 test gaps filled before
|
||||
landing. Per-file totals after: serve 16, daemon 34, daemon-discovery 23,
|
||||
feedback-roundtrip-daemon 4 = 77 (+10 from initial ship). Specifically:
|
||||
- Idle-shutdown actually fires (spawn-based, daemon process observed exiting,
|
||||
state file removed).
|
||||
- Bare GET polling doesn't reset idle (hammers `/api/progress` in background,
|
||||
daemon still idles out).
|
||||
- Idle-with-active-boards extends, then force-shuts after MAX_EXTENSIONS
|
||||
(with `DESIGN_DAEMON_EXTENSION_MS=1500` + `MAX_EXTENSIONS=2`).
|
||||
- Concurrent `ensureDaemon()` race converges on one daemon (lock wins).
|
||||
- Stale-lock reclaim (dead PID succeeds, alive unrelated PID refuses).
|
||||
- Malformed-JSON + non-object + array-body + missing-html negatives for
|
||||
`POST /api/boards` and `POST /boards/<id>/api/reload`.
|
||||
|
||||
### P3: Minor maintainability nits from /ship review
|
||||
|
||||
- `design/src/cli.ts` and `design/src/serve.ts` both have a small `openBrowser`
|
||||
helper with identical darwin/linux/else branches. Extract a shared
|
||||
`design/src/open-browser.ts`.
|
||||
- `design/src/daemon-client.ts:320` (`AbortSignal.timeout(2000)`) and `:357`
|
||||
(`delay(50)`) use bare numeric literals while sibling timeouts are named
|
||||
constants. Promote to `SHUTDOWN_POST_TIMEOUT_MS` and `ALIVE_POLL_INTERVAL_MS`.
|
||||
- `design/src/daemon-state.ts:21` `serverPath` field is written
|
||||
(`daemon.ts:541`) but never read by production code. Either remove or
|
||||
document the forensic intent.
|
||||
|
||||
### P3: Daemon scope deferred from v1.45.0.0 plan
|
||||
|
||||
Originally listed in the plan's "TODOs surfaced for later" section:
|
||||
|
||||
- Per-daemon scoped auth tokens (only relevant once a tunnel/share use case appears).
|
||||
- Optional persistent board history on disk in
|
||||
`~/.gstack/projects/$SLUG/designs/history/` so submitted boards survive
|
||||
daemon restarts.
|
||||
- Windows spawn branch lifted from browse (V1 daemon is macOS + Linux;
|
||||
Windows users fall back to legacy `--no-daemon` per-process server).
|
||||
- `$D board list` / `$D board stop <id>` per-board ops CLI (V1 has only
|
||||
`$D daemon status` / `stop`).
|
||||
- Cross-worktree daemon attach (conductor sibling worktrees of the same
|
||||
repo currently each spawn their own daemon — matches browse; revisit
|
||||
if it causes friction).
|
||||
|
||||
---
|
||||
|
||||
## browse server: terminal-agent teardown follow-ups (filed v1.41 via /plan-eng-review)
|
||||
|
||||
### ✅ DONE (v1.44.0.0): Identity-based terminal-agent kill (replace pkill regex with PID)
|
||||
|
||||
**Resolved:** Bundled into the v1.44.0.0 long-lived-sidebar PR as Commit 0.
|
||||
`browse/src/terminal-agent-control.ts` is the new home for `readAgentRecord`,
|
||||
`writeAgentRecord`, `clearAgentRecord`, and `killAgentByRecord`. The agent
|
||||
writes `<stateDir>/terminal-agent-pid` (JSON `{pid, gen, startedAt}`) at boot
|
||||
and clears it on SIGTERM/SIGINT. `cli.ts` and `server.ts` both route through
|
||||
`killAgentByRecord` instead of `pkill -f terminal-agent\.ts`. The new
|
||||
`browse/test/terminal-agent-pid-identity.test.ts` is the static-grep tripwire
|
||||
that fails CI if `pkill ... terminal-agent` or `spawnSync('pkill', ...)`
|
||||
reappears in any source file.
|
||||
|
||||
---
|
||||
|
||||
### P3: shutdown() reads module-level `config`, not `cfg.config` (composition gap)
|
||||
|
||||
**What:** `browse/src/server.ts:shutdown()` reads `path.dirname(config.stateFile)`
|
||||
where `config` is the module-level value resolved at import time, not the
|
||||
`cfg.config` passed into `buildFetchHandler`. Same gap applies to
|
||||
`cleanSingletonLocks(resolveChromiumProfile())` at server.ts:1298 — should
|
||||
read `cfg.chromiumProfile`.
|
||||
|
||||
**Why:** Embedders today happen to share state-dir resolution with the CLI
|
||||
(both go through `resolveConfig()` against the same env), so this doesn't
|
||||
bite. But if an embedder ever passes a divergent `cfg.config` (e.g., a test
|
||||
harness pointing at a temp dir), shutdown will operate on the wrong paths.
|
||||
The `ownsTerminalAgent` flag exposes the problem without fixing it.
|
||||
|
||||
**Pros:** Closes the embedder-composition story properly. Pairs with
|
||||
`cfg.chromiumProfile` to give a single coherent "this factory teardown
|
||||
respects cfg" contract.
|
||||
|
||||
**Cons:** Pre-existing — not a regression. Two call sites today (1285 for
|
||||
terminal files, 1298 for chromium locks). Threading `cfg.config` and
|
||||
`cfg.chromiumProfile` into the right closures is straightforward but
|
||||
broader than the v1.41 fix.
|
||||
|
||||
**Context:** Flagged by both Codex and Claude subagent in the /plan-eng-review
|
||||
dual voices. Documented as out-of-scope in the v1.41 plan; same shape as the
|
||||
`chromiumProfile` PR-body note to the gbrowser team.
|
||||
|
||||
**Depends on:** None.
|
||||
|
||||
---
|
||||
|
||||
### P3: Ownership-object refactor if a 4th caller-owned teardown gate appears
|
||||
|
||||
**What:** Today `ServerConfig` has three caller-owned teardown gates:
|
||||
`xvfb?` (presence ⇒ don't close), `proxyBridge?` (same), and now
|
||||
`ownsTerminalAgent` (explicit boolean). If a 4th gate appears, collapse to
|
||||
`cfg.callerOwns?: Set<'terminalAgent' | 'xvfb' | 'proxyBridge' | ...>` or
|
||||
similar.
|
||||
|
||||
**Why:** Three independent flags is below the refactor threshold — each
|
||||
field has clear, distinct semantics and the JSDoc voice is consistent. A
|
||||
fourth tips the cost balance: the per-field surface gets noisy, and
|
||||
"what does this factory own?" becomes a question you have to ask of three
|
||||
or four scattered fields instead of one explicit set.
|
||||
|
||||
**Pros:** Single source of truth for "what gstack tears down". Trivial
|
||||
extension surface for future caller-owned resources. Easier to assert in
|
||||
tests ("the set should contain X, not Y").
|
||||
|
||||
**Cons:** Premature today. The polarity-inversion note in the
|
||||
`ownsTerminalAgent` JSDoc only hurts a little — it's one anomaly, not a
|
||||
pattern. Refactoring now to an ownership object would touch every embedder.
|
||||
|
||||
**Context:** Recommended by Claude subagent during /plan-ceo-review dual
|
||||
voice (autoplan). Trigger: a 4th caller-owned teardown gate in this same
|
||||
`ServerConfig` shape.
|
||||
|
||||
**Depends on:** A 4th gate to motivate the refactor.
|
||||
|
||||
---
|
||||
|
||||
## /sync-gbrain memory stage perf follow-up
|
||||
|
||||
### P2: Investigate `gbrain import` perf on large staging dirs
|
||||
|
|
@ -736,24 +457,7 @@ reads it yet.
|
|||
|
||||
**Effort:** L (human: ~1 week / CC: ~4h)
|
||||
**Priority:** P0
|
||||
**Depends on:** **90+ days of v1 dogfood stable across 3+ skills** (per
|
||||
`docs/designs/PLAN_TUNING_V0.md` §"Deferred to v2" E1 acceptance criteria).
|
||||
Distinct from the lighter-weight diversity-display gate
|
||||
(`sample_size >= 20 AND skills_covered >= 3 AND question_ids_covered >= 8
|
||||
AND days_span >= 7`) used in /plan-tune to render the inferred column —
|
||||
display is a UI affordance, promotion to E1 needs a much higher bar
|
||||
because behavioral adaptation is consequential and hard to revert. Prior
|
||||
versions of this card cited "2+ weeks" which conflicted with V0 — V0 wins.
|
||||
|
||||
**Substrate risk (Codex outside-voice, Phase A review 2026-05-26):** Generated
|
||||
skill prose is agent-compliance-based. Tests can verify templates contain the
|
||||
right reads of `~/.gstack/developer-profile.json` and the right decision
|
||||
points, but tests cannot prove agents obey them at runtime. E1 ships
|
||||
adaptations as **advisory annotations on AskUserQuestion recommendations**
|
||||
("Recommended via your profile: <choice>") until there's a hard runtime
|
||||
execution path. Do NOT gate any AUTO_DECIDE on inferred profile alone in v1
|
||||
of E1; explicit per-question preferences remain the only AUTO_DECIDE
|
||||
source.
|
||||
**Depends on:** 2+ weeks of v1 dogfood, profile diversity check passing.
|
||||
|
||||
### E3 — `/plan-tune narrative` + `/plan-tune vibe`
|
||||
|
||||
|
|
@ -1939,49 +1643,6 @@ Shipped in v0.6.5. TemplateContext in gen-skill-docs.ts bakes skill name into pr
|
|||
**Priority:** P2
|
||||
**Depends on:** CDP patches proving the value of anti-bot stealth first
|
||||
|
||||
## /spec follow-ups (deferred from v1.47.0.0 via /plan-ceo-review SCOPE EXPANSION)
|
||||
|
||||
### P2: `/spec --epic` mode (parent issue + child issues + dependency graph)
|
||||
|
||||
**Priority:** P2
|
||||
|
||||
**What:** Add `--epic` flag that produces an Epic issue (parent) plus N child issues with explicit dependency graph and topological order. Emits multiple `gh issue create` calls with parent linkage in child bodies.
|
||||
|
||||
**Why:** Multi-week initiatives often span 3-5 specs that share context but ship sequentially. A `/spec --epic` mode would let users author the full initiative in one session and file all linked issues atomically. The Epic template already exists in `spec/SKILL.md.tmpl` (carried over from PR #1698); only the flag routing + multi-issue `gh` orchestration is missing.
|
||||
|
||||
**Pros:**
|
||||
- Closes the multi-issue workflow gap that `/spec` v1 doesn't cover.
|
||||
- Parent + child linkage means project boards show the full initiative at-a-glance.
|
||||
- Composes cleanly with existing `--execute` (spawn an agent on the parent epic; agent files children as it works).
|
||||
|
||||
**Cons:**
|
||||
- More gh API surface (one create per child, parent-link edit pass).
|
||||
- Dependency-graph rendering in markdown is fiddly across GitHub vs GitLab renderers.
|
||||
|
||||
**Context:** Considered in `/plan-ceo-review` SCOPE EXPANSION (D5), deferred 2026-05-25 in favor of shipping the 5 critical-path expansions (--execute, --dedupe, archive, quality gate, --audit). Re-evaluate once v1.47 ships and we see how often users hit "this should be 3 issues" in real /spec sessions.
|
||||
|
||||
**Depends on:** v1.47.0.0 `/spec` lands first; need real usage data to calibrate the multi-issue surface.
|
||||
|
||||
### P3: `/spec --dedupe` semantic matching (LLM-based) for v1.1
|
||||
|
||||
**Priority:** P3
|
||||
|
||||
**What:** Upgrade `--dedupe`'s string match against `gh issue list --search` to LLM-based semantic similarity. Today's v1 picks string overlap on title keywords; semantic match would catch "the sidebar terminal flakes on reload" matching an existing issue titled "PTY reconnect fails after extension restart" where keyword overlap is zero.
|
||||
|
||||
**Why:** String match has high precision but low recall — it misses near-duplicates with different vocabulary. LLM semantic match catches more dupes but costs ~$0.01-0.05 per spec dispatch and adds 5-10s latency.
|
||||
|
||||
**Pros:**
|
||||
- Catches dupes string match misses.
|
||||
- One more reason `/spec` is more useful than freehand authoring.
|
||||
|
||||
**Cons:**
|
||||
- Paid + slower. Most v1 users probably don't hit enough false-negatives to justify the cost.
|
||||
- Adds another LLM-judged decision to a skill that already has the quality gate.
|
||||
|
||||
**Context:** Considered in `/plan-ceo-review` build-time decisions; chose string match for v1 to keep the dedupe path free + fast. Revisit if v1 produces a meaningful false-negative rate in real use.
|
||||
|
||||
**Depends on:** v1.47.0.0 ships; gather real false-negative data from the v1 string matcher.
|
||||
|
||||
## Completed
|
||||
|
||||
### Slim preamble + real-PTY plan-mode E2E harness (v1.13.1.0)
|
||||
|
|
@ -2089,165 +1750,3 @@ Shipped in v0.6.5. TemplateContext in gen-skill-docs.ts bakes skill name into pr
|
|||
### Auto-upgrade mode + smart update check
|
||||
- Config CLI (`bin/gstack-config`), auto-upgrade via `~/.gstack/config.yaml`, 12h cache TTL, exponential snooze backoff (24h→48h→1wk), "never ask again" option, vendored copy sync on upgrade
|
||||
**Completed:** v0.3.8
|
||||
|
||||
---
|
||||
|
||||
## Brain-aware planning follow-ups (filed v1.48.0.0 via /plan-ceo-review + /plan-eng-review)
|
||||
|
||||
These are the deferred cherry-picks (E2/E3/E4) from the v1.48 brain-aware
|
||||
planning plan at `~/.claude/plans/hm-interesting-well-why-dapper-eagle.md`.
|
||||
The foundation (Phase 0 entity model + Phase 0.5 cache + Phase 1 preflight
|
||||
+ Phase 1.5 trust policy + Phase 2 write-back scaffolding) ships in
|
||||
v1.48.0.0. These follow-ups extend it.
|
||||
|
||||
### P2: /gstack-reflect nightly synthesis skill (E2)
|
||||
|
||||
**What:** Scheduled skill that reads weekly `gstack/skill-run` + takes +
|
||||
`get_recent_salience` and synthesizes a `gstack/insight` page surfaced at
|
||||
next skill preflight.
|
||||
|
||||
**Why:** Cross-time pattern detection is the compounding move. "You ran 4
|
||||
plan-ceo on infra this week, 0 on product — is product work getting
|
||||
starved?" surfaces patterns the user wouldn't notice.
|
||||
|
||||
**Pros:** Brain compounds across TIME, not just across skills. Patterns
|
||||
become actionable.
|
||||
|
||||
**Cons:** "You're starving product work" is high-judgment territory; needs
|
||||
opt-out per project, careful insight templates.
|
||||
|
||||
**Context:** Deferred from v1.48.0.0 cherry-pick (D4) — wait 4-6 weeks for
|
||||
real `gstack/skill-run` data to accumulate before designing the reflection
|
||||
layer against real patterns instead of imagined ones.
|
||||
|
||||
**Effort:** L (human ~1-2 days, CC ~4-6h)
|
||||
|
||||
**Depends on:** Phase 0 (gstack/skill-run page type from v1.48.0.0) +
|
||||
~6 weeks of accumulated data
|
||||
|
||||
### P3: Cross-machine brain-cache sync (E3)
|
||||
|
||||
**What:** Push compressed digests through the gstack-brain-sync git pipeline
|
||||
so the brain-cache survives moving between Macs / Conductor workspaces.
|
||||
|
||||
**Why:** Eliminates the cold-miss tax on every new machine (~1-2s once per
|
||||
machine per day).
|
||||
|
||||
**Pros:** Instant warm cache on new machines.
|
||||
|
||||
**Cons:** Cache poisoning risk if not designed carefully (hash invariants,
|
||||
endpoint-binding, conflict resolution).
|
||||
|
||||
**Context:** Deferred from v1.48.0.0 cherry-pick (D5) — single-machine
|
||||
cache is fine for V1; correctness risk needs its own design pass.
|
||||
|
||||
**Effort:** M (human ~4h, CC ~30min)
|
||||
|
||||
**Depends on:** Brain-cache layer from v1.48.0.0
|
||||
|
||||
### P3: /gstack-onboarding dedicated skill (E4)
|
||||
|
||||
**What:** Guided 5-minute setup skill for new gstack installs: walks user
|
||||
through reading CLAUDE.md + README + recent commits to build `gstack/product`
|
||||
and active goals with explicit AUQs.
|
||||
|
||||
**Why:** Better UX than the inline bootstrap (which only fires when a
|
||||
planning skill is invoked).
|
||||
|
||||
**Pros:** Cleaner cold-start, explicit ceremony.
|
||||
|
||||
**Cons:** Inline bootstrap (in scope for v1.48) already covers the
|
||||
cold-start path adequately.
|
||||
|
||||
**Context:** Deferred from v1.48.0.0 cherry-pick (D6) — observe inline
|
||||
bootstrap performance first; add dedicated skill if friction is real.
|
||||
|
||||
**Effort:** S (human ~2h, CC ~15min)
|
||||
|
||||
**Depends on:** Inline bootstrap subcommand from v1.48.0.0
|
||||
|
||||
### P2: Upstream gbrain takes_add + takes_resolve MCP ops
|
||||
|
||||
**What:** Add `mcp__gbrain__takes_add` and `mcp__gbrain__takes_resolve`
|
||||
ops in `~/git/gbrain/src/core/operations.ts`. Extract the markdown-fence
|
||||
mirror logic from `commands/takes.ts:570` into a reusable
|
||||
`engine.resolveTake()` helper.
|
||||
|
||||
**Why:** Unlocks Phase 2 calibration write-back without the fence-block
|
||||
fallback. ~150 LOC. Already on gbrain's v0.31.x roadmap.
|
||||
|
||||
**Pros:** Clean Phase 2 path, removes the "fall back to put_page" smell.
|
||||
|
||||
**Cons:** Lives in upstream gbrain repo, not helsinki — separate PR.
|
||||
|
||||
**Context:** Phase 2 write-back is already wired in v1.48.0.0 behind the
|
||||
BRAIN_CALIBRATION_WRITEBACK feature flag (default off). Flag flips to
|
||||
true once upstream gbrain ships these ops. ~50 LOC follow-up in
|
||||
helsinki to swap the fallback for the preferred op.
|
||||
|
||||
**Effort:** S (human ~1d, CC ~1h) in gbrain repo; trivial wire-up in
|
||||
helsinki.
|
||||
|
||||
**Depends on:** None (parallel-track from v1.48.0.0)
|
||||
|
||||
### P3: Background-refresh hook supervision
|
||||
|
||||
**What:** Codex outside-voice raised that "background refresh at skill END"
|
||||
is hand-wavy. Add proper process supervision: PID file, timeout, failure
|
||||
log, cross-platform spawn.
|
||||
|
||||
**Why:** Current implementation backgrounds with `&` which works but
|
||||
leaves no observability when a refresh fails.
|
||||
|
||||
**Context:** Deferred from v1.48.0.0 codex tension T3. Stays low priority
|
||||
until users report stale digests where a background refresh silently
|
||||
failed.
|
||||
|
||||
**Effort:** S (human ~2h, CC ~20min)
|
||||
|
||||
### P2: Re-verify calibration takes when gbrain v0.42+ lands
|
||||
|
||||
**What:** When upstream gbrain ships `takes_add` MCP op and we flip
|
||||
`BRAIN_CALIBRATION_WRITEBACK` from FALSE to TRUE, re-run the manual
|
||||
probe in `docs/gbrain-write-surfaces.md` against `/office-hours` and
|
||||
confirm `gbrain takes_list` surfaces a `kind=bet` entry with the
|
||||
expected weight (0.9 for office-hours, per
|
||||
`scripts/brain-cache-spec.ts:151-157`).
|
||||
|
||||
**Why:** Today the calibration take path falls back to writing inside a
|
||||
`gbrain put` fence block because `takes_add` isn't available yet. Once
|
||||
v0.42+ ships, the agent will call `takes_add` directly — we should
|
||||
confirm the new path actually persists a queryable take.
|
||||
|
||||
**Context:** v1.50.0.0 plan §"NOT in scope". The fence-block fallback
|
||||
test (`test/takes-fence-fallback.test.ts`) covers wiring for both paths;
|
||||
this TODO is about live verification of the preferred path when it
|
||||
becomes available.
|
||||
|
||||
**Effort:** XS (human ~15min, CC ~5min)
|
||||
|
||||
**Depends on:** Upstream gbrain v0.42+ release shipping `takes_add` MCP
|
||||
op (separate TODO above).
|
||||
|
||||
### P2: Extend brain-writeback E2E to the other 4 planning skills
|
||||
|
||||
**What:** `test/skill-e2e-office-hours-brain-writeback.test.ts` covers
|
||||
the brain-writeback path for `/office-hours` only. Adding parallel
|
||||
tests for `/plan-ceo-review`, `/plan-eng-review`, `/plan-design-review`,
|
||||
and `/plan-devex-review` would bring per-skill agent-obedience coverage
|
||||
to parity with the resolver unit test
|
||||
(`test/resolvers-gbrain-save-results.test.ts`, which covers wiring for
|
||||
all 5).
|
||||
|
||||
**Why:** The resolver test proves the right instructions get emitted;
|
||||
the E2E proves the agent actually obeys. Today we only have that
|
||||
end-to-end signal for one of five planning skills.
|
||||
|
||||
**Context:** v1.50.0.0 plan §"NOT in scope". Extract `makeFakeGbrain`
|
||||
into `test/helpers/fake-gbrain.ts` when the second consumer arrives
|
||||
(YAGNI for one consumer today).
|
||||
|
||||
**Effort:** S (human ~1d, CC ~1h). Periodic-tier (~$2-4 total for 4
|
||||
runs).
|
||||
|
||||
**Depends on:** None.
|
||||
|
|
|
|||
|
|
@ -57,9 +57,7 @@ Best for: you'd rather click through supabase.com yourself than paste a PAT.
|
|||
|
||||
Best for: try-it-first, no account, no cloud, no sharing. Or a dedicated "this Mac's brain" that stays isolated from any cloud agent.
|
||||
|
||||
**What happens:** `gbrain init --pglite`. Brain lives at `~/.gbrain/brain.pglite`. No network calls for the init itself. Done in 30 seconds.
|
||||
|
||||
**Embedding model.** When `VOYAGE_API_KEY` is set, gstack inits PGLite with `voyage-code-3` (1024-dim) — Voyage's code-specialized embedding model, which beats their general-purpose `voyage-4-large` and OpenAI `text-embedding-3-large` head-to-head on this codebase's symbol queries. Without `VOYAGE_API_KEY`, gbrain auto-selects a provider (OpenAI 1536-dim when `OPENAI_API_KEY` is present; otherwise it falls back through its provider chain). Either way, embedding generation calls the chosen provider's API during sync — set the key for the provider you want before running `/sync-gbrain`.
|
||||
**What happens:** `gbrain init --pglite`. Brain lives at `~/.gbrain/brain.pglite`. No network calls. Done in 30 seconds.
|
||||
|
||||
This is the best first choice if you just want to see what gbrain feels like before committing to cloud. You can always migrate later with `/setup-gbrain --switch`.
|
||||
|
||||
|
|
@ -84,7 +82,7 @@ By default the skill asks "Give Claude Code a typed tool surface for gbrain?" If
|
|||
claude mcp add gbrain -- gbrain serve
|
||||
```
|
||||
|
||||
That registers gbrain's stdio MCP server with Claude Code. Now `gbrain search`, `gbrain put`, `gbrain get`, etc. show up as first-class tools in every session, not bash shell-outs.
|
||||
That registers gbrain's stdio MCP server with Claude Code. Now `gbrain search`, `gbrain put_page`, `gbrain get_page`, etc. show up as first-class tools in every session, not bash shell-outs.
|
||||
|
||||
**If `claude` is not on PATH**, the skill skips MCP registration gracefully with a manual-register hint. The CLI resolver still works from any skill that shells out to `gbrain` — MCP is an upgrade, not a prerequisite.
|
||||
|
||||
|
|
@ -136,7 +134,7 @@ The skill runs three stages — code, memory, brain-sync — independently. A fa
|
|||
|
||||
1. **Pre-flight.** Checks `gbrain_local_status` (the local engine's health). If the engine is `broken-db` or `broken-config`, the skill STOPs with a remediation menu — it refuses to silently degrade. If the local engine is missing and you're in remote-MCP mode (Path 4), the code stage SKIPs cleanly and only brain-sync runs.
|
||||
2. **Code stage.** Registers the cwd as a federated source via `gbrain sources add`, writes a `.gbrain-source` pin file in the repo root (kubectl-style context — every worktree gets its own pin, so Conductor sibling worktrees don't collide), runs `gbrain sync --strategy code`.
|
||||
3. **Memory stage.** Stages your `~/.gstack/` transcripts + curated memory. In local-stdio MCP mode, ingests into the local engine. In remote-http MCP mode, persists staged markdown to `~/.gstack/transcripts/run-<pid>-<ts>/` for the remote brain admin's pull pipeline. The ingest timeout is 30 minutes by default; raise it for a big brain with `GSTACK_INGEST_TIMEOUT_MS` (accepts 1 min–24h). On timeout the gbrain import checkpoint is preserved, so the next `/sync-gbrain` resumes instead of starting over.
|
||||
3. **Memory stage.** Stages your `~/.gstack/` transcripts + curated memory. In local-stdio MCP mode, ingests into the local engine. In remote-http MCP mode, persists staged markdown to `~/.gstack/transcripts/run-<pid>-<ts>/` for the remote brain admin's pull pipeline.
|
||||
4. **Brain-sync stage.** Pushes curated artifacts (plans, designs, retros) to your private artifacts repo if you have one configured.
|
||||
5. **CLAUDE.md guidance.** Capability-checks the round-trip (write a page → search → find it). If green, writes the `## GBrain Search Guidance` block to your project's CLAUDE.md. If red, REMOVES the block — the agent should never be told to use a tool that isn't installed.
|
||||
|
||||
|
|
@ -226,8 +224,8 @@ Gbrain itself ships with these that gstack wraps:
|
|||
| `gbrain migrate --to supabase --url ...` | Move a PGLite brain to Supabase (lossless, preserves source as backup) |
|
||||
| `gbrain migrate --to pglite` | Reverse migration |
|
||||
| `gbrain search "query"` | Search the brain |
|
||||
| `gbrain put "<slug>" --content "<markdown-with-frontmatter>"` | Write a page (title/tags go in YAML frontmatter inside `--content`) |
|
||||
| `gbrain get "<slug>"` | Fetch a page |
|
||||
| `gbrain put_page --title "..." --tags "a,b" <<<"content"` | Write a page |
|
||||
| `gbrain get_page "<slug>"` | Fetch a page |
|
||||
| `gbrain serve` | Start the MCP stdio server (used by `claude mcp add`) |
|
||||
|
||||
### Config files + state
|
||||
|
|
@ -253,8 +251,7 @@ Gbrain itself ships with these that gstack wraps:
|
|||
| `SUPABASE_API_BASE` | `gstack-gbrain-supabase-provision` | Override the Management API host. Used by tests to point at a mock server. |
|
||||
| `GBRAIN_INSTALL_DIR` | `gstack-gbrain-install` | Override default install path (`~/gbrain`) |
|
||||
| `GSTACK_HOME` | every bin helper | Override `~/.gstack` state dir. Heavy test use. |
|
||||
| `VOYAGE_API_KEY` | `gbrain embed` subprocess; gstack PGLite init | When set, gstack inits PGLite with `voyage-code-3` (1024-dim), Voyage's code-specialized embedding model. Beats `voyage-4-large` and OpenAI `text-embedding-3-large` head-to-head on this codebase's symbol queries. See CHANGELOG v1.43.1.0 for the A/B numbers. |
|
||||
| `OPENAI_API_KEY` | `gbrain embed` subprocess | Used for embeddings during `gbrain sync` / `/sync-gbrain` when `VOYAGE_API_KEY` is not set (gbrain's auto-selected fallback, `text-embedding-3-large` 1536-dim). Without either key, pages are imported structurally (symbol tables, chunks) but semantic search degrades — you'll see `[gbrain] embedding failed for code file ...` in the sync log. |
|
||||
| `OPENAI_API_KEY` | `gbrain embed` subprocess | Required for embeddings during `gbrain sync` / `/sync-gbrain`. Without it, pages are imported structurally (symbol tables, chunks) but semantic search degrades — you'll see `[gbrain] embedding failed for code file ... OpenAI embedding requires OPENAI_API_KEY` in the sync log. |
|
||||
| `ANTHROPIC_API_KEY` | `claude-agent-sdk`, paid evals | Required for `bun run test:evals` and any direct `query()` call against Claude. |
|
||||
| `GSTACK_OPENAI_API_KEY` | `lib/conductor-env-shim.ts` | Conductor-injected fallback. Promoted to `OPENAI_API_KEY` when the canonical name is empty. |
|
||||
| `GSTACK_ANTHROPIC_API_KEY` | `lib/conductor-env-shim.ts` | Same pattern as above for Anthropic. |
|
||||
|
|
@ -348,7 +345,7 @@ Embeddings probably failed during import. Symbol queries (`code-def`, `code-refs
|
|||
[gbrain] embedding failed for code file <name>: OpenAI embedding requires OPENAI_API_KEY
|
||||
```
|
||||
|
||||
The fix is to put a provider API key in the process env before re-running. `VOYAGE_API_KEY` is preferred for code (gstack defaults PGLite to `voyage-code-3` when set); otherwise `OPENAI_API_KEY` falls back to `text-embedding-3-large`. On a bare Mac shell, source the key from `~/.zshrc` before calling. In Conductor, the `lib/conductor-env-shim.ts` shim promotes `GSTACK_ANTHROPIC_API_KEY` / `GSTACK_OPENAI_API_KEY` to their canonical names automatically; for `VOYAGE_API_KEY`, set it directly in your Conductor workspace env. Re-run `/sync-gbrain --code-only` to backfill embeddings on already-imported pages.
|
||||
The fix is to put `OPENAI_API_KEY` in the process env before re-running. On a bare Mac shell, source it from `~/.zshrc` before calling. In Conductor, set `GSTACK_OPENAI_API_KEY` at the workspace level — `lib/conductor-env-shim.ts` automatically promotes it to the canonical `OPENAI_API_KEY` when imported. Re-run `/sync-gbrain --code-only` to backfill embeddings on already-imported pages.
|
||||
|
||||
### `gbrain sync` blocked at a commit hash — `FILE_TOO_LARGE`
|
||||
|
||||
|
|
@ -379,7 +376,7 @@ Another gstack session in a sibling Conductor workspace may be holding a lock on
|
|||
## Related skills + next steps
|
||||
|
||||
- `/health` — includes a GBrain dimension (doctor status, sync queue depth, last-push age) in its 0-10 composite score. The dimension is omitted when gbrain isn't installed; running `/health` on a non-gbrain machine doesn't penalize that choice.
|
||||
- `/gstack-upgrade` — keeps gstack itself up to date. Does NOT upgrade gbrain independently. gbrain installs at the latest HEAD by default; to refresh it, `git pull` in your gbrain clone (default `~/gbrain`) and re-run `/setup-gbrain`. Pin a specific commit with `gstack-gbrain-install --pinned-commit <sha>` if you need reproducibility. Installs below the minimum tested version are refused.
|
||||
- `/gstack-upgrade` — keeps gstack itself up to date. Does NOT upgrade gbrain independently. To bump gbrain, update `PINNED_COMMIT` in `bin/gstack-gbrain-install` and re-run `/setup-gbrain`.
|
||||
- `/retro` — weekly retrospective pulls learnings and plans from your gbrain when memory sync is on, letting the retro reference cross-machine history.
|
||||
|
||||
Run `/setup-gbrain` and see what sticks.
|
||||
|
|
|
|||
|
|
@ -2,7 +2,16 @@
|
|||
name: autoplan
|
||||
preamble-tier: 3
|
||||
version: 1.0.0
|
||||
description: Auto-review pipeline — reads the full CEO, design, eng, and DX review skills from disk and runs them sequentially with auto-decisions using 6 decision principles. (gstack)
|
||||
description: |
|
||||
Auto-review pipeline — reads the full CEO, design, eng, and DX review skills from disk
|
||||
and runs them sequentially with auto-decisions using 6 decision principles. Surfaces
|
||||
taste decisions (close approaches, borderline scope, codex disagreements) at a final
|
||||
approval gate. One command, fully reviewed plan out.
|
||||
Use when asked to "auto review", "autoplan", "run all reviews", "review this plan
|
||||
automatically", or "make the decisions for me".
|
||||
Proactively suggest when the user has a plan file and wants to run the full review
|
||||
gauntlet without answering 15-30 intermediate questions. (gstack)
|
||||
Voice triggers (speech-to-text aliases): "auto plan", "automatic review".
|
||||
benefits-from: [office-hours]
|
||||
triggers:
|
||||
- run all reviews
|
||||
|
|
@ -21,19 +30,6 @@ allowed-tools:
|
|||
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
|
||||
<!-- Regenerate: bun run gen:skill-docs -->
|
||||
|
||||
|
||||
## When to invoke this skill
|
||||
|
||||
Surfaces
|
||||
taste decisions (close approaches, borderline scope, codex disagreements) at a final
|
||||
approval gate. One command, fully reviewed plan out.
|
||||
Use when asked to "auto review", "autoplan", "run all reviews", "review this plan
|
||||
automatically", or "make the decisions for me".
|
||||
Proactively suggest when the user has a plan file and wants to run the full review
|
||||
gauntlet without answering 15-30 intermediate questions.
|
||||
|
||||
Voice triggers (speech-to-text aliases): "auto plan", "automatic review".
|
||||
|
||||
## Preamble (run first)
|
||||
|
||||
```bash
|
||||
|
|
@ -69,7 +65,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
|
|||
echo "QUESTION_TUNING: $_QUESTION_TUNING"
|
||||
mkdir -p ~/.gstack/analytics
|
||||
if [ "$_TEL" != "off" ]; then
|
||||
echo '{"skill":"autoplan","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
echo '{"skill":"autoplan","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
fi
|
||||
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
|
||||
if [ -f "$_PF" ]; then
|
||||
|
|
@ -111,19 +107,6 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
|
|||
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
|
||||
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
|
||||
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
|
||||
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
|
||||
# Claude Code exposes plan mode via system reminders; we detect best-effort
|
||||
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
|
||||
# fall back to "inactive". Codex hosts and Claude execution mode both end up
|
||||
# inactive, which is the safe default (defaults to file+execute pipeline).
|
||||
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
else
|
||||
export GSTACK_PLAN_MODE="inactive"
|
||||
fi
|
||||
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
|
||||
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
|
||||
```
|
||||
|
||||
|
|
@ -179,7 +162,7 @@ Only run `open` if yes. Always run `touch`.
|
|||
|
||||
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
|
||||
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
|
||||
|
||||
Options:
|
||||
- A) Help gstack get better! (recommended)
|
||||
|
|
@ -255,7 +238,6 @@ Key routing rules:
|
|||
- Ship/deploy/PR → invoke /ship or /land-and-deploy
|
||||
- Save progress → invoke /context-save
|
||||
- Resume context → invoke /context-restore
|
||||
- Author a backlog-ready spec/issue → invoke /spec
|
||||
```
|
||||
|
||||
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
|
||||
|
|
@ -342,36 +324,7 @@ Effort both-scales: when an option involves effort, label both human-team and CC
|
|||
|
||||
Net line closes the tradeoff. Per-skill instructions may add stricter rules.
|
||||
|
||||
### Handling 5+ options — split, never drop
|
||||
|
||||
AskUserQuestion caps every call at **4 options**. With 5+ real options, NEVER
|
||||
drop, merge, or silently defer one to fit. Pick a compliant shape:
|
||||
|
||||
- **Batch into ≤4-groups** — for coherent alternatives (e.g. version bumps,
|
||||
layout variants). One call, 5th surfaced only if first 4 don't fit.
|
||||
- **Split per-option** — for independent scope items (e.g. "ship E1..E6?").
|
||||
Fire N sequential calls, one per option. Default to this when unsure.
|
||||
|
||||
Per-option call shape: `D<N>.k` header (e.g. D3.1..D3.5), ELI10 per option,
|
||||
Recommendation, kind-note (no completeness score — Include/Defer/Cut/Hold are
|
||||
decision actions), and 4 buckets:
|
||||
**A) Include**, **B) Defer**, **C) Cut**, **D) Hold** (stop chain, discuss).
|
||||
|
||||
After the chain, fire `D<N>.final` to validate the assembled set (reprompt
|
||||
dependency conflicts) and confirm shipping it. Use `D<N>.revise-<k>` to
|
||||
revise one option without re-running the chain.
|
||||
|
||||
For N>6, fire a `D<N>.0` meta-AskUserQuestion first (proceed / narrow / batch).
|
||||
|
||||
question_ids for split chains: `<skill>-split-<option-slug>` (kebab-case ASCII,
|
||||
≤64 chars, `-2`/`-3` suffix on collision). The runtime checker
|
||||
(`bin/gstack-question-preference`) refuses `never-ask` on any `*-split-*` id,
|
||||
so split chains are never AUTO_DECIDE-eligible — the user's option set is sacred.
|
||||
|
||||
**Full rule + worked examples + Hold/dependency semantics:** see
|
||||
`docs/askuserquestion-split.md` in the gstack repo. Read on demand when N>4.
|
||||
|
||||
**Non-ASCII characters — write directly, never \u-escape.** When any
|
||||
12. **Non-ASCII characters — write directly, never \u-escape.** When any
|
||||
string field (question, option label, option description) contains
|
||||
Chinese (繁體/簡體), Japanese, Korean, or other non-ASCII text, emit
|
||||
the literal UTF-8 characters in the JSON string. **Never escape them
|
||||
|
|
@ -404,9 +357,6 @@ Before calling AskUserQuestion, verify:
|
|||
- [ ] Net line closes the decision
|
||||
- [ ] You are calling the tool, not writing prose
|
||||
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
|
||||
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
|
||||
- [ ] If you split, you checked dependencies between options before firing the chain
|
||||
- [ ] If a per-option Hold fires, you stopped the chain immediately (didn't queue)
|
||||
|
||||
|
||||
## Artifacts Sync (skill start)
|
||||
|
|
@ -606,7 +556,84 @@ Applies to AskUserQuestion, user replies, and findings. AskUserQuestion Format i
|
|||
- User-turn override wins: if the current message asks for terse / no explanations / just the answer, skip this section.
|
||||
- Terse mode (EXPLAIN_LEVEL: terse): no glosses, no outcome-framing layer, shorter responses.
|
||||
|
||||
Curated jargon list lives at `~/.claude/skills/gstack/scripts/jargon-list.json` (80+ terms). On the first jargon term you encounter this session, Read that file once; treat the `terms` array as the canonical list. The list is repo-owned and may grow between releases.
|
||||
Jargon list, gloss on first use if the term appears:
|
||||
- idempotent
|
||||
- idempotency
|
||||
- race condition
|
||||
- deadlock
|
||||
- cyclomatic complexity
|
||||
- N+1
|
||||
- N+1 query
|
||||
- backpressure
|
||||
- memoization
|
||||
- eventual consistency
|
||||
- CAP theorem
|
||||
- CORS
|
||||
- CSRF
|
||||
- XSS
|
||||
- SQL injection
|
||||
- prompt injection
|
||||
- DDoS
|
||||
- rate limit
|
||||
- throttle
|
||||
- circuit breaker
|
||||
- load balancer
|
||||
- reverse proxy
|
||||
- SSR
|
||||
- CSR
|
||||
- hydration
|
||||
- tree-shaking
|
||||
- bundle splitting
|
||||
- code splitting
|
||||
- hot reload
|
||||
- tombstone
|
||||
- soft delete
|
||||
- cascade delete
|
||||
- foreign key
|
||||
- composite index
|
||||
- covering index
|
||||
- OLTP
|
||||
- OLAP
|
||||
- sharding
|
||||
- replication lag
|
||||
- quorum
|
||||
- two-phase commit
|
||||
- saga
|
||||
- outbox pattern
|
||||
- inbox pattern
|
||||
- optimistic locking
|
||||
- pessimistic locking
|
||||
- thundering herd
|
||||
- cache stampede
|
||||
- bloom filter
|
||||
- consistent hashing
|
||||
- virtual DOM
|
||||
- reconciliation
|
||||
- closure
|
||||
- hoisting
|
||||
- tail call
|
||||
- GIL
|
||||
- zero-copy
|
||||
- mmap
|
||||
- cold start
|
||||
- warm start
|
||||
- blue-green deploy
|
||||
- canary deploy
|
||||
- feature flag
|
||||
- kill switch
|
||||
- dead letter queue
|
||||
- fan-out
|
||||
- fan-in
|
||||
- debounce
|
||||
- throttle (UI)
|
||||
- hydration mismatch
|
||||
- memory leak
|
||||
- GC pause
|
||||
- heap fragmentation
|
||||
- stack overflow
|
||||
- null pointer
|
||||
- dangling pointer
|
||||
- buffer overflow
|
||||
|
||||
|
||||
## Completeness Principle — Boil the Lake
|
||||
|
|
@ -654,11 +681,7 @@ If you are looping on the same diagnostic, same file, or failed fix variants, ST
|
|||
|
||||
Before each AskUserQuestion, choose `question_id` from `scripts/question-registry.ts` or `{skill}-{slug}`, then run `~/.claude/skills/gstack/bin/gstack-question-preference --check "<id>"`. `AUTO_DECIDE` means choose the recommended option and say "Auto-decided [summary] → [option] (your preference). Change with /plan-tune." `ASK_NORMALLY` means ask.
|
||||
|
||||
**Embed the question_id as a marker in the question text** so hooks can identify it deterministically (plan-tune cathedral T14 / D18 progressive markers). Append `<gstack-qid:{question_id}>` somewhere in the rendered question; the leading or trailing line is fine. Because the marker is wrapped in HTML-style angle brackets it does not render visibly to the user, and the hook strips it. Without the marker, the PreToolUse enforcement hook treats the AUQ as observed-only and never auto-decides — so always include it when the question matches a registered `question_id`.
|
||||
|
||||
**Embed the option recommendation via the `(recommended)` label suffix** on exactly one option per AUQ. The PreToolUse hook parses `(recommended)` first, falls back to "Recommendation: X" prose, and refuses to auto-decide if ambiguous. Two `(recommended)` labels = refuse.
|
||||
|
||||
After answer, log best-effort (PostToolUse hook also captures deterministically when installed; dedup on (source, tool_use_id) handles double-writes):
|
||||
After answer, log best-effort:
|
||||
```bash
|
||||
~/.claude/skills/gstack/bin/gstack-question-log '{"skill":"autoplan","question_id":"<id>","question_summary":"<short>","category":"<approval|clarification|routing|cherry-pick|feedback-loop>","door_type":"<one-way|two-way>","options_count":N,"user_choice":"<key>","recommended":"<key>","session_id":"'"$_SESSION_ID"'"}' 2>/dev/null || true
|
||||
```
|
||||
|
|
|
|||
|
|
@ -2,7 +2,14 @@
|
|||
name: benchmark-models
|
||||
preamble-tier: 1
|
||||
version: 1.0.0
|
||||
description: Cross-model benchmark for gstack skills. (gstack)
|
||||
description: |
|
||||
Cross-model benchmark for gstack skills. Runs the same prompt through Claude,
|
||||
GPT (via Codex CLI), and Gemini side-by-side — compares latency, tokens, cost,
|
||||
and optionally quality via LLM judge. Answers "which model is actually best
|
||||
for this skill?" with data instead of vibes. Separate from /benchmark, which
|
||||
measures web page performance. Use when: "benchmark models", "compare models",
|
||||
"which model is best for X", "cross-model comparison", "model shootout". (gstack)
|
||||
Voice triggers (speech-to-text aliases): "compare models", "model shootout", "which model is best".
|
||||
triggers:
|
||||
- cross model benchmark
|
||||
- compare claude gpt gemini
|
||||
|
|
@ -16,18 +23,6 @@ allowed-tools:
|
|||
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
|
||||
<!-- Regenerate: bun run gen:skill-docs -->
|
||||
|
||||
|
||||
## When to invoke this skill
|
||||
|
||||
Runs the same prompt through Claude,
|
||||
GPT (via Codex CLI), and Gemini side-by-side — compares latency, tokens, cost,
|
||||
and optionally quality via LLM judge. Answers "which model is actually best
|
||||
for this skill?" with data instead of vibes. Separate from /benchmark, which
|
||||
measures web page performance. Use when: "benchmark models", "compare models",
|
||||
"which model is best for X", "cross-model comparison", "model shootout".
|
||||
|
||||
Voice triggers (speech-to-text aliases): "compare models", "model shootout", "which model is best".
|
||||
|
||||
## Preamble (run first)
|
||||
|
||||
```bash
|
||||
|
|
@ -63,7 +58,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
|
|||
echo "QUESTION_TUNING: $_QUESTION_TUNING"
|
||||
mkdir -p ~/.gstack/analytics
|
||||
if [ "$_TEL" != "off" ]; then
|
||||
echo '{"skill":"benchmark-models","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
echo '{"skill":"benchmark-models","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
fi
|
||||
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
|
||||
if [ -f "$_PF" ]; then
|
||||
|
|
@ -105,19 +100,6 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
|
|||
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
|
||||
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
|
||||
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
|
||||
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
|
||||
# Claude Code exposes plan mode via system reminders; we detect best-effort
|
||||
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
|
||||
# fall back to "inactive". Codex hosts and Claude execution mode both end up
|
||||
# inactive, which is the safe default (defaults to file+execute pipeline).
|
||||
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
else
|
||||
export GSTACK_PLAN_MODE="inactive"
|
||||
fi
|
||||
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
|
||||
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
|
||||
```
|
||||
|
||||
|
|
@ -173,7 +155,7 @@ Only run `open` if yes. Always run `touch`.
|
|||
|
||||
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
|
||||
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
|
||||
|
||||
Options:
|
||||
- A) Help gstack get better! (recommended)
|
||||
|
|
@ -249,7 +231,6 @@ Key routing rules:
|
|||
- Ship/deploy/PR → invoke /ship or /land-and-deploy
|
||||
- Save progress → invoke /context-save
|
||||
- Resume context → invoke /context-restore
|
||||
- Author a backlog-ready spec/issue → invoke /spec
|
||||
```
|
||||
|
||||
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
|
||||
|
|
|
|||
|
|
@ -2,7 +2,13 @@
|
|||
name: benchmark
|
||||
preamble-tier: 1
|
||||
version: 1.0.0
|
||||
description: Performance regression detection using the browse daemon. (gstack)
|
||||
description: |
|
||||
Performance regression detection using the browse daemon. Establishes
|
||||
baselines for page load times, Core Web Vitals, and resource sizes.
|
||||
Compares before/after on every PR. Tracks performance trends over time.
|
||||
Use when: "performance", "benchmark", "page speed", "lighthouse", "web vitals",
|
||||
"bundle size", "load time". (gstack)
|
||||
Voice triggers (speech-to-text aliases): "speed test", "check performance".
|
||||
triggers:
|
||||
- performance benchmark
|
||||
- check page speed
|
||||
|
|
@ -17,17 +23,6 @@ allowed-tools:
|
|||
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
|
||||
<!-- Regenerate: bun run gen:skill-docs -->
|
||||
|
||||
|
||||
## When to invoke this skill
|
||||
|
||||
Establishes
|
||||
baselines for page load times, Core Web Vitals, and resource sizes.
|
||||
Compares before/after on every PR. Tracks performance trends over time.
|
||||
Use when: "performance", "benchmark", "page speed", "lighthouse", "web vitals",
|
||||
"bundle size", "load time".
|
||||
|
||||
Voice triggers (speech-to-text aliases): "speed test", "check performance".
|
||||
|
||||
## Preamble (run first)
|
||||
|
||||
```bash
|
||||
|
|
@ -63,7 +58,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
|
|||
echo "QUESTION_TUNING: $_QUESTION_TUNING"
|
||||
mkdir -p ~/.gstack/analytics
|
||||
if [ "$_TEL" != "off" ]; then
|
||||
echo '{"skill":"benchmark","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
echo '{"skill":"benchmark","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
fi
|
||||
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
|
||||
if [ -f "$_PF" ]; then
|
||||
|
|
@ -105,19 +100,6 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
|
|||
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
|
||||
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
|
||||
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
|
||||
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
|
||||
# Claude Code exposes plan mode via system reminders; we detect best-effort
|
||||
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
|
||||
# fall back to "inactive". Codex hosts and Claude execution mode both end up
|
||||
# inactive, which is the safe default (defaults to file+execute pipeline).
|
||||
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
else
|
||||
export GSTACK_PLAN_MODE="inactive"
|
||||
fi
|
||||
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
|
||||
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
|
||||
```
|
||||
|
||||
|
|
@ -173,7 +155,7 @@ Only run `open` if yes. Always run `touch`.
|
|||
|
||||
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
|
||||
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
|
||||
|
||||
Options:
|
||||
- A) Help gstack get better! (recommended)
|
||||
|
|
@ -249,7 +231,6 @@ Key routing rules:
|
|||
- Ship/deploy/PR → invoke /ship or /land-and-deploy
|
||||
- Save progress → invoke /context-save
|
||||
- Resume context → invoke /context-restore
|
||||
- Author a backlog-ready spec/issue → invoke /spec
|
||||
```
|
||||
|
||||
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
|
||||
|
|
|
|||
|
|
@ -56,23 +56,8 @@ if [ ! -e "$AGENTS_LINK" ]; then
|
|||
ln -s "$REPO_ROOT" "$AGENTS_LINK"
|
||||
fi
|
||||
|
||||
# 6. Run setup via the symlink so it detects .claude/skills/ as its parent.
|
||||
#
|
||||
# Workspace/dev setup MUST be non-interactive: Conductor runs this under a
|
||||
# forwarded pty, so any `read` in setup (skill-prefix prompt, plan-tune hook
|
||||
# consent) would hang the workspace forever. Detaching stdin makes every setup
|
||||
# prompt take its smart non-interactive default (flat skill names, etc.).
|
||||
#
|
||||
# `--plan-tune-hooks=prompt` is load-bearing, not redundant: stdin alone only
|
||||
# suppresses the *prompt* branch. A saved `plan_tune_hooks: yes` or an exported
|
||||
# GSTACK_PLAN_TUNE_HOOKS=yes would still resolve to "install" and rewrite the
|
||||
# user's global ~/.claude/settings.json to point at THIS ephemeral worktree —
|
||||
# which breaks once the workspace is deleted. The flag has highest precedence,
|
||||
# so it pins resolution to "prompt", and closed stdin then makes prompt-mode a
|
||||
# no-op skip (no install, no decline marker). A dev workspace must never mutate
|
||||
# global settings.json. To install the hooks, run `./setup --plan-tune-hooks`
|
||||
# directly (outside dev-setup). Saved prefix/other config preferences still apply.
|
||||
"$GSTACK_LINK/setup" --plan-tune-hooks=prompt </dev/null
|
||||
# 6. Run setup via the symlink so it detects .claude/skills/ as its parent
|
||||
"$GSTACK_LINK/setup"
|
||||
|
||||
echo ""
|
||||
echo "Dev mode active. Skills resolve from this working tree."
|
||||
|
|
|
|||
|
|
@ -49,19 +49,6 @@ strip_git() {
|
|||
echo "${1%.git}"
|
||||
}
|
||||
|
||||
valid_owner_repo() {
|
||||
local owner_repo="$1"
|
||||
case "$owner_repo" in
|
||||
""|/*|*/|*//*)
|
||||
return 1
|
||||
;;
|
||||
esac
|
||||
case "$owner_repo" in
|
||||
*/*) return 0 ;;
|
||||
*) return 1 ;;
|
||||
esac
|
||||
}
|
||||
|
||||
# Parse to (host, owner_repo) regardless of input shape.
|
||||
parse_url() {
|
||||
local u="$1"
|
||||
|
|
@ -95,7 +82,7 @@ parse_url() {
|
|||
exit 3
|
||||
;;
|
||||
esac
|
||||
if [ -z "$host" ] || ! valid_owner_repo "$owner_repo"; then
|
||||
if [ -z "$host" ] || [ -z "$owner_repo" ] || [ "$owner_repo" = "$u" ]; then
|
||||
echo "gstack-artifacts-url: failed to parse host/owner from: $u" >&2
|
||||
exit 3
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -1,949 +0,0 @@
|
|||
#!/usr/bin/env bun
|
||||
/**
|
||||
* gstack-brain-cache — three-tier cache for brain-aware planning skills.
|
||||
*
|
||||
* Subcommands:
|
||||
* get <entity-name> [--project <slug>] — return digest content; refresh if stale
|
||||
* refresh [--full] [--entity X] [--project <slug>] — force refresh one or all
|
||||
* invalidate <entity-name> [--project <slug>] — mark stale; next get triggers cold
|
||||
* digest <entity-slug> — compress a brain page slug to digest
|
||||
* meta [--project <slug>] — print _meta.json
|
||||
*
|
||||
* (Later commits add: bootstrap [T2b], list [T18], purge [T18], retention sweep [T18].)
|
||||
*
|
||||
* Cache layout:
|
||||
* ~/.gstack/brain-cache/ ← cross-project (user-profile only)
|
||||
* ~/.gstack/projects/<slug>/brain-cache/ ← per-project (everything else)
|
||||
*
|
||||
* Atomic writes via .tmp + rename. Stale-but-usable fallback when brain
|
||||
* unreachable. A lockfile helper for concurrent-refresh dedup (T15) is provided by withRefreshLock.
|
||||
*/
|
||||
|
||||
import { existsSync, mkdirSync, readFileSync, writeFileSync, renameSync, statSync, unlinkSync, readdirSync, openSync, closeSync } from 'fs';
|
||||
import { join, dirname } from 'path';
|
||||
import { homedir, hostname } from 'os';
|
||||
import { spawnSync } from 'child_process';
|
||||
import { execGbrainJson, spawnGbrain } from '../lib/gbrain-exec';
|
||||
import {
|
||||
BRAIN_CACHE_ENTITIES,
|
||||
CACHE_REFRESH_LOCK_TIMEOUT_MS,
|
||||
GSTACK_SCHEMA_PACK_NAME,
|
||||
GSTACK_SCHEMA_PACK_VERSION,
|
||||
SALIENCE_DEFAULT_ALLOWLIST,
|
||||
type BrainCacheEntity,
|
||||
} from '../scripts/brain-cache-spec';
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Paths + meta
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Root of gstack's on-disk state: $GSTACK_HOME when set to a non-empty value, otherwise ~/.gstack. */
const GSTACK_HOME = process.env.GSTACK_HOME || join(homedir(), '.gstack');
|
||||
|
||||
/**
 * Shape of the `_meta.json` bookkeeping file stored next to the cached digests.
 * One meta file exists per scope (cross-project, or per project slug).
 */
interface CacheMeta {
  /** Schema-pack version the cache was built against; a mismatch triggers a full rebuild. */
  schema_version: string;
  /** Identity of the brain endpoint: SHA8 of the MCP URL, or the literal 'local'. */
  endpoint_hash: string;
  /** Entity name → epoch ms of the last successful refresh. A missing key means "never refreshed". */
  last_refresh: Record<string, number>;
  /** Entity name → epoch ms of the last refresh attempt, successful or not (stale-fallback diagnostics). */
  last_attempt?: Record<string, number>;
}
|
||||
|
||||
/**
 * Resolves the directory that stores an entity's digest file.
 *
 * Cross-project entities live in `<GSTACK_HOME>/brain-cache`; all others live in
 * `<GSTACK_HOME>/projects/<projectSlug>/brain-cache`.
 *
 * @throws Error when a non-cross-project entity is requested without a project slug.
 */
export function entityDir(entity: BrainCacheEntity, projectSlug: string | null): string {
  if (entity.scope === 'cross-project') return join(GSTACK_HOME, 'brain-cache');
  if (projectSlug) return join(GSTACK_HOME, 'projects', projectSlug, 'brain-cache');
  throw new Error(`Per-project entity needs a project slug: ${entity.file}`);
}
|
||||
|
||||
/**
 * Resolves the full path of the digest file for a named entity.
 *
 * @throws Error when `entityName` is not a key of BRAIN_CACHE_ENTITIES, or (via
 *   entityDir) when a per-project entity is requested without a slug.
 */
export function entityPath(entityName: string, projectSlug: string | null): string {
  const spec = BRAIN_CACHE_ENTITIES[entityName];
  if (spec) {
    const dir = entityDir(spec, projectSlug);
    return join(dir, spec.file);
  }
  throw new Error(`Unknown brain cache entity: ${entityName}`);
}
|
||||
|
||||
/**
 * Resolves the location of `_meta.json` for a scope.
 *
 * @throws Error when the scope is not 'cross-project' and no project slug is given.
 */
export function metaPath(scope: 'cross-project' | 'per-project', projectSlug: string | null): string {
  const fileName = '_meta.json';
  if (scope !== 'cross-project') {
    if (!projectSlug) throw new Error('Per-project meta needs a project slug');
    return join(GSTACK_HOME, 'projects', projectSlug, 'brain-cache', fileName);
  }
  return join(GSTACK_HOME, 'brain-cache', fileName);
}
|
||||
|
||||
/**
 * Loads `_meta.json` for a scope, always returning a structurally valid CacheMeta.
 *
 * - Missing or unparseable file → a fresh meta stamped with the current schema
 *   version and endpoint hash (entries will refresh on next access).
 * - Parseable JSON of the wrong shape (null, array, or an object whose fields
 *   have the wrong type) is normalized instead of being trusted blindly, so
 *   callers can index `last_refresh` without crashing. A missing or invalid
 *   `schema_version` / `endpoint_hash` becomes '' so it never equals the current
 *   value and therefore still triggers a rebuild, as an `undefined` field did before.
 *
 * @throws Error (via metaPath) when a per-project scope is given without a slug.
 */
function loadMeta(scope: 'cross-project' | 'per-project', projectSlug: string | null): CacheMeta {
  const freshMeta = (): CacheMeta => ({
    schema_version: GSTACK_SCHEMA_PACK_VERSION,
    endpoint_hash: detectEndpointHash(),
    last_refresh: {},
    last_attempt: {},
  });
  const isRecord = (value: unknown): value is Record<string, number> =>
    typeof value === 'object' && value !== null && !Array.isArray(value);

  const path = metaPath(scope, projectSlug);
  if (!existsSync(path)) return freshMeta();

  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(path, 'utf-8'));
  } catch {
    // Corrupt or unreadable _meta — start fresh.
    return freshMeta();
  }
  if (!isRecord(parsed)) return freshMeta();

  const raw = parsed as Partial<CacheMeta>;
  return {
    ...raw, // keep any extra fields written by other versions
    schema_version: typeof raw.schema_version === 'string' ? raw.schema_version : '',
    endpoint_hash: typeof raw.endpoint_hash === 'string' ? raw.endpoint_hash : '',
    last_refresh: isRecord(raw.last_refresh) ? raw.last_refresh : {},
    last_attempt: isRecord(raw.last_attempt) ? raw.last_attempt : undefined,
  };
}
|
||||
|
||||
/**
 * Persists a scope's CacheMeta as pretty-printed (2-space) JSON, creating the
 * parent directory if needed and writing through atomicWrite.
 */
function saveMeta(scope: 'cross-project' | 'per-project', projectSlug: string | null, meta: CacheMeta): void {
  const target = metaPath(scope, projectSlug);
  const parentDir = dirname(target);
  mkdirSync(parentDir, { recursive: true });
  const serialized = JSON.stringify(meta, null, 2);
  atomicWrite(target, serialized);
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Endpoint hash detection
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
import { createHash } from 'crypto';
|
||||
|
||||
/** Returns the first 8 hex characters of the SHA-256 digest of `input`. */
function sha8(input: string): string {
  const hasher = createHash('sha256');
  hasher.update(input);
  const hex = hasher.digest('hex');
  return hex.substring(0, 8);
}
|
||||
|
||||
/**
 * Identifies the active brain endpoint so a cache built against one brain is
 * not reused for another.
 *
 * Reads `~/.claude.json` and looks for `mcpServers.gbrain.url` (falling back to
 * `mcpServers.gbrain.transport.url`). A non-empty string URL is returned as its
 * SHA8 hash. When the file is missing or unparseable, or no URL is configured,
 * the literal 'local' is returned.
 */
export function detectEndpointHash(): string {
  const LOCAL_ENGINE = 'local';
  const configFile = join(homedir(), '.claude.json');
  if (!existsSync(configFile)) return LOCAL_ENGINE;

  try {
    const config = JSON.parse(readFileSync(configFile, 'utf-8'));
    const server = config?.mcpServers?.gbrain;
    const endpoint = server?.url || server?.transport?.url;
    if (typeof endpoint === 'string' && endpoint.length > 0) {
      return sha8(endpoint);
    }
  } catch {
    /* unreadable or invalid config: treat as local engine */
  }
  return LOCAL_ENGINE;
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Atomic write (tmp + rename)
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Writes `content` to `path` atomically: the data goes to a uniquely named
 * sibling temp file first and is then renamed over the destination, so readers
 * never observe a partially written file.
 *
 * The parent directory is created if missing. If the write or the rename
 * fails, the temp file is removed (best effort) before the original error is
 * rethrown, so failed writes do not litter the cache directory.
 *
 * @throws whatever mkdirSync / writeFileSync / renameSync throws.
 */
function atomicWrite(path: string, content: string): void {
  mkdirSync(dirname(path), { recursive: true });
  const tmp = `${path}.tmp.${process.pid}.${Date.now()}`;
  try {
    writeFileSync(tmp, content, 'utf-8');
    renameSync(tmp, path);
  } catch (err: unknown) {
    try {
      unlinkSync(tmp);
    } catch {
      /* temp file may never have been created */
    }
    throw err;
  }
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Staleness + refresh logic
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Reports whether an entity's cached digest should be refreshed: the entity is
 * unknown, it has no recorded successful refresh, or its last refresh is older
 * than the entity's `ttl_ms`.
 */
function isStale(entityName: string, meta: CacheMeta): boolean {
  const spec = BRAIN_CACHE_ENTITIES[entityName];
  const refreshedAt = spec ? meta.last_refresh[entityName] : undefined;
  if (!spec || !refreshedAt) return true;
  const ageMs = Date.now() - refreshedAt;
  return ageMs > spec.ttl_ms;
}
|
||||
|
||||
/** Reports whether the digest file for the entity is present on disk. */
function hasFile(entityName: string, projectSlug: string | null): boolean {
  const digestFile = entityPath(entityName, projectSlug);
  return existsSync(digestFile);
}
|
||||
|
||||
/** Reports whether the cache was built against a schema-pack version other than the current one. */
function schemaVersionMismatch(meta: CacheMeta): boolean {
  const recorded = meta.schema_version;
  return recorded !== GSTACK_SCHEMA_PACK_VERSION;
}
|
||||
|
||||
/** Reports whether the endpoint recorded in meta differs from the endpoint detected right now. */
function endpointSwitched(meta: CacheMeta): boolean {
  const current = detectEndpointHash();
  return meta.endpoint_hash !== current;
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Subcommand: get
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Outcome of `cmdGet`: where the digest lives and how it was obtained. */
interface GetResult {
  /** Location of the digest file (the file may be absent when state is 'missing'). */
  path: string;
  /**
   * How the digest was served:
   * - 'warm': cached copy exists and is within its TTL
   * - 'cold-refreshed': cache was stale or absent and was refreshed inline
   * - 'stale-fallback': refresh failed, an older cached copy is being used
   * - 'missing': no cached copy and the refresh did not produce one
   */
  state: 'warm' | 'cold-refreshed' | 'stale-fallback' | 'missing';
  /** Human-readable diagnostic, set for the fallback and missing states. */
  message?: string;
}
|
||||
|
||||
/**
 * `get` subcommand: resolves the digest for an entity, refreshing it when needed.
 *
 * Flow:
 * 1. If the recorded schema version or endpoint differs from the current one,
 *    the whole scope is rebuilt; the result is 'warm' when this entity came
 *    back fresh, otherwise 'missing'.
 * 2. A present, non-stale digest is served as 'warm'.
 * 3. Otherwise a refresh is attempted ('cold-refreshed' on success).
 * 4. On refresh failure an existing file is served as 'stale-fallback';
 *    with no file the result is 'missing'.
 *
 * @throws Error for an unknown entity name.
 */
export function cmdGet(entityName: string, projectSlug: string | null): GetResult {
  const spec = BRAIN_CACHE_ENTITIES[entityName];
  if (!spec) throw new Error(`Unknown entity: ${entityName}`);

  const { scope } = spec;
  const storedMeta = loadMeta(scope, projectSlug);

  const servedFresh = (m: CacheMeta): boolean =>
    hasFile(entityName, projectSlug) && !isStale(entityName, m);
  const respond = (state: GetResult['state'], message?: string): GetResult => {
    const path = entityPath(entityName, projectSlug);
    return message === undefined ? { path, state } : { path, state, message };
  };

  // Schema-version mismatch or endpoint switch → full rebuild of the scope (D4 A4).
  const identityChanged = schemaVersionMismatch(storedMeta) || endpointSwitched(storedMeta);
  if (identityChanged) {
    rebuildAllForScope(scope, projectSlug);
    // Re-read meta: the rebuild rewrote it. The rebuild may still have failed for this entity.
    return servedFresh(loadMeta(scope, projectSlug))
      ? respond('warm')
      : respond('missing', 'rebuild after schema/endpoint change');
  }

  if (servedFresh(storedMeta)) return respond('warm');

  // Stale or missing — try a cold refresh.
  if (refreshEntity(entityName, projectSlug)) return respond('cold-refreshed');

  // Refresh failed: stale-but-usable when a file exists, otherwise nothing to serve.
  return hasFile(entityName, projectSlug)
    ? respond('stale-fallback', 'brain unreachable; using stale cache')
    : respond('missing', 'brain unreachable; no cache available');
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Subcommand: refresh
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Lockfile dedup (T15 / D3)
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Resolves the refresh lock file for a caller.
 *
 * With a project slug the lock lives in that project's brain-cache directory;
 * without one it lives in the shared `<GSTACK_HOME>/brain-cache` directory.
 * The lock is keyed by the triggering project, not by entity scope.
 */
function lockPath(projectSlug: string | null): string {
  const LOCK_FILE = '.refresh.lock';
  if (projectSlug) {
    return join(GSTACK_HOME, 'projects', projectSlug, 'brain-cache', LOCK_FILE);
  }
  return join(GSTACK_HOME, 'brain-cache', LOCK_FILE);
}
|
||||
|
||||
/** Token returned by tryAcquireLock and consumed by releaseLock. */
interface LockHandle {
  /** File descriptor slot; tryAcquireLock always sets this to -1. */
  fd: number;
  /** Path of the lock file held by this handle. */
  path: string;
}
|
||||
|
||||
/**
 * Attempts to take the refresh lock for a project scope.
 *
 * Returns null when another holder has a fresh lock, when the lock file cannot
 * be written, or when the post-write ownership check shows another process
 * won the race. An existing lock is taken over when it is corrupt, older than
 * CACHE_REFRESH_LOCK_TIMEOUT_MS, or held by a dead PID on this host.
 *
 * NOTE: the lock is written via tmp + rename, which overwrites rather than
 * exclusively creating. Acquisition is therefore best-effort; the re-read
 * ownership check narrows, but does not eliminate, the race window.
 */
function tryAcquireLock(projectSlug: string | null): LockHandle | null {
  const path = lockPath(projectSlug);
  mkdirSync(dirname(path), { recursive: true });

  // If a lock exists, decide whether it is still valid.
  if (existsSync(path)) {
    try {
      const raw = readFileSync(path, 'utf-8');
      const lock = JSON.parse(raw) as { pid: number; host: string; ts: number };
      const age = Date.now() - lock.ts;
      const sameHost = lock.host === hostname();
      // Liveness can only be probed for PIDs on this host.
      const processGone = sameHost && lock.pid > 0 && !isPidAlive(lock.pid);
      if (age <= CACHE_REFRESH_LOCK_TIMEOUT_MS && !processGone) {
        return null; // someone else holds a fresh lock
      }
      // Stale: take over.
    } catch {
      // Corrupt or unreadable lock file → take over.
    }
  }

  // Publish our lock via tmp + rename so readers never see a partial payload.
  const payload = JSON.stringify({ pid: process.pid, host: hostname(), ts: Date.now() });
  const tmp = `${path}.tmp.${process.pid}.${Date.now()}`;
  try {
    writeFileSync(tmp, payload);
    renameSync(tmp, path);
  } catch {
    // Don't leave the temp file behind when the write or rename fails.
    try {
      unlinkSync(tmp);
    } catch {
      /* temp file may never have been created */
    }
    return null;
  }

  // Another process may have renamed its own lock over ours; verify ownership.
  try {
    const raw = readFileSync(path, 'utf-8');
    const lock = JSON.parse(raw) as { pid: number; host: string };
    if (lock.pid !== process.pid || lock.host !== hostname()) {
      return null;
    }
  } catch {
    return null;
  }
  return { fd: -1, path };
}
|
||||
|
||||
/**
 * Releases a lock obtained from tryAcquireLock (best effort, never throws).
 *
 * The lock file is removed only when it still names this process (same pid and
 * host). If another process took the lock over after it was judged stale, the
 * file belongs to that process and is left in place. A missing, unreadable, or
 * corrupt lock file is unlinked best-effort, as before.
 */
function releaseLock(handle: LockHandle): void {
  try {
    const lock = JSON.parse(readFileSync(handle.path, 'utf-8')) as { pid?: unknown; host?: unknown } | null;
    if (lock && (lock.pid !== process.pid || lock.host !== hostname())) {
      return; // taken over by another holder; not ours to delete
    }
  } catch {
    /* missing or corrupt lock: fall through to the best-effort unlink */
  }
  try {
    unlinkSync(handle.path);
  } catch {
    /* best effort */
  }
}
|
||||
|
||||
/**
 * Probes whether a process with the given PID exists on this host by sending
 * signal 0 (an existence check that delivers no signal).
 *
 * Returns true when the probe succeeds or fails with EPERM (the process exists
 * but belongs to another user). Returns false for any other error, and for
 * PIDs that are not positive integers: 0 and negative values address process
 * groups rather than a single process, so they can never identify a lock holder.
 */
function isPidAlive(pid: number): boolean {
  if (!Number.isInteger(pid) || pid <= 0) return false;
  try {
    process.kill(pid, 0);
    return true;
  } catch (err: unknown) {
    const code =
      typeof err === 'object' && err !== null && 'code' in err
        ? (err as { code?: unknown }).code
        : undefined;
    return code === 'EPERM'; // exists but we don't own it
  }
}
|
||||
|
||||
/**
 * Runs `fn` while holding the refresh lock for the given project scope.
 *
 * Returns the literal 'dedup' without calling `fn` when the lock could not be
 * acquired (another refresh is in flight); the caller decides whether to retry
 * or fall back to stale data. The lock is released even when `fn` throws.
 */
export function withRefreshLock<T>(projectSlug: string | null, fn: () => T): T | 'dedup' {
  const lock = tryAcquireLock(projectSlug);
  if (!lock) {
    return 'dedup';
  }
  let outcome: T;
  try {
    outcome = fn();
  } finally {
    releaseLock(lock);
  }
  return outcome;
}
|
||||
|
||||
/**
 * Refreshes a single entity's digest from the brain.
 *
 * Records the attempt time in meta, fetches and compresses the entity, trims
 * the digest to the entity's byte budget, writes it atomically, and stamps the
 * refresh time plus the current schema version and endpoint hash into meta.
 *
 * @returns true when a digest was written; false for an unknown entity or when
 *   the fetch produced nothing (the attempt timestamp is still saved).
 */
export function refreshEntity(entityName: string, projectSlug: string | null): boolean {
  const spec = BRAIN_CACHE_ENTITIES[entityName];
  if (!spec) return false;
  const { scope } = spec;

  // Record the attempt up front so failures remain visible in meta.
  const meta = loadMeta(scope, projectSlug);
  const attempts = meta.last_attempt || {};
  attempts[entityName] = Date.now();
  meta.last_attempt = attempts;

  // Per-entity fetch strategy (direct page read vs. derived digest) lives in fetchAndCompressEntity.
  const digest = fetchAndCompressEntity(entityName, projectSlug);
  if (digest === null) {
    saveMeta(scope, projectSlug, meta);
    return false;
  }

  // Per-entity budget; the per-skill budget is enforced separately at preflight injection time.
  const overBudget = Buffer.byteLength(digest, 'utf-8') > spec.budget_bytes;
  const payload = overBudget ? truncateToBudget(digest, spec.budget_bytes) : digest;

  atomicWrite(entityPath(entityName, projectSlug), payload);

  // Success: stamp refresh time and keep schema/endpoint identity current.
  meta.last_refresh[entityName] = Date.now();
  meta.schema_version = GSTACK_SCHEMA_PACK_VERSION;
  meta.endpoint_hash = detectEndpointHash();
  saveMeta(scope, projectSlug, meta);
  return true;
}
|
||||
|
||||
/**
 * Refreshes every entity that matches the caller's scope and tallies the results.
 *
 * With a project slug, cross-project entities are skipped; without one,
 * per-project entities are skipped.
 *
 * @returns counts of entities that refreshed successfully and that failed.
 */
export function refreshAll(projectSlug: string | null): { success: number; failed: number } {
  const tally = { success: 0, failed: 0 };
  for (const [name, spec] of Object.entries(BRAIN_CACHE_ENTITIES)) {
    const outOfScope = projectSlug ? spec.scope === 'cross-project' : spec.scope === 'per-project';
    if (outOfScope) continue;
    if (refreshEntity(name, projectSlug)) {
      tally.success += 1;
    } else {
      tally.failed += 1;
    }
  }
  return tally;
}
|
||||
|
||||
/**
 * Rebuilds a whole scope after a schema-version mismatch or endpoint switch:
 * deletes the scope's digest files (the directory is kept), writes a fresh
 * meta stamped with the current schema version and endpoint hash, then
 * refreshes every entity in the scope.
 */
function rebuildAllForScope(scope: 'cross-project' | 'per-project', projectSlug: string | null): void {
  const namesInScope = Object.entries(BRAIN_CACHE_ENTITIES)
    .filter(([, spec]) => spec.scope === scope)
    .map(([name]) => name);

  // 1. Remove existing digest files (best effort).
  for (const name of namesInScope) {
    const digestFile = entityPath(name, projectSlug);
    if (!existsSync(digestFile)) continue;
    try {
      unlinkSync(digestFile);
    } catch {
      /* best effort */
    }
  }

  // 2. Reset meta so no entity appears refreshed.
  saveMeta(scope, projectSlug, {
    schema_version: GSTACK_SCHEMA_PACK_VERSION,
    endpoint_hash: detectEndpointHash(),
    last_refresh: {},
    last_attempt: {},
  });

  // 3. Repopulate; individual failures are reflected in meta by refreshEntity.
  for (const name of namesInScope) {
    refreshEntity(name, projectSlug);
  }
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Subcommand: invalidate
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * `invalidate` subcommand: forgets an entity's last-refresh timestamp so the
 * next `get` treats it as stale. The digest file itself is left in place.
 *
 * @throws Error for an unknown entity name.
 */
export function cmdInvalidate(entityName: string, projectSlug: string | null): void {
  const spec = BRAIN_CACHE_ENTITIES[entityName];
  if (!spec) {
    throw new Error(`Unknown entity: ${entityName}`);
  }
  const { scope } = spec;
  const meta = loadMeta(scope, projectSlug);
  delete meta.last_refresh[entityName];
  saveMeta(scope, projectSlug, meta);
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Fetch + compress per-entity
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Produces the digest markdown for an entity, or null when no digest can be
 * produced (unknown entity, missing project slug for a per-project page, or
 * the underlying fetch returned nothing).
 *
 * Simple entities map to a single brain page (`gstack/<entity>/<projectSlug>`);
 * goals, recent-decisions, and salience use dedicated list-based fetchers.
 * The simple per-project pages return null when no project slug is supplied
 * rather than querying a `.../null` slug, matching fetchProduct and fetchGoals.
 */
function fetchAndCompressEntity(entityName: string, projectSlug: string | null): string | null {
  switch (entityName) {
    case 'user-profile':
      return fetchUserProfile();
    case 'product':
      return fetchProduct(projectSlug);
    case 'goals':
      return fetchGoals(projectSlug);
    case 'developer-persona':
    case 'brand':
    case 'competitive-intel':
      // Single-page entities keyed by project; the page namespace equals the entity name.
      if (!projectSlug) return null;
      return fetchSimplePage(`gstack/${entityName}/${projectSlug}`);
    case 'recent-decisions':
      return fetchRecentDecisions(projectSlug);
    case 'salience':
      // The D9 allowlist gate is applied inside fetchSalience.
      return fetchSalience(projectSlug);
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Fetches one brain page with `gbrain get <slug> --json` (10s timeout) and
 * compresses it into digest form.
 *
 * @returns the digest, or null when the command exits non-zero, its output is
 *   not valid JSON, the page has no body, or compression throws.
 */
function fetchSimplePage(slug: string): string | null {
  const outcome = spawnGbrain(['get', slug, '--json'], { timeout: 10_000 });
  if (outcome.status !== 0) {
    return null;
  }
  try {
    const parsed = JSON.parse(outcome.stdout) as { body?: string; title?: string };
    const body = parsed?.body;
    return body ? compressPage(slug, parsed.title || slug, body) : null;
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Fetches the digest of the current user's profile page
 * (`gstack/user-profile/<slug>`).
 *
 * The slug is resolved from the environment, first match wins:
 * GSTACK_USER_SLUG (explicit override), USER, USERNAME (set on Windows, where
 * USER usually is not), LOGNAME, and finally the literal 'unknown'.
 */
function fetchUserProfile(): string | null {
  const slug =
    process.env.GSTACK_USER_SLUG ||
    process.env.USER ||
    process.env.USERNAME ||
    process.env.LOGNAME ||
    'unknown';
  return fetchSimplePage(`gstack/user-profile/${slug}`);
}
|
||||
|
||||
/** Fetches the digest of `gstack/product/<projectSlug>`; null when no project slug is given. */
function fetchProduct(projectSlug: string | null): string | null {
  return projectSlug ? fetchSimplePage(`gstack/product/${projectSlug}`) : null;
}
|
||||
|
||||
/**
 * Builds the "Active goals" digest for a project.
 *
 * Lists up to 10 pages of type `gstack/goal`, keeps those whose slug starts
 * with `gstack/goal/<projectSlug>/`, and renders one bullet per goal. A project
 * with no matching goals still gets a valid digest containing a placeholder line.
 *
 * @returns null when no project slug is given or the listing has no `pages`.
 */
function fetchGoals(projectSlug: string | null): string | null {
  if (!projectSlug) return null;

  const listing = execGbrainJson<{ pages?: Array<{ slug: string; title?: string; body?: string }> }>([
    'list-pages',
    '--type', 'gstack/goal',
    '--limit', '10',
    '--json',
  ]);
  const pages = listing?.pages;
  if (!pages) return null;

  const prefix = `gstack/goal/${projectSlug}/`;
  const heading = `# Active goals (project: ${projectSlug})`;
  const bullets: string[] = [];
  for (const page of pages) {
    if (page.slug?.startsWith(prefix)) {
      bullets.push(`- [[${page.slug}]] — ${page.title || '(untitled)'}`);
    }
  }

  if (bullets.length === 0) {
    // An empty digest is still valid: header plus a placeholder line.
    return `${heading}\n\n_No active goals recorded yet._\n`;
  }
  return `${heading}\n\n${bullets.join('\n')}\n`;
}
|
||||
|
||||
/**
 * Builds the "Recent decisions" digest: the 5 most recently updated
 * `gstack/skill-run` pages, one line each (title, falling back to slug).
 *
 * When the listing is unavailable or contains no pages, a placeholder digest
 * is returned instead of a header with an empty body.
 *
 * NOTE(review): the query is not filtered by `projectSlug`, so runs from other
 * projects may appear; the skill-run slug layout is not visible here — confirm
 * before adding a prefix filter.
 *
 * @returns null when no project slug is given.
 */
function fetchRecentDecisions(projectSlug: string | null): string | null {
  if (!projectSlug) return null;
  const result = execGbrainJson<{ pages?: Array<{ slug: string; title?: string }> }>([
    'list-pages',
    '--type', 'gstack/skill-run',
    '--limit', '5',
    '--sort', 'updated_desc',
    '--json',
  ]);
  const pages = result?.pages;
  if (!pages || pages.length === 0) {
    return `# Recent decisions (project: ${projectSlug})\n\n_No prior skill runs recorded._\n`;
  }
  const lines = pages.map((p) => `- ${p.title || p.slug}`);
  return `# Recent decisions (project: ${projectSlug})\n\n${lines.join('\n')}\n`;
}
|
||||
|
||||
/**
 * Resolves the salience allowlist (slug prefixes permitted in the salience digest).
 *
 * Resolution order:
 * 1. `GSTACK_SALIENCE_ALLOWLIST` env var (comma-separated; entries are trimmed
 *    and empties dropped). A value that yields no entries, such as "," or
 *    whitespace, is treated as unset rather than producing an empty allowlist.
 * 2. `gstack-config get salience_allowlist` from `~/.claude/skills/gstack/bin`
 *    (2s timeout), parsed the same way.
 * 3. SALIENCE_DEFAULT_ALLOWLIST on any failure or empty result.
 */
export function getSalienceAllowlist(): ReadonlyArray<string> {
  const parseList = (raw: string): string[] =>
    raw.split(',').map((s) => s.trim()).filter(Boolean);

  // Short-circuit via env var for tests + headless callers.
  const env = process.env.GSTACK_SALIENCE_ALLOWLIST;
  if (typeof env === 'string' && env.length > 0) {
    const fromEnv = parseList(env);
    if (fromEnv.length > 0) return fromEnv;
  }

  // Shell out to gstack-config; fall back to defaults on any failure
  // (config script missing, non-zero exit, empty output).
  try {
    const bin = join(homedir(), '.claude', 'skills', 'gstack', 'bin', 'gstack-config');
    if (!existsSync(bin)) return SALIENCE_DEFAULT_ALLOWLIST;
    const result = spawnSync(bin, ['get', 'salience_allowlist'], { timeout: 2000, encoding: 'utf-8' });
    if (result.status !== 0 || !result.stdout) return SALIENCE_DEFAULT_ALLOWLIST;
    const parts = parseList(result.stdout);
    return parts.length > 0 ? parts : SALIENCE_DEFAULT_ALLOWLIST;
  } catch {
    return SALIENCE_DEFAULT_ALLOWLIST;
  }
}
|
||||
|
||||
/**
 * D9 salience privacy gate: a slug passes only when it begins with at least
 * one prefix from the allowlist. Slugs that do not pass are dropped before the
 * digest is written, keeping non-allowlisted (potentially personal) pages out
 * of planning prompts by default.
 */
export function isSalienceSlugAllowed(slug: string, allowlist: ReadonlyArray<string>): boolean {
  return allowlist.some((prefix) => slug.startsWith(prefix));
}
|
||||
|
||||
/**
 * Builds the "Recent salience" digest from `gbrain get-recent-salience`
 * (14-day window, limit 10).
 *
 * Pages whose slug is missing or not covered by the salience allowlist are
 * removed BEFORE rendering, so they are never written into the digest file.
 * The digest notes how many entries were stripped. `projectSlug` is accepted
 * for signature parity with the other fetchers and is not used.
 */
function fetchSalience(projectSlug: string | null): string | null {
  type SalientPage = { slug: string; title?: string; emotional_weight?: number };
  const response = execGbrainJson<{ pages?: SalientPage[] }>([
    'get-recent-salience',
    '--days', '14',
    '--limit', '10',
    '--json',
  ]);
  const pages = response?.pages;
  if (!pages) return `# Recent salience\n\n_No salient pages in last 14d._\n`;

  // D9 privacy gate: filter against the allowlist before anything is rendered.
  const allowlist = getSalienceAllowlist();
  const visible = pages.filter((page) => page.slug && isSalienceSlugAllowed(page.slug, allowlist));
  const hiddenCount = pages.length - visible.length;
  const title = `# Recent salience (last 14d)`;

  if (visible.length === 0) {
    if (hiddenCount > 0) {
      return `${title}\n\n_All ${hiddenCount} salient entries stripped by allowlist gate (no work-flow content in window)._\n`;
    }
    return `${title}\n\n_No salient pages in last 14d._\n`;
  }

  const bullets = visible.map(
    (page) => `- [[${page.slug}]] — ${page.title || ''} (weight: ${page.emotional_weight?.toFixed(2) ?? 'n/a'})`,
  );
  const suffix = hiddenCount > 0 ? `\n\n_${hiddenCount} private entries stripped by allowlist gate._` : '';
  return `${title}\n\n${bullets.join('\n')}${suffix}\n`;
}
|
||||
|
||||
/**
 * Turns a brain page body into digest form: a `# <title>` heading, a
 * `slug: <slug>` line, a blank line, then the body with any leading YAML
 * frontmatter removed and surrounding whitespace trimmed.
 *
 * Frontmatter is recognized only at the very start of the body (after optional
 * leading whitespace): a `---` line, optional content, and a closing `---`
 * line. `---` horizontal rules later in the body are left untouched.
 * No section-level trimming is done here; per-entity budget enforcement
 * happens at the caller (refreshEntity).
 */
function compressPage(slug: string, title: string, body: string): string {
  const FRONTMATTER = /^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/;
  const trimmed = body.trimStart().replace(FRONTMATTER, '').trim();
  return `# ${title}\nslug: ${slug}\n\n${trimmed}\n`;
}
|
||||
|
||||
/**
 * Truncates a digest so that the result, INCLUDING the appended truncation
 * notice, fits within `budgetBytes` of UTF-8 (provided the budget is at least
 * as large as the notice itself).
 *
 * - Content already within budget is returned unchanged.
 * - The cut never lands inside a multi-byte UTF-8 sequence, so no U+FFFD
 *   replacement characters are introduced.
 * - If a newline falls within the last 20% of the kept bytes, the cut moves
 *   back to it so the digest ends on a complete line.
 */
function truncateToBudget(content: string, budgetBytes: number): string {
  const buf = Buffer.from(content, 'utf-8');
  if (buf.byteLength <= budgetBytes) return content;

  const footer = `\n\n_(digest truncated to ${budgetBytes}-byte budget)_\n`;
  // Reserve room for the notice so the final digest honors the budget.
  let end = Math.max(0, Math.floor(budgetBytes) - Buffer.byteLength(footer, 'utf-8'));

  // buf[end] is the first dropped byte; if it is a continuation byte (10xxxxxx)
  // the cut is mid-character, so back up to the start of that character.
  while (end > 0 && ((buf[end] ?? 0) & 0xc0) === 0x80) end--;

  if (end > 0) {
    // Work in byte offsets throughout (0x0a is '\n').
    const lastNewline = buf.lastIndexOf(0x0a, end - 1);
    if (lastNewline > end * 0.8) end = lastNewline;
  }

  return `${buf.subarray(0, end).toString('utf-8')}${footer}`;
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Subcommand: digest
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * `digest` subcommand: previews the digest for a brain page slug without
 * writing anything to the cache.
 *
 * @returns the digest text, or null when the page cannot be fetched.
 */
export function cmdDigest(slug: string): string | null {
  const preview = fetchSimplePage(slug);
  return preview;
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Subcommand: meta
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * `meta` subcommand: returns the per-project meta when a project slug is
 * given, otherwise the cross-project meta.
 */
export function cmdMeta(projectSlug: string | null): CacheMeta {
  return projectSlug ? loadMeta('per-project', projectSlug) : loadMeta('cross-project', null);
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Subcommand: bootstrap (T2b)
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** One proposed brain page produced by bootstrap: target slug, title, and markdown body. */
type BootstrapPageDraft = { slug: string; title: string; body: string };

/**
 * Draft entity content synthesized by `cmdBootstrap` for a fresh project.
 *
 * Nothing here is written to the brain by the CLI: the draft is emitted as JSON
 * so the calling skill template can ask the user to confirm each entity
 * ("Synthesized X — looks right?") before any write. This keeps the CLI free
 * of prompt logic while avoiding silent auto-extracted content (D10 T4 fix).
 * Every field is optional; a field is present only when something was synthesized.
 */
export interface BootstrapDraft {
  product?: BootstrapPageDraft;
  goals?: Array<BootstrapPageDraft>;
  developer_persona?: BootstrapPageDraft;
  brand?: BootstrapPageDraft;
  competitive_intel?: BootstrapPageDraft;
}
|
||||
|
||||
/**
 * `bootstrap` subcommand: synthesizes draft entities for a project from local files.
 *
 * - product: derived from CLAUDE.md / README.md in the repo root
 *   ($GSTACK_REPO_ROOT, else the current working directory).
 * - goals: up to 3 hints derived from the project's `learnings.jsonl`.
 *
 * Missing or unreadable source files are treated as empty. Only the draft is
 * returned; nothing is written.
 */
export function cmdBootstrap(projectSlug: string): BootstrapDraft {
  const repoRoot = process.env.GSTACK_REPO_ROOT || process.cwd();
  const readOrEmpty = (fileName: string): string => {
    try {
      return readFileSync(join(repoRoot, fileName), 'utf-8');
    } catch {
      return ''; // missing is fine
    }
  };

  const draft: BootstrapDraft = {};

  // Product synthesis from CLAUDE.md / README.md.
  const claudeMd = readOrEmpty('CLAUDE.md');
  const readmeMd = readOrEmpty('README.md');
  const productBody = synthesizeProductLead(claudeMd, readmeMd, projectSlug);
  if (productBody) {
    draft.product = { slug: `gstack/product/${projectSlug}`, title: projectSlug, body: productBody };
  }

  // Goal hints from the project's learnings log, capped at three.
  const learningsFile = join(GSTACK_HOME, 'projects', projectSlug, 'learnings.jsonl');
  const hints = synthesizeGoalsHints(learningsFile, repoRoot);
  if (hints.length > 0) {
    draft.goals = hints.slice(0, 3).map(({ title, body }, position) => ({
      slug: `gstack/goal/${projectSlug}/bootstrap-${position + 1}`,
      title,
      body,
    }));
  }

  return draft;
}
|
||||
|
||||
/**
 * Drafts a product page body from project docs.
 *
 * Uses CLAUDE.md when it is non-empty, otherwise README.md. The page heading is
 * the first `# ` heading of that document (falling back to `slug`), and the
 * "What" section is its first run of non-heading lines, capped at 500
 * characters. The remaining sections are fill-in placeholders.
 *
 * @returns the markdown body, or null when both documents are empty.
 */
function synthesizeProductLead(claudeMd: string, readmeMd: string, slug: string): string | null {
  const doc = claudeMd ? claudeMd : readmeMd;
  if (!doc) return null;

  // First H1, else the slug.
  const heading = (/^#\s+(.+)$/m.exec(doc)?.[1] ?? '').trim() || slug;
  // First paragraph whose lines do not start with '#'.
  const firstParagraph = /(?:^|\n)([^#\n][^\n]+(?:\n[^#\n][^\n]+)*)/.exec(doc)?.[1] ?? '';
  const lead = firstParagraph.trim() || '(no description found in CLAUDE.md or README)';

  const sections: Array<[string, string]> = [
    ['What', lead.slice(0, 500)],
    ['Stage', '(fill in current stage, e.g., v1.x shipped, in development, paused)'],
    ['Team', '(fill in team composition + size)'],
    ['Active goals', '(populated by /office-hours over time)'],
    ['Recent decisions', '(populated by /plan-ceo-review over time)'],
  ];
  const rendered = sections.map(([name, text]) => `## ${name}\n${text}\n`).join('\n');
  return `# ${heading}\n\n${rendered}`;
}
|
||||
|
||||
/**
 * Derive goal hints from the tail of a project's learnings.jsonl.
 *
 * Reads the last 10 non-empty lines, parses each as JSON and keeps entries
 * whose `type` is 'pattern' or 'architecture' and whose `insight` is a
 * non-empty string. Malformed lines and an unreadable or missing file are
 * skipped silently (best effort).
 *
 * @param learningsPath - Path to the learnings.jsonl file.
 * @param repoRoot - Repository root; not read by this function at present.
 * @returns One hint per qualifying entry; the title is the insight cut to 80 chars.
 */
function synthesizeGoalsHints(learningsPath: string, repoRoot: string): Array<{ title: string; body: string }> {
  const hints: Array<{ title: string; body: string }> = [];
  if (!existsSync(learningsPath)) return hints;

  let lines: string[];
  try {
    lines = readFileSync(learningsPath, 'utf-8').split('\n').filter(Boolean);
  } catch {
    return hints; // unreadable file, skip
  }

  for (const line of lines.slice(-10)) {
    let entry: unknown;
    try {
      entry = JSON.parse(line);
    } catch {
      continue; // skip malformed line
    }
    if (typeof entry !== 'object' || entry === null) continue;

    const { insight, type } = entry as { insight?: unknown; type?: unknown };
    // Require a real string: an array `insight` is truthy and has .slice(),
    // which would otherwise produce a non-string title.
    if (typeof insight !== 'string' || insight === '') continue;
    if (type !== 'pattern' && type !== 'architecture') continue;

    hints.push({
      title: insight.slice(0, 80),
      body: `Source: learnings.jsonl\nType: ${type}\n\n${insight}\n`,
    });
  }
  return hints;
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Subcommand: list (T18)
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Enumerate the gstack-owned pages stored in the brain, tagged with the
 * namespace they were found under, so a user can audit what gstack wrote.
 *
 * Each gstack/<type> namespace is queried separately through `list-pages`
 * (limit 200). When `projectSlug` is given, only pages whose slug contains
 * `/<projectSlug>` are kept; user-profile pages are always kept.
 *
 * NOTE(review): the project filter is a substring test, so project "foo"
 * also matches slugs under "foobar" — confirm whether that is intended.
 */
export function cmdList(projectSlug: string | null): Array<{ type: string; slug: string; title?: string }> {
  const namespaces = [
    'gstack/user-profile',
    'gstack/product',
    'gstack/goal',
    'gstack/developer-persona',
    'gstack/brand',
    'gstack/competitive-intel',
    'gstack/skill-run',
    'gstack/take',
  ];
  const found: Array<{ type: string; slug: string; title?: string }> = [];

  namespaces.forEach((type) => {
    const listing = execGbrainJson<{ pages?: Array<{ slug: string; title?: string }> }>([
      'list-pages',
      '--type', type,
      '--limit', '200',
      '--json',
    ]);
    const pages = listing?.pages;
    if (!pages) return;

    // No project filter, or a namespace that is not project-scoped.
    const keepEverything = !projectSlug || type === 'gstack/user-profile';
    for (const page of pages) {
      if (keepEverything || page.slug?.includes(`/${projectSlug}`)) {
        found.push({ type, slug: page.slug, title: page.title });
      }
    }
  });

  return found;
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Subcommand: purge (T18)
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Remove a single gstack-owned page from the brain.
 *
 * This is the raw operation: any confirmation prompt belongs to the caller
 * (the skill template). Slugs outside the `gstack/` namespace are refused
 * before anything is spawned.
 *
 * @returns `{ deleted: true }` on success, otherwise `{ deleted: false, error }`
 *   carrying the trimmed stderr or the exit status.
 */
export function cmdPurge(slug: string): { deleted: boolean; error?: string } {
  const ownedByGstack = slug.startsWith('gstack/');
  if (!ownedByGstack) {
    return { deleted: false, error: 'refusing to purge non-gstack page' };
  }

  const { status, stderr } = spawnGbrain(['delete-page', slug], { timeout: 10_000 });
  if (status === 0) {
    // Cached digests that referenced this page are not touched here;
    // derived digests may need an explicit invalidate.
    return { deleted: true };
  }
  return { deleted: false, error: stderr?.trim() || `exit ${status}` };
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// CLI dispatch
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Split a process argv into subcommand, positional arguments and `--flags`.
 *
 * argv[2] is the subcommand. Among the remaining tokens, `--name` followed by
 * a token that does not itself start with `--` becomes a string flag;
 * otherwise it becomes a boolean `true` flag. Everything else is positional.
 */
function parseArgs(argv: string[]): { cmd: string; positional: string[]; flags: Record<string, string | boolean> } {
  const cmd = argv[2] || '';
  const tokens = argv.slice(3);
  const positional: string[] = [];
  const flags: Record<string, string | boolean> = {};

  let cursor = 0;
  while (cursor < tokens.length) {
    const token = tokens[cursor];
    cursor += 1;

    if (!token.startsWith('--')) {
      positional.push(token);
      continue;
    }

    const name = token.slice(2);
    const candidate = tokens[cursor];
    if (candidate && !candidate.startsWith('--')) {
      // The following token is this flag's value: consume it too.
      flags[name] = candidate;
      cursor += 1;
    } else {
      flags[name] = true;
    }
  }

  return { cmd, positional, flags };
}
|
||||
|
||||
/**
 * Read the `--project` flag as a slug.
 *
 * @returns The flag value when it is a string; null when the flag is absent
 *   or was passed without a value (boolean).
 */
function projectSlugFromFlag(flags: Record<string, string | boolean>): string | null {
  const { project } = flags;
  if (typeof project !== 'string') {
    return null;
  }
  return project;
}
|
||||
|
||||
/**
 * Write the CLI usage summary to stderr.
 *
 * NOTE(review): the per-line indentation of the usage text could not be
 * recovered from the whitespace-stripped view this was edited from — confirm
 * the literal against the checked-in file before merging.
 */
function printUsage(): void {
  const usageText = `Usage: gstack-brain-cache <subcommand>

Subcommands:
get <entity-name> [--project <slug>]
refresh [--full] [--entity X] [--project <slug>]
invalidate <entity-name> [--project <slug>]
digest <entity-slug>
meta [--project <slug>]
bootstrap --project <slug> — emit synthesized entity drafts (JSON)
list [--project <slug>] — list gstack-owned pages in brain
purge <slug> — delete a gstack-owned brain page (refuses non-gstack/ slugs)
`;
  process.stderr.write(usageText);
}
|
||||
|
||||
/**
 * CLI entry point: parse argv, dispatch to the matching subcommand and map
 * the outcome to an exit code.
 *
 * Exit codes produced here:
 *   0 — success (including help output)
 *   1 — usage error, failed operation, unknown subcommand or thrown error
 *   2 — `get` found no cache / `digest` could not fetch the page
 *   3 — `refresh` skipped because another refresh holds the lock
 */
async function main(): Promise<number> {
  const { cmd, positional, flags } = parseArgs(process.argv);
  const projectSlug = projectSlugFromFlag(flags);

  try {
    switch (cmd) {
      case 'get': {
        const entityName = positional[0];
        if (!entityName) { printUsage(); return 1; }
        const result = cmdGet(entityName, projectSlug);
        if (result.state === 'missing') {
          process.stderr.write(`(${result.state}: ${result.message ?? 'no cache'})\n`);
          return 2;
        }
        // Any non-warm state is still served, with the state noted on stderr.
        if (result.state !== 'warm') {
          process.stderr.write(`(${result.state}${result.message ? ': ' + result.message : ''})\n`);
        }
        process.stdout.write(readFileSync(result.path, 'utf-8'));
        return 0;
      }
      case 'refresh': {
        // D3: dedup concurrent refreshes via lockfile. Skipped (dedup) when
        // another process is already mid-refresh on the same project.
        const entityFlag = flags.entity;
        if (entityFlag === true) {
          // `--entity` was passed without a value. Coercing the boolean with
          // String() used to refresh an entity literally named "true".
          process.stderr.write('refresh --entity requires an entity name\n');
          printUsage();
          return 1;
        }
        if (entityFlag) {
          const entityName = entityFlag;
          const result = withRefreshLock(projectSlug, () => refreshEntity(entityName, projectSlug));
          if (result === 'dedup') {
            process.stderr.write(`(dedup: another refresh in flight)\n`);
            return 3;
          }
          process.stdout.write(result ? `refreshed ${entityName}\n` : `failed to refresh ${entityName}\n`);
          return result ? 0 : 1;
        }
        const allResult = withRefreshLock(projectSlug, () => refreshAll(projectSlug));
        if (allResult === 'dedup') {
          process.stderr.write(`(dedup: another refresh in flight)\n`);
          return 3;
        }
        process.stdout.write(`refreshed=${allResult.success} failed=${allResult.failed}\n`);
        return allResult.failed > 0 ? 1 : 0;
      }
      case 'invalidate': {
        const entityName = positional[0];
        if (!entityName) { printUsage(); return 1; }
        cmdInvalidate(entityName, projectSlug);
        process.stdout.write(`invalidated ${entityName}\n`);
        return 0;
      }
      case 'digest': {
        const slug = positional[0];
        if (!slug) { printUsage(); return 1; }
        const content = cmdDigest(slug);
        if (content === null) {
          process.stderr.write('brain unreachable or page not found\n');
          return 2;
        }
        process.stdout.write(content);
        return 0;
      }
      case 'meta': {
        const meta = cmdMeta(projectSlug);
        process.stdout.write(JSON.stringify(meta, null, 2) + '\n');
        return 0;
      }
      case 'bootstrap': {
        if (!projectSlug) {
          process.stderr.write('bootstrap requires --project <slug>\n');
          return 1;
        }
        const draft = cmdBootstrap(projectSlug);
        process.stdout.write(JSON.stringify(draft, null, 2) + '\n');
        return 0;
      }
      case 'list': {
        const pages = cmdList(projectSlug);
        if (flags.json) {
          process.stdout.write(JSON.stringify(pages, null, 2) + '\n');
        } else {
          // Tab-separated: type, slug, title (empty when missing).
          for (const p of pages) {
            process.stdout.write(`${p.type}\t${p.slug}\t${p.title ?? ''}\n`);
          }
        }
        return 0;
      }
      case 'purge': {
        const slug = positional[0];
        if (!slug) { printUsage(); return 1; }
        const result = cmdPurge(slug);
        if (result.deleted) {
          process.stdout.write(`deleted ${slug}\n`);
          return 0;
        }
        process.stderr.write(`failed: ${result.error}\n`);
        return 1;
      }
      case '':
      case 'help':
      case '--help':
      case '-h':
        printUsage();
        return 0;
      default:
        process.stderr.write(`unknown subcommand: ${cmd}\n`);
        printUsage();
        return 1;
    }
  } catch (err) {
    // Any exception from a subcommand becomes a one-line error and exit 1.
    process.stderr.write(`error: ${err instanceof Error ? err.message : String(err)}\n`);
    return 1;
  }
}
|
||||
|
||||
// Script entry: run the CLI only when this file is executed directly, so that
// tests importing the module do not trigger it. The exit code is whatever
// main() resolves to.
if (import.meta.main) {
  void (async () => {
    process.exit(await main());
  })();
}
|
||||
|
|
@ -192,10 +192,7 @@ function resolveSkillFile(args: CliArgs): string | null {
|
|||
|
||||
function gbrainAvailable(): boolean {
|
||||
try {
|
||||
execFileSync("gbrain", ["--version"], {
|
||||
stdio: "ignore",
|
||||
timeout: MCP_TIMEOUT_MS,
|
||||
});
|
||||
execFileSync("command", ["-v", "gbrain"], { stdio: "ignore" });
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -136,11 +136,7 @@ def load_privacy_map(path):
|
|||
|
||||
allowlist_globs = load_lines(allowlist_path)
|
||||
privacy_map = load_privacy_map(privacy_path)
|
||||
# Normalize skip entries to the POSIX form queued paths use, so a backslash
|
||||
# entry in .brain-skip.txt still matches on Windows. The drain is the safety
|
||||
# boundary that actually stages files, so it must normalize identically to
|
||||
# discover_new — otherwise an explicitly-skipped file gets committed.
|
||||
skip_lines = {s.replace(os.sep, "/") for s in load_lines(skip_path)}
|
||||
skip_lines = set(load_lines(skip_path))
|
||||
|
||||
# Read queue; collect unique file paths.
|
||||
queue_paths = set()
|
||||
|
|
@ -257,8 +253,6 @@ subcmd_once() {
|
|||
|
||||
# Stage with git add -f (forces past .gitignore=*) explicit paths only.
|
||||
while IFS= read -r p; do
|
||||
p="${p%$'\r'}" # Windows: compute_paths_to_stage's python print() emits CRLF;
|
||||
# a trailing CR makes the pathspec match nothing (silent no-stage).
|
||||
[ -z "$p" ] && continue
|
||||
git -C "$GSTACK_HOME" add -f -- "$p" 2>/dev/null || true
|
||||
done < "$paths_file"
|
||||
|
|
@ -382,13 +376,10 @@ subcmd_discover_new() {
|
|||
exit 0
|
||||
fi
|
||||
# Walk allowlist globs; enqueue any file where mtime+size differs from cursor.
|
||||
python3 - "$GSTACK_HOME" "$ALLOWLIST" "$DISCOVER_CURSOR" <<'PYEOF' 2>/dev/null || true
|
||||
import sys, os, json, fnmatch
|
||||
from datetime import datetime, timezone
|
||||
python3 - "$GSTACK_HOME" "$ALLOWLIST" "$DISCOVER_CURSOR" "$SCRIPT_DIR/gstack-brain-enqueue" <<'PYEOF' 2>/dev/null || true
|
||||
import sys, os, json, glob, fnmatch, subprocess, hashlib
|
||||
|
||||
gstack_home, allowlist_path, cursor_path = sys.argv[1:4]
|
||||
queue_path = os.path.join(gstack_home, ".brain-queue.jsonl")
|
||||
skip_path = os.path.join(gstack_home, ".brain-skip.txt")
|
||||
gstack_home, allowlist_path, cursor_path, enqueue_bin = sys.argv[1:5]
|
||||
|
||||
def load_lines(path):
|
||||
try:
|
||||
|
|
@ -412,12 +403,8 @@ def save_cursor(path, data):
|
|||
pass
|
||||
|
||||
allowlist = load_lines(allowlist_path)
|
||||
# Normalize skip entries to the same POSIX form as `rel` below, so a
|
||||
# backslash entry in .brain-skip.txt still matches a normalized path on Windows.
|
||||
skip = {s.replace(os.sep, "/") for s in load_lines(skip_path)}
|
||||
cursor = load_cursor(cursor_path)
|
||||
new_cursor = dict(cursor)
|
||||
to_enqueue = []
|
||||
|
||||
# Walk all files under gstack_home, match against allowlist.
|
||||
for root, dirs, files in os.walk(gstack_home):
|
||||
|
|
@ -426,54 +413,22 @@ for root, dirs, files in os.walk(gstack_home):
|
|||
continue
|
||||
for name in files:
|
||||
full = os.path.join(root, name)
|
||||
# Repo paths are POSIX-relative. os.path.relpath yields backslash
|
||||
# separators on Windows, which never match the forward-slash allowlist
|
||||
# globs (e.g. "projects/*/learnings.jsonl"), so discovery silently
|
||||
# enqueued nothing under projects/ on Windows. Normalize to "/".
|
||||
rel = os.path.relpath(full, gstack_home).replace(os.sep, "/")
|
||||
rel = os.path.relpath(full, gstack_home)
|
||||
if rel.startswith(".brain-"):
|
||||
continue
|
||||
if not any(fnmatch.fnmatchcase(rel, pat) for pat in allowlist):
|
||||
continue
|
||||
if rel in skip:
|
||||
matched = any(fnmatch.fnmatchcase(rel, pat) for pat in allowlist)
|
||||
if not matched:
|
||||
continue
|
||||
try:
|
||||
st = os.stat(full)
|
||||
key = f"{int(st.st_mtime)}:{st.st_size}"
|
||||
except OSError:
|
||||
continue
|
||||
if cursor.get(rel) != key:
|
||||
to_enqueue.append((rel, key))
|
||||
|
||||
# Append to the queue directly. The previous implementation shelled out to
|
||||
# gstack-brain-enqueue once per file, but Windows Python cannot exec a
|
||||
# bash-shebang script (the spawn fails with a fork error), so discovery
|
||||
# enqueued nothing on Windows even after the path-match fix above.
|
||||
# Writing the queue line here is platform-agnostic; the drain step
|
||||
# (compute_paths_to_stage) still re-applies the skip-list + privacy filters.
|
||||
if to_enqueue:
|
||||
ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
try:
|
||||
# One atomic append per record (O_APPEND, each line < PIPE_BUF), matching
|
||||
# gstack-brain-enqueue's concurrency contract so a writer-shim append
|
||||
# running in parallel can't interleave mid-record. Buffered text writes
|
||||
# don't guarantee that. Compact separators match the shim's JSON shape.
|
||||
fd = os.open(queue_path, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o644)
|
||||
try:
|
||||
for rel, key in to_enqueue:
|
||||
rec = json.dumps({"file": rel, "ts": ts}, separators=(",", ":"))
|
||||
os.write(fd, (rec + "\n").encode("utf-8"))
|
||||
finally:
|
||||
os.close(fd)
|
||||
except OSError:
|
||||
# Queue write failed (disk full, AV file lock). Leave the cursor
|
||||
# unadvanced so these files are retried on the next discover instead of
|
||||
# being silently recorded as synced (which loses the change until the
|
||||
# file next changes).
|
||||
to_enqueue = []
|
||||
# Advance the cursor only for records actually written.
|
||||
for rel, key in to_enqueue:
|
||||
new_cursor[rel] = key
|
||||
prev = cursor.get(rel)
|
||||
if prev != key:
|
||||
# Enqueue via the shim (respects sync mode + skip list).
|
||||
subprocess.run([enqueue_bin, rel], check=False)
|
||||
new_cursor[rel] = key
|
||||
|
||||
save_cursor(cursor_path, new_cursor)
|
||||
PYEOF
|
||||
|
|
|
|||
|
|
@ -1,223 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# gstack-codex-session-import — backfill question-log.jsonl from Codex sessions.
|
||||
#
|
||||
# Codex has no AskUserQuestion tool (per docs/spikes/codex-session-format.md).
|
||||
# gstack skills running on Codex emit Decision Briefs as plain agent_message
|
||||
# text, and the user's response shows up in the next user_message. This
|
||||
# importer reconstructs those question/answer pairs from the structured
|
||||
# JSONL session files at ~/.codex/sessions/<date>/.
|
||||
#
|
||||
# Usage:
|
||||
# gstack-codex-session-import # latest session under ~/.codex/sessions/
|
||||
# gstack-codex-session-import <path/to.jsonl> # explicit session file
|
||||
# gstack-codex-session-import --since <iso> # all sessions newer than <iso>
|
||||
#
|
||||
# Recovery strategy (two-tier per D5/T4 spike):
|
||||
# 1. Marker-first: extract <gstack-qid:foo-bar> from agent_message → stable id.
|
||||
# 2. Pattern fallback: detect D<N> header + numbered options → hash id
|
||||
# (source=codex-import-pattern, never used as preference key per D18).
|
||||
#
|
||||
# Writes via bin/gstack-question-log so source tagging, dedup, and async
|
||||
# derive all apply uniformly.
|
||||
set -euo pipefail
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
|
||||
CODEX_SESSIONS_ROOT="${CODEX_SESSIONS_ROOT:-$HOME/.codex/sessions}"
|
||||
|
||||
MODE="latest"
|
||||
EXPLICIT_PATH=""
|
||||
SINCE_ISO=""
|
||||
|
||||
# Parse the single optional argument: a flag (--since / --help) or an explicit
# session file path. With no argument MODE stays "latest".
if [ $# -gt 0 ]; then
  case "$1" in
    --since)
      MODE="since"
      SINCE_ISO="${2:-}"
      ;;
    --help|-h)
      # Print the header comment block (up to the `set -euo` line) as help.
      # ERE `# ?` replaces the BRE `\?`, a GNU extension that BSD/macOS sed
      # does not treat as "optional", which left the "# " prefixes in place.
      sed -n '1,/^set -euo/p' "$0" | sed -E 's|^# ?||'
      exit 0
      ;;
    -*)
      echo "unknown flag: $1" >&2
      exit 1
      ;;
    *)
      MODE="explicit"
      EXPLICIT_PATH="$1"
      ;;
  esac
fi
|
||||
|
||||
# Resolve list of session files to process.
#   explicit — the one path given on the command line (must exist)
#   latest   — the most recently modified rollout-*.jsonl under the sessions root
#   since    — every rollout-*.jsonl modified after the --since timestamp
SESSION_FILES=()
case "$MODE" in
  explicit)
    if [ ! -f "$EXPLICIT_PATH" ]; then
      echo "gstack-codex-session-import: file not found: $EXPLICIT_PATH" >&2
      exit 1
    fi
    SESSION_FILES=("$EXPLICIT_PATH")
    ;;
  latest)
    if [ ! -d "$CODEX_SESSIONS_ROOT" ]; then
      echo "NO_SESSIONS: $CODEX_SESSIONS_ROOT does not exist"
      exit 0
    fi
    # Pick the newest file with the shell's -nt test instead of
    # `find | xargs ls -t`: that pipeline split paths containing spaces, and
    # GNU xargs ran `ls -t` on the current directory when find printed nothing.
    LATEST=""
    while IFS= read -r candidate; do
      if [ -z "$LATEST" ] || [ "$candidate" -nt "$LATEST" ]; then
        LATEST="$candidate"
      fi
    done < <(find "$CODEX_SESSIONS_ROOT" -type f -name "rollout-*.jsonl" -print 2>/dev/null)
    if [ -z "$LATEST" ]; then
      echo "NO_SESSIONS: no rollout-*.jsonl files under $CODEX_SESSIONS_ROOT"
      exit 0
    fi
    SESSION_FILES=("$LATEST")
    ;;
  since)
    if [ -z "$SINCE_ISO" ]; then
      echo "--since requires an ISO 8601 timestamp" >&2
      exit 1
    fi
    # -newermt compares mtime against the timestamp itself. The previous
    # `-newer <(date ...)` compared against the mtime of a /dev/fd path (the
    # moment the pipe was created), not against the requested time.
    while IFS= read -r f; do
      SESSION_FILES+=("$f")
    done < <(find "$CODEX_SESSIONS_ROOT" -type f -name "rollout-*.jsonl" -newermt "$SINCE_ISO" 2>/dev/null)
    ;;
esac

if [ ${#SESSION_FILES[@]} -eq 0 ]; then
  echo "NO_SESSIONS: nothing to import"
  exit 0
fi
|
||||
|
||||
# Parse + extract via bun. Emits one line per question found, ready to pipe
# into gstack-question-log. Tagged with source so downstream consumers
# (/plan-tune stats, dream cycle) can distinguish backfilled events from
# live captures.
IMPORTED=0
SKIPPED_NO_ANSWER=0

for SESSION_FILE in "${SESSION_FILES[@]}"; do
  # The bun program prints a single "<imported> <skipped>" line on stdout.
  # Its stderr is deliberately NOT captured: with `2>&1`, a warning such as
  # "WARN: no session_meta ..." landed in COUNT_LINE, reached the arithmetic
  # below as "WARN:" and aborted the whole run under `set -e`.
  COUNT_LINE=$(SESSION_FILE_PATH="$SESSION_FILE" QLOG_BIN="$SCRIPT_DIR/gstack-question-log" bun -e '
    const fs = require("fs");
    const path = require("path");
    const { spawnSync } = require("child_process");
    const crypto = require("crypto");

    const sessionPath = process.env.SESSION_FILE_PATH;
    const qlogBin = process.env.QLOG_BIN;
    const lines = fs.readFileSync(sessionPath, "utf-8").trim().split("\n").filter(Boolean);

    // Separate the session_meta record from the event stream; unparsable
    // lines are dropped.
    let meta = null;
    const stream = [];
    for (const ln of lines) {
      try {
        const e = JSON.parse(ln);
        if (e.type === "session_meta") meta = e.payload;
        else stream.push(e);
      } catch {}
    }
    if (!meta) {
      console.error("WARN: no session_meta in " + sessionPath);
      console.log("0 0");
      process.exit(0);
    }

    const cwd = meta.cwd || "";
    const sessionId = (meta.id || path.basename(sessionPath)).slice(0, 64);

    // Walk for agent_message → next user_message pairs.
    const briefs = [];
    for (let i = 0; i < stream.length; i++) {
      const e = stream[i];
      if (e.type !== "event_msg" || e.payload?.type !== "agent_message") continue;
      const text = String(e.payload?.message || "");
      if (!text) continue;
      // Detect D-numbered brief or marker. Markers are sufficient on their own.
      const markerMatch = text.match(/<gstack-qid:([a-z0-9-]{1,64})>/i);
      const dMatch = text.match(/^D\d+[\.\d]*\s*[—\-]\s*(.+?)$/m);
      if (!markerMatch && !dMatch) continue;

      // Find the next user_message in the stream.
      let answer = null;
      for (let j = i + 1; j < stream.length; j++) {
        const e2 = stream[j];
        if (e2.type === "event_msg" && e2.payload?.type === "user_message") {
          answer = String(e2.payload?.message || "").trim();
          break;
        }
      }
      if (!answer) continue;

      // Extract options A) ... B) ... from the brief.
      const optMatches = [...text.matchAll(/^([A-Z])\)\s+(.+?)(?:\s+\(recommended\))?$/gm)];
      const options = optMatches.map((m) => m[2].trim());

      // Identify recommended option (label first, prose fallback).
      let recommended;
      const recLabel = [...text.matchAll(/^([A-Z])\)\s+(.+?)\s+\(recommended\)$/gm)];
      if (recLabel.length === 1) recommended = recLabel[0][2].trim();

      // Identify which option the user picked from their answer.
      // Look for "A" / "A) ..." / option-label prefix match.
      let userChoice = "__unknown__";
      const letterMatch = answer.match(/^\s*([A-Z])\b/);
      if (letterMatch) {
        const idx = letterMatch[1].charCodeAt(0) - 65;
        if (idx >= 0 && idx < options.length) userChoice = options[idx];
        else userChoice = letterMatch[1];
      } else if (options.length > 0) {
        const lower = answer.toLowerCase();
        const m = options.find((o) => lower.includes(o.toLowerCase().slice(0, 12)));
        if (m) userChoice = m;
      }
      if (userChoice === "__unknown__") {
        userChoice = answer.slice(0, 64);
      }

      const summary = (dMatch?.[1] || text.split("\n")[0]).slice(0, 200);

      // Marker ids are stable; pattern-detected briefs get a hash id built
      // from the summary and the sorted option labels.
      let questionId, source;
      if (markerMatch) {
        questionId = markerMatch[1];
        source = "codex-import-marker";
      } else {
        const sortedOpts = [...options].sort().join("|");
        const h = crypto.createHash("sha1").update("codex::" + summary + "::" + sortedOpts).digest("hex").slice(0, 10);
        questionId = "hook-" + h;
        source = "codex-import-pattern";
      }

      briefs.push({
        skill: "codex",
        question_id: questionId,
        question_summary: summary,
        options_count: options.length || 1,
        user_choice: userChoice.slice(0, 64),
        ...(recommended ? { recommended: recommended.slice(0, 64) } : {}),
        source,
        session_id: sessionId,
        // Use ts_nanos+ts shape from the event itself if available; else null.
        ts: e.timestamp || undefined,
      });
    }

    let imported = 0;
    for (const b of briefs) {
      const res = spawnSync(qlogBin, [JSON.stringify(b)], {
        encoding: "utf-8",
        stdio: ["ignore", "pipe", "pipe"],
        // Run from the originating cwd so gstack-slug bucks events into the
        // right project. Falls back to the importer cwd if the session cwd
        // no longer exists.
        cwd: cwd && fs.existsSync(cwd) ? cwd : undefined,
        timeout: 5000,
      });
      if (res.status === 0) imported++;
    }
    console.log(imported + " 0");
  ')

  # Only the last stdout line carries the counts; anything non-numeric counts
  # as 0 so one odd session cannot break the running total.
  IMP=$(printf '%s\n' "$COUNT_LINE" | tail -n 1 | awk '{print $1}')
  case "$IMP" in
    ''|*[!0-9]*) IMP=0 ;;
  esac
  IMPORTED=$((IMPORTED + IMP))
done

echo "IMPORTED: $IMPORTED events from ${#SESSION_FILES[@]} session(s)"
|
||||
|
|
@ -8,13 +8,11 @@
|
|||
# gstack-config defaults — show just the defaults table
|
||||
#
|
||||
# Env overrides (for testing):
|
||||
# GSTACK_STATE_ROOT — override ~/.gstack state directory (highest priority,
|
||||
# matches D16 cathedral isolation convention)
|
||||
# GSTACK_HOME — override ~/.gstack state directory (aligns with writer scripts)
|
||||
# GSTACK_STATE_DIR — legacy alias for GSTACK_HOME (kept for backwards compat)
|
||||
set -euo pipefail
|
||||
|
||||
STATE_DIR="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-${GSTACK_STATE_DIR:-$HOME/.gstack}}}"
|
||||
STATE_DIR="${GSTACK_HOME:-${GSTACK_STATE_DIR:-$HOME/.gstack}}"
|
||||
CONFIG_FILE="$STATE_DIR/config.yaml"
|
||||
|
||||
# Annotated header for new config files. Written once on first `set`.
|
||||
|
|
@ -75,16 +73,6 @@ CONFIG_HEADER='# gstack configuration — edit freely, changes take effect on ne
|
|||
# # Set to true once the privacy gate has asked the user.
|
||||
# # Flip back to false to be re-prompted.
|
||||
#
|
||||
# ─── Plan-tune hooks ─────────────────────────────────────────────────
|
||||
# plan_tune_hooks: prompt # Controls whether ./setup installs the plan-tune
|
||||
# # Claude Code hooks (PostToolUse capture +
|
||||
# # PreToolUse preference enforcement).
|
||||
# # prompt — ask on a real TTY, skip otherwise (default)
|
||||
# # yes — install non-interactively
|
||||
# # no — skip non-interactively
|
||||
# # Override per-run: ./setup --plan-tune-hooks /
|
||||
# # --no-plan-tune-hooks, or env GSTACK_PLAN_TUNE_HOOKS.
|
||||
#
|
||||
# ─── Advanced ────────────────────────────────────────────────────────
|
||||
# codex_reviews: enabled # disabled = skip Codex adversarial reviews in /ship
|
||||
# gstack_contributor: false # true = file field reports when gstack misbehaves
|
||||
|
|
@ -112,7 +100,6 @@ lookup_default() {
|
|||
skill_prefix) echo "false" ;;
|
||||
checkpoint_mode) echo "explicit" ;;
|
||||
checkpoint_push) echo "false" ;;
|
||||
explain_level) echo "default" ;;
|
||||
codex_reviews) echo "enabled" ;;
|
||||
gstack_contributor) echo "false" ;;
|
||||
skip_eng_review) echo "false" ;;
|
||||
|
|
@ -120,145 +107,19 @@ lookup_default() {
|
|||
cross_project_learnings) echo "" ;; # intentionally empty → unset triggers first-time prompt
|
||||
artifacts_sync_mode) echo "off" ;;
|
||||
artifacts_sync_mode_prompted) echo "false" ;;
|
||||
plan_tune_hooks) echo "prompt" ;; # prompt | yes | no — controls ./setup plan-tune hook install
|
||||
|
||||
redact_repo_visibility) echo "" ;; # empty → fall through to gh/glab detection
|
||||
redact_prepush_hook) echo "false" ;;
|
||||
# Brain-aware planning (v1.48 / T5+T10+T16). Defaults documented inline:
|
||||
# brain_trust_policy@<hash> — unset on fresh install; setup-gbrain
|
||||
# writes 'personal' for local engines,
|
||||
# asks the user for remote-ambiguous.
|
||||
# salience_allowlist — empty falls through to
|
||||
# SALIENCE_DEFAULT_ALLOWLIST (D9).
|
||||
# user_slug_at_<hash> — empty triggers resolve-user-slug
|
||||
# fallback chain (D4 A3) on first call.
|
||||
brain_trust_policy*) echo "unset" ;;
|
||||
salience_allowlist) echo "" ;;
|
||||
user_slug_at_*) echo "" ;;
|
||||
*) echo "" ;;
|
||||
esac
|
||||
}
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Brain-integration helpers (T5+T10+T16)
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
# Compute sha8 of a string. Used for endpoint hashing.
|
||||
sha8_of() {
|
||||
printf '%s' "$1" | shasum -a 256 | cut -c1-8
|
||||
}
|
||||
|
||||
# Detect the active brain endpoint hash. Reads ~/.claude.json for the gbrain
# MCP server URL (.mcpServers.gbrain.url, then .transport.url) and prints its
# sha8. Falls back to the literal 'local' when the file, jq, or the URL is
# missing.
endpoint_hash() {
  _claude_json="$HOME/.claude.json"
  if [ -f "$_claude_json" ] && command -v jq >/dev/null 2>&1; then
    # `|| true`: a malformed ~/.claude.json must degrade to 'local' rather
    # than abort the script under `set -e` when this runs outside $(...).
    _url=$(jq -r '.mcpServers.gbrain.url // .mcpServers.gbrain.transport.url // empty' "$_claude_json" 2>/dev/null || true)
    if [ -n "$_url" ] && [ "$_url" != "null" ]; then
      sha8_of "$_url"
      return 0
    fi
  fi
  printf '%s' "local"
}
|
||||
|
||||
# Detect endpoint hash collisions. When two distinct endpoints share the same
# sha8 prefix (rare but possible), escalate to sha16 by emitting the longer
# hash. Detection: scan the config file for existing brain_trust_policy@<hash>
# or user_slug_at_<hash> keys; when a key for the active sha8 exists AND a key
# namespaced by the active URL's sha16 is already stored, emit the sha16.
# Prints 'local' unchanged when no remote endpoint is configured.
endpoint_hash_with_collision_check() {
  _active=$(endpoint_hash)
  if [ "$_active" = "local" ]; then
    printf '%s' "$_active"
    return 0
  fi
  # If a different endpoint (different URL) shares this sha8, escalate.
  # We only catch this when the config has another endpoint recorded.
  # Key shapes differ by family: brain_trust_policy@<hash> vs
  # user_slug_at_<hash>. The old pattern looked for user_slug_at@<hash>, which
  # is never written, so slug keys were invisible to this check.
  _matching=$(grep -E "^(brain_trust_policy@|user_slug_at_)${_active}" "$CONFIG_FILE" 2>/dev/null | head -1 || true)
  _claude_json="$HOME/.claude.json"
  if [ -n "$_matching" ] && [ -f "$_claude_json" ] && command -v jq >/dev/null 2>&1; then
    _url=$(jq -r '.mcpServers.gbrain.url // .mcpServers.gbrain.transport.url // empty' "$_claude_json" 2>/dev/null || true)
    _sha16=$(printf '%s' "$_url" | shasum -a 256 | cut -c1-16)
    # A key already stored under this sha16 is the collision evidence; from
    # then on the endpoint keeps using the longer hash.
    _stored16=$(grep -E "^(brain_trust_policy@|user_slug_at_)${_sha16}" "$CONFIG_FILE" 2>/dev/null | head -1 || true)
    if [ -n "$_stored16" ]; then
      printf '%s' "$_sha16"
      return 0
    fi
  fi
  printf '%s' "$_active"
}
|
||||
|
||||
# Resolve the user-slug for the active brain endpoint and print it.
# A slug already stored under user_slug_at_<endpoint-hash> wins. Otherwise the
# first source that yields a non-empty value is used (D4 A3 chain):
#   1. client_name / token_name from `gbrain whoami --json`
#   2. $USER
#   3. "email-" + sha8 of `git config user.email`
#   4. "anonymous-" + sha8 of the hostname
# Sources 1 and 2 are lowercased, spaces become '-', and anything outside
# [alnum-] is dropped. The result is appended to the config file directly
# (not through `gstack-config set`, to avoid recursion).
resolve_user_slug() {
  _hash=$(endpoint_hash_with_collision_check)

  # Fast path: a slug was persisted on an earlier call.
  _stored=$(grep -E "^user_slug_at_${_hash}:" "$CONFIG_FILE" 2>/dev/null | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
  if [ -n "$_stored" ]; then
    printf '%s' "$_stored"
    return 0
  fi

  _slug=""

  # Layer 1: gbrain whoami
  if command -v gbrain >/dev/null 2>&1; then
    _whoami=$(gbrain whoami --json 2>/dev/null || true)
    if [ -n "$_whoami" ] && command -v jq >/dev/null 2>&1; then
      _client_name=$(printf '%s' "$_whoami" | jq -r '.client_name // .token_name // empty' 2>/dev/null || true)
      if [ -n "$_client_name" ] && [ "$_client_name" != "null" ]; then
        _slug=$(printf '%s' "$_client_name" | tr '[:upper:] ' '[:lower:]-' | tr -dc '[:alnum:]-')
      fi
    fi
  fi

  # Layer 2: $USER
  if [ -z "$_slug" ] && [ -n "${USER:-}" ]; then
    _slug=$(printf '%s' "$USER" | tr '[:upper:] ' '[:lower:]-' | tr -dc '[:alnum:]-')
  fi

  # Layer 3: sha8 of git email
  if [ -z "$_slug" ]; then
    _email=$(git config user.email 2>/dev/null || true)
    [ -z "$_email" ] || _slug="email-$(sha8_of "$_email")"
  fi

  # Layer 4: anonymous-<sha8(hostname)>
  if [ -z "$_slug" ]; then
    _slug="anonymous-$(sha8_of "$(hostname 2>/dev/null || echo unknown)")"
  fi

  # Persist: create the config (with its header) on first use, then append
  # the key unless a line for this endpoint already exists.
  mkdir -p "$STATE_DIR"
  [ -f "$CONFIG_FILE" ] || printf '%s' "$CONFIG_HEADER" > "$CONFIG_FILE"
  grep -qE "^user_slug_at_${_hash}:" "$CONFIG_FILE" 2>/dev/null \
    || echo "user_slug_at_${_hash}: ${_slug}" >> "$CONFIG_FILE"

  printf '%s' "$_slug"
}
|
||||
|
||||
case "${1:-}" in
|
||||
get)
|
||||
KEY="${2:?Usage: gstack-config get <key>}"
|
||||
# Validate key (alphanumeric + underscore + optional @<hash> suffix for
|
||||
# endpoint-namespaced keys introduced by the brain-aware planning layer)
|
||||
if ! printf '%s' "$KEY" | grep -qE '^[a-zA-Z0-9_]+(@[a-f0-9]+)?$'; then
|
||||
echo "Error: key must contain only alphanumeric characters, underscores, and an optional @<hex-hash> suffix" >&2
|
||||
# Validate key (alphanumeric + underscore only)
|
||||
if ! printf '%s' "$KEY" | grep -qE '^[a-zA-Z0-9_]+$'; then
|
||||
echo "Error: key must contain only alphanumeric characters and underscores" >&2
|
||||
exit 1
|
||||
fi
|
||||
# Use literal match for keys containing @ (sha hashes), regex otherwise
|
||||
VALUE=$(grep -F "${KEY}:" "$CONFIG_FILE" 2>/dev/null | grep -E "^${KEY%@*}(@[a-f0-9]+)?:" | grep -F "${KEY}:" | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
|
||||
VALUE=$(grep -E "^${KEY}:" "$CONFIG_FILE" 2>/dev/null | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
|
||||
if [ -z "$VALUE" ]; then
|
||||
VALUE=$(lookup_default "$KEY")
|
||||
fi
|
||||
|
|
@ -267,17 +128,11 @@ case "${1:-}" in
|
|||
set)
|
||||
KEY="${2:?Usage: gstack-config set <key> <value>}"
|
||||
VALUE="${3:?Usage: gstack-config set <key> <value>}"
|
||||
# Validate key (alphanumeric + underscore + optional @<hash> suffix)
|
||||
if ! printf '%s' "$KEY" | grep -qE '^[a-zA-Z0-9_]+(@[a-f0-9]+)?$'; then
|
||||
echo "Error: key must contain only alphanumeric characters, underscores, and an optional @<hex-hash> suffix" >&2
|
||||
# Validate key (alphanumeric + underscore only)
|
||||
if ! printf '%s' "$KEY" | grep -qE '^[a-zA-Z0-9_]+$'; then
|
||||
echo "Error: key must contain only alphanumeric characters and underscores" >&2
|
||||
exit 1
|
||||
fi
|
||||
# Validate brain_trust_policy value domain (D4 / D11)
|
||||
if printf '%s' "$KEY" | grep -qE '^brain_trust_policy(@|$)' && \
|
||||
[ "$VALUE" != "personal" ] && [ "$VALUE" != "shared" ] && [ "$VALUE" != "unset" ]; then
|
||||
echo "Warning: brain_trust_policy '$VALUE' not recognized. Valid values: personal, shared, unset. Using unset." >&2
|
||||
VALUE="unset"
|
||||
fi
|
||||
# V1: whitelist values for keys with closed value domains. Unknown values warn + default.
|
||||
if [ "$KEY" = "explain_level" ] && [ "$VALUE" != "default" ] && [ "$VALUE" != "terse" ]; then
|
||||
echo "Warning: explain_level '$VALUE' not recognized. Valid values: default, terse. Using default." >&2
|
||||
|
|
@ -287,21 +142,6 @@ case "${1:-}" in
|
|||
echo "Warning: artifacts_sync_mode '$VALUE' not recognized. Valid values: off, artifacts-only, full. Using off." >&2
|
||||
VALUE="off"
|
||||
fi
|
||||
# redact_repo_visibility: a LOCAL override for repos gh/glab can't read (e.g.
|
||||
# self-hosted GitLab). It lives in ~/.gstack/config.yaml (never committed), so
|
||||
# it can't be used to weaken the gate repo-wide for other contributors.
|
||||
if [ "$KEY" = "redact_repo_visibility" ] && [ "$VALUE" != "public" ] && [ "$VALUE" != "private" ] && [ "$VALUE" != "unknown" ]; then
|
||||
echo "Warning: redact_repo_visibility '$VALUE' not recognized. Valid values: public, private, unknown. Using unknown." >&2
|
||||
VALUE="unknown"
|
||||
fi
|
||||
if [ "$KEY" = "redact_prepush_hook" ] && [ "$VALUE" != "true" ] && [ "$VALUE" != "false" ]; then
|
||||
echo "Warning: redact_prepush_hook '$VALUE' not recognized. Valid values: true, false. Using false." >&2
|
||||
VALUE="false"
|
||||
fi
|
||||
if [ "$KEY" = "plan_tune_hooks" ] && [ "$VALUE" != "prompt" ] && [ "$VALUE" != "yes" ] && [ "$VALUE" != "no" ]; then
|
||||
echo "Warning: plan_tune_hooks '$VALUE' not recognized. Valid values: prompt, yes, no. Using prompt." >&2
|
||||
VALUE="prompt"
|
||||
fi
|
||||
mkdir -p "$STATE_DIR"
|
||||
# Write annotated header on first creation
|
||||
if [ ! -f "$CONFIG_FILE" ]; then
|
||||
|
|
@ -329,9 +169,9 @@ case "${1:-}" in
|
|||
echo ""
|
||||
echo "# ─── Active values (including defaults for unset keys) ───"
|
||||
for KEY in proactive routing_declined telemetry auto_upgrade update_check \
|
||||
skill_prefix checkpoint_mode checkpoint_push explain_level \
|
||||
codex_reviews gstack_contributor skip_eng_review workspace_root \
|
||||
artifacts_sync_mode artifacts_sync_mode_prompted plan_tune_hooks; do
|
||||
skill_prefix checkpoint_mode checkpoint_push codex_reviews \
|
||||
gstack_contributor skip_eng_review workspace_root \
|
||||
artifacts_sync_mode artifacts_sync_mode_prompted; do
|
||||
VALUE=$(grep -E "^${KEY}:" "$CONFIG_FILE" 2>/dev/null | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
|
||||
SOURCE="default"
|
||||
if [ -n "$VALUE" ]; then
|
||||
|
|
@ -345,68 +185,14 @@ case "${1:-}" in
|
|||
defaults)
|
||||
echo "# gstack-config defaults"
|
||||
for KEY in proactive routing_declined telemetry auto_upgrade update_check \
|
||||
skill_prefix checkpoint_mode checkpoint_push explain_level \
|
||||
codex_reviews gstack_contributor skip_eng_review workspace_root \
|
||||
artifacts_sync_mode artifacts_sync_mode_prompted plan_tune_hooks; do
|
||||
skill_prefix checkpoint_mode checkpoint_push codex_reviews \
|
||||
gstack_contributor skip_eng_review workspace_root \
|
||||
artifacts_sync_mode artifacts_sync_mode_prompted; do
|
||||
printf ' %-24s %s\n' "$KEY:" "$(lookup_default "$KEY")"
|
||||
done
|
||||
;;
|
||||
endpoint-hash)
|
||||
# Brain integration helper (T10): print active brain endpoint sha8
|
||||
endpoint_hash_with_collision_check
|
||||
;;
|
||||
resolve-user-slug)
|
||||
# Brain integration helper (T16 / D4 A3): resolve + persist user-slug
|
||||
resolve_user_slug
|
||||
;;
|
||||
gbrain-refresh)
|
||||
# Brain integration helper: re-detect gbrain installation state and
|
||||
# persist to ~/.gstack/gbrain-detection.json. gen-skill-docs reads this
|
||||
# file (when invoked with --respect-detection) to decide whether to
|
||||
# render GBRAIN_CONTEXT_LOAD and GBRAIN_SAVE_RESULTS blocks in
|
||||
# generated SKILL.md files.
|
||||
#
|
||||
# Run this after installing or uninstalling gbrain so your locally
|
||||
# generated SKILL.md files match your installation state.
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
DETECT_BIN="$SCRIPT_DIR/gstack-gbrain-detect"
|
||||
DETECTION_FILE="$STATE_DIR/gbrain-detection.json"
|
||||
mkdir -p "$STATE_DIR"
|
||||
if [ ! -x "$DETECT_BIN" ]; then
|
||||
echo "gstack-gbrain-detect not found at $DETECT_BIN" >&2
|
||||
exit 1
|
||||
fi
|
||||
if ! "$DETECT_BIN" > "$DETECTION_FILE.tmp" 2>/dev/null; then
|
||||
printf '{"gbrain_on_path":false,"gbrain_local_status":"no-cli"}\n' > "$DETECTION_FILE.tmp"
|
||||
fi
|
||||
mv "$DETECTION_FILE.tmp" "$DETECTION_FILE"
|
||||
|
||||
# Summarize for the user. Use python (already required elsewhere) to
|
||||
# parse the JSON portably; fall back to grep if python is unavailable.
|
||||
PYTHON_CMD=$(command -v python3 || command -v python || true)
|
||||
if [ -n "$PYTHON_CMD" ]; then
|
||||
STATUS=$("$PYTHON_CMD" -c "import json,sys; d=json.load(open('$DETECTION_FILE')); print(d.get('gbrain_local_status','unknown'))" 2>/dev/null || echo unknown)
|
||||
VERSION=$("$PYTHON_CMD" -c "import json,sys; d=json.load(open('$DETECTION_FILE')); print(d.get('gbrain_version') or 'unknown')" 2>/dev/null || echo unknown)
|
||||
else
|
||||
STATUS=$(grep -o '"gbrain_local_status":[[:space:]]*"[^"]*"' "$DETECTION_FILE" | sed 's/.*"\([^"]*\)"$/\1/')
|
||||
VERSION=$(grep -o '"gbrain_version":[[:space:]]*"[^"]*"' "$DETECTION_FILE" | sed 's/.*"\([^"]*\)"$/\1/')
|
||||
[ -z "$STATUS" ] && STATUS=unknown
|
||||
[ -z "$VERSION" ] && VERSION=unknown
|
||||
fi
|
||||
|
||||
case "$STATUS" in
|
||||
ok)
|
||||
echo "Detected gbrain v$VERSION → brain-aware blocks will render in planning-skill SKILL.md files."
|
||||
echo "Run 'bun run gen:skill-docs' in the gstack repo (or re-run ./setup) to regenerate now."
|
||||
;;
|
||||
*)
|
||||
echo "gbrain not detected (local-status: $STATUS) → brain-aware blocks will be suppressed in planning-skill SKILL.md files."
|
||||
echo "Install gbrain (see /setup-gbrain) and re-run 'gstack-config gbrain-refresh' once it's configured."
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
*)
|
||||
echo "Usage: gstack-config {get|set|list|defaults|endpoint-hash|resolve-user-slug|gbrain-refresh} [key] [value]"
|
||||
echo "Usage: gstack-config {get|set|list|defaults} [key] [value]"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
|
|
|||
|
|
@ -17,9 +17,6 @@
|
|||
# --check-mismatch detect meaningful gaps between declared and observed.
|
||||
# --migrate migrate builder-profile.jsonl → developer-profile.json.
|
||||
# Idempotent; archives the source file on success.
|
||||
# --log-session append a session entry (from /office-hours) to
|
||||
# sessions[] and update aggregates. Required fields:
|
||||
# date, mode. Silent skip on invalid input.
|
||||
#
|
||||
# Profile file: ~/.gstack/developer-profile.json (unified schema — see
|
||||
# docs/designs/PLAN_TUNING_V0.md). Event file: ~/.gstack/projects/{SLUG}/
|
||||
|
|
@ -28,8 +25,7 @@ set -euo pipefail
|
|||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
# GSTACK_STATE_ROOT takes precedence over GSTACK_HOME (test isolation per D16).
|
||||
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
|
||||
GSTACK_HOME="${GSTACK_HOME:-$HOME/.gstack}"
|
||||
PROFILE_FILE="$GSTACK_HOME/developer-profile.json"
|
||||
LEGACY_FILE="$GSTACK_HOME/builder-profile.jsonl"
|
||||
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
|
||||
|
|
@ -158,65 +154,6 @@ ensure_profile() {
|
|||
EOF
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# Record session: append a session entry from /office-hours to sessions[]
|
||||
# and update aggregates (signals_accumulated, resources_shown, topics).
|
||||
# Fix for #1671: the writer side of the v1.0.0.0 migration. Reader and
|
||||
# writer now share the same file.
|
||||
# Silent skip on invalid input (matches gstack-timeline-log:22-26 pattern).
|
||||
# -----------------------------------------------------------------------
|
||||
do_log_session() {
|
||||
local INPUT="${1:-}"
|
||||
if [ -z "$INPUT" ]; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Validate: input must be parseable JSON with required fields (date, mode).
|
||||
if ! printf '%s' "$INPUT" | bun -e "
|
||||
const j = JSON.parse(await Bun.stdin.text());
|
||||
if (!j.date || !j.mode) process.exit(1);
|
||||
" 2>/dev/null; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
ensure_profile
|
||||
|
||||
local TMPOUT
|
||||
TMPOUT=$(mktemp "$GSTACK_HOME/developer-profile.json.XXXXXX.tmp")
|
||||
trap 'rm -f "$TMPOUT"' EXIT
|
||||
|
||||
PROFILE_FILE_PATH="$PROFILE_FILE" RECORD_INPUT="$INPUT" TMPOUT_PATH="$TMPOUT" bun -e "
|
||||
const fs = require('fs');
|
||||
const entry = JSON.parse(process.env.RECORD_INPUT);
|
||||
if (!entry.ts) entry.ts = new Date().toISOString();
|
||||
|
||||
const profile = JSON.parse(fs.readFileSync(process.env.PROFILE_FILE_PATH, 'utf-8'));
|
||||
profile.sessions = profile.sessions || [];
|
||||
profile.sessions.push(entry);
|
||||
|
||||
profile.signals_accumulated = profile.signals_accumulated || {};
|
||||
for (const s of (entry.signals || [])) {
|
||||
profile.signals_accumulated[s] = (profile.signals_accumulated[s] || 0) + 1;
|
||||
}
|
||||
|
||||
profile.resources_shown = profile.resources_shown || [];
|
||||
const resSet = new Set(profile.resources_shown);
|
||||
for (const r of (entry.resources_shown || [])) resSet.add(r);
|
||||
profile.resources_shown = Array.from(resSet);
|
||||
|
||||
profile.topics = profile.topics || [];
|
||||
const topicSet = new Set(profile.topics);
|
||||
for (const t of (entry.topics || [])) topicSet.add(t);
|
||||
profile.topics = Array.from(topicSet);
|
||||
|
||||
fs.writeFileSync(process.env.TMPOUT_PATH, JSON.stringify(profile, null, 2));
|
||||
"
|
||||
|
||||
mv "$TMPOUT" "$PROFILE_FILE"
|
||||
trap - EXIT
|
||||
"$SCRIPT_DIR/gstack-brain-enqueue" "developer-profile.json" 2>/dev/null &
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# Read: emit legacy KEY: VALUE output for /office-hours compat.
|
||||
# -----------------------------------------------------------------------
|
||||
|
|
@ -231,19 +168,14 @@ do_read() {
|
|||
else if (count >= 4) tier = 'regular';
|
||||
else if (count >= 1) tier = 'welcome_back';
|
||||
|
||||
// LAST_* / CROSS_PROJECT must reflect real sessions, not resource-tracking
|
||||
// events (the Phase 6 auto-append). Without this filter, a session's
|
||||
// resources entry written immediately after the real session would clobber
|
||||
// LAST_PROJECT/LAST_ASSIGNMENT/LAST_DESIGN_TITLE.
|
||||
const realSessions = sessions.filter(e => e.mode !== 'resources');
|
||||
const last = realSessions[realSessions.length - 1] || {};
|
||||
const prev = realSessions[realSessions.length - 2] || {};
|
||||
const last = sessions[count - 1] || {};
|
||||
const prev = sessions[count - 2] || {};
|
||||
const crossProject = prev.project_slug && last.project_slug
|
||||
? prev.project_slug !== last.project_slug
|
||||
: false;
|
||||
|
||||
const designs = realSessions.map(e => e.design_doc || '').filter(Boolean);
|
||||
const designTitles = realSessions
|
||||
const designs = sessions.map(e => e.design_doc || '').filter(Boolean);
|
||||
const designTitles = sessions
|
||||
.map(e => (e.design_doc ? (e.project_slug || 'unknown') : ''))
|
||||
.filter(Boolean);
|
||||
|
||||
|
|
@ -509,7 +441,6 @@ case "$CMD" in
|
|||
--vibe) do_vibe ;;
|
||||
--check-mismatch) do_check_mismatch ;;
|
||||
--migrate) do_migrate ;;
|
||||
--log-session) do_log_session "$@" ;;
|
||||
--help|-h) sed -n '1,/^set -euo/p' "$0" | sed 's|^# \?||' ;;
|
||||
*)
|
||||
echo "gstack-developer-profile: unknown subcommand '$CMD'" >&2
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ while IFS= read -r f; do
|
|||
*.md) DOCS=true ;;
|
||||
|
||||
# Config
|
||||
package.json|package-lock.json|yarn.lock|bun.lock|bun.lockb) CONFIG=true ;;
|
||||
package.json|package-lock.json|yarn.lock|bun.lockb) CONFIG=true ;;
|
||||
Gemfile|Gemfile.lock) CONFIG=true ;;
|
||||
*.yml|*.yaml) CONFIG=true ;;
|
||||
.github/*) CONFIG=true ;;
|
||||
|
|
|
|||
|
|
@ -1,181 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# gstack-distill-apply — apply a single distillation proposal after user Y.
|
||||
#
|
||||
# Plan-tune cathedral T11. Reads distillation-proposals.json, applies the
|
||||
# Nth proposal to the right surface:
|
||||
#
|
||||
# preference → gstack-question-preference --write
|
||||
# declared-nudge → atomic update to ~/.gstack/developer-profile.json declared
|
||||
# memory-nugget → append to ~/.gstack/free-text-memory.json (local fallback)
|
||||
#
|
||||
# Always confirm before calling this from the skill — the bin assumes the user
|
||||
# already approved (Codex #15 trust boundary). The skill template (/plan-tune
|
||||
# distill review section) handles the confirm UX.
|
||||
#
|
||||
# gbrain integration: when gbrain is configured, the skill template ALSO
|
||||
# invokes mcp__gbrain__put_page / extract_facts / add_tag in the same turn
|
||||
# (those are MCP tools, not CLI-callable). Pass --gbrain-published true to
|
||||
# mark the proposal as mirrored to gbrain. The local file always gets the
|
||||
# write so it's the durable source-of-truth even on machines without gbrain.
|
||||
#
|
||||
# Usage:
|
||||
# gstack-distill-apply --proposal <N> # apply Nth proposal
|
||||
# gstack-distill-apply --proposal <N> --gbrain-published true
|
||||
# gstack-distill-apply --list # show pending proposals
|
||||
set -euo pipefail
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
|
||||
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
|
||||
SLUG="${SLUG:-unknown}"
|
||||
PROJECT_DIR="$GSTACK_HOME/projects/$SLUG"
|
||||
PROPOSAL_FILE="$PROJECT_DIR/distillation-proposals.json"
|
||||
MEMORY_FILE="$GSTACK_HOME/free-text-memory.json"
|
||||
PROFILE_FILE="$GSTACK_HOME/developer-profile.json"
|
||||
|
||||
ACTION="apply"
|
||||
PROPOSAL_IDX=""
|
||||
GBRAIN_PUBLISHED="false"
|
||||
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--proposal) PROPOSAL_IDX="$2"; shift 2 ;;
|
||||
--gbrain-published) GBRAIN_PUBLISHED="$2"; shift 2 ;;
|
||||
--list) ACTION="list"; shift ;;
|
||||
--help|-h)
|
||||
sed -n '1,/^set -euo/p' "$0" | sed 's|^# \?||'
|
||||
exit 0
|
||||
;;
|
||||
*) echo "unknown arg: $1" >&2; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ ! -f "$PROPOSAL_FILE" ]; then
|
||||
echo "NO_PROPOSALS: $PROPOSAL_FILE missing — run gstack-distill-free-text first"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ "$ACTION" = "list" ]; then
|
||||
PROPOSAL_FILE_PATH="$PROPOSAL_FILE" bun -e '
|
||||
const fs = require("fs");
|
||||
const p = JSON.parse(fs.readFileSync(process.env.PROPOSAL_FILE_PATH, "utf-8"));
|
||||
const proposals = p.proposals || [];
|
||||
if (proposals.length === 0) { console.log("(no proposals)"); process.exit(0); }
|
||||
console.log("GENERATED: " + p.generated_at);
|
||||
console.log("SOURCE_EVENTS: " + (p.source_event_count || 0));
|
||||
proposals.forEach((pr, i) => {
|
||||
console.log("");
|
||||
console.log("[" + i + "] " + (pr.kind || "?") + " (confidence: " + (pr.confidence || "?") + ")");
|
||||
if (pr.rationale) console.log(" rationale: " + pr.rationale);
|
||||
if (pr.kind === "preference") {
|
||||
console.log(" question_id: " + pr.question_id);
|
||||
console.log(" preference: " + pr.preference);
|
||||
} else if (pr.kind === "declared-nudge") {
|
||||
console.log(" dimension: " + pr.dimension);
|
||||
console.log(" direction: " + pr.direction + " (" + (pr.magnitude || "?") + ")");
|
||||
} else if (pr.kind === "memory-nugget") {
|
||||
console.log(" nugget: " + pr.nugget);
|
||||
console.log(" signal_keys: " + JSON.stringify(pr.applies_to_signal_keys || []));
|
||||
}
|
||||
if (pr.source_quotes && pr.source_quotes.length) {
|
||||
console.log(" quotes:");
|
||||
pr.source_quotes.forEach((q) => console.log(" - \"" + q + "\""));
|
||||
}
|
||||
});
|
||||
'
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ -z "$PROPOSAL_IDX" ]; then
|
||||
echo "--proposal <N> required" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Apply via bun. Each kind has its own surface.
|
||||
mkdir -p "$PROJECT_DIR"
|
||||
PROPOSAL_IDX="$PROPOSAL_IDX" \
|
||||
PROPOSAL_FILE_PATH="$PROPOSAL_FILE" \
|
||||
MEMORY_FILE_PATH="$MEMORY_FILE" \
|
||||
PROFILE_FILE_PATH="$PROFILE_FILE" \
|
||||
PREF_BIN="$SCRIPT_DIR/gstack-question-preference" \
|
||||
GBRAIN_PUBLISHED="$GBRAIN_PUBLISHED" \
|
||||
bun -e '
|
||||
const fs = require("fs");
|
||||
const { spawnSync } = require("child_process");
|
||||
const idx = parseInt(process.env.PROPOSAL_IDX, 10);
|
||||
const p = JSON.parse(fs.readFileSync(process.env.PROPOSAL_FILE_PATH, "utf-8"));
|
||||
const proposals = p.proposals || [];
|
||||
if (!Number.isInteger(idx) || idx < 0 || idx >= proposals.length) {
|
||||
process.stderr.write("invalid --proposal index " + idx + " (have " + proposals.length + ")\n");
|
||||
process.exit(1);
|
||||
}
|
||||
const pr = proposals[idx];
|
||||
|
||||
const stamp = new Date().toISOString();
|
||||
|
||||
// Memory-nugget: always write to local file (durable source-of-truth even
|
||||
// when gbrain is configured — gbrain is mirror, file is canon for the
|
||||
// PreToolUse hook injection path in Layer 8).
|
||||
if (pr.kind === "memory-nugget") {
|
||||
const memPath = process.env.MEMORY_FILE_PATH;
|
||||
let mem = { nuggets: [] };
|
||||
try { mem = JSON.parse(fs.readFileSync(memPath, "utf-8")); } catch {}
|
||||
if (!Array.isArray(mem.nuggets)) mem.nuggets = [];
|
||||
mem.nuggets.push({
|
||||
nugget: pr.nugget,
|
||||
applies_to_signal_keys: pr.applies_to_signal_keys || [],
|
||||
applied_at: stamp,
|
||||
gbrain_published: process.env.GBRAIN_PUBLISHED === "true",
|
||||
source_quotes: pr.source_quotes || [],
|
||||
});
|
||||
const tmp = memPath + ".tmp";
|
||||
fs.writeFileSync(tmp, JSON.stringify(mem, null, 2));
|
||||
fs.renameSync(tmp, memPath);
|
||||
console.log("APPLIED: memory-nugget appended to " + memPath);
|
||||
}
|
||||
|
||||
// Preference: route through gstack-question-preference for the user-origin
|
||||
// gate + event audit trail. source=plan-tune is the allowed value since
|
||||
// the user opt-in came from inside /plan-tune.
|
||||
if (pr.kind === "preference") {
|
||||
const res = spawnSync(process.env.PREF_BIN, [
|
||||
"--write",
|
||||
JSON.stringify({
|
||||
question_id: pr.question_id,
|
||||
preference: pr.preference,
|
||||
source: "plan-tune",
|
||||
free_text: (pr.source_quotes || []).join(" | ").slice(0, 300),
|
||||
}),
|
||||
], { encoding: "utf-8", stdio: ["ignore", "pipe", "pipe"], timeout: 5000 });
|
||||
if (res.status !== 0) {
|
||||
process.stderr.write("preference apply failed: " + (res.stderr || res.stdout) + "\n");
|
||||
process.exit(1);
|
||||
}
|
||||
console.log("APPLIED: preference " + pr.question_id + " → " + pr.preference);
|
||||
}
|
||||
|
||||
// Declared-nudge: atomic update to developer-profile.json declared. Magnitude
|
||||
// tiers: small=0.05, medium=0.10, large=0.15. Clamp to [0, 1].
|
||||
if (pr.kind === "declared-nudge") {
|
||||
const mag = { small: 0.05, medium: 0.10, large: 0.15 }[pr.magnitude || "small"] || 0.05;
|
||||
const delta = pr.direction === "down" ? -mag : mag;
|
||||
const profilePath = process.env.PROFILE_FILE_PATH;
|
||||
let profile = {};
|
||||
try { profile = JSON.parse(fs.readFileSync(profilePath, "utf-8")); } catch {}
|
||||
profile.declared = profile.declared || {};
|
||||
const cur = typeof profile.declared[pr.dimension] === "number" ? profile.declared[pr.dimension] : 0.5;
|
||||
const next = Math.max(0, Math.min(1, cur + delta));
|
||||
profile.declared[pr.dimension] = +next.toFixed(3);
|
||||
profile.declared_at = stamp;
|
||||
const tmp = profilePath + ".tmp";
|
||||
fs.writeFileSync(tmp, JSON.stringify(profile, null, 2));
|
||||
fs.renameSync(tmp, profilePath);
|
||||
console.log("APPLIED: declared." + pr.dimension + " " + cur + " → " + profile.declared[pr.dimension]);
|
||||
}
|
||||
|
||||
// Mark the proposal as applied so /plan-tune list shows it consumed.
|
||||
pr.applied_at = stamp;
|
||||
pr.gbrain_published = process.env.GBRAIN_PUBLISHED === "true";
|
||||
const tmp = process.env.PROPOSAL_FILE_PATH + ".tmp";
|
||||
fs.writeFileSync(tmp, JSON.stringify(p, null, 2));
|
||||
fs.renameSync(tmp, process.env.PROPOSAL_FILE_PATH);
|
||||
'
|
||||
|
|
@ -1,272 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# gstack-distill-free-text — Layer 8 "dream cycle" batch distiller.
|
||||
#
|
||||
# Reads auq-other free-text events from this project's question-log.jsonl,
|
||||
# sends them to Claude via the Anthropic SDK, and writes structured proposals
|
||||
# the user can review via /plan-tune distill. Proposals require explicit
|
||||
# user Y before applying — never autonomous (Codex #15 trust boundary).
|
||||
#
|
||||
# Usage:
|
||||
# gstack-distill-free-text # sync, prompts at end
|
||||
# gstack-distill-free-text --background # spawn detached; results
|
||||
# # surface on next /plan-tune
|
||||
# gstack-distill-free-text --dry-run # show prompt, no API call
|
||||
# gstack-distill-free-text --status # show last-run stats
|
||||
#
|
||||
# No rate cap — the natural rate of free-text events (rare; user has to type
|
||||
# "Other" then content) bounds this loop already. Each Haiku call is ~$0.01,
|
||||
# so even a runaway at one-per-minute would be ~$14/day worst case. The
|
||||
# cumulative cost log at $GSTACK_STATE_ROOT/distill-cost.jsonl gives full
|
||||
# auditability via --status when you want it.
|
||||
# Per D6: Anthropic SDK direct call, fail-loud on missing ANTHROPIC_API_KEY.
|
||||
set -euo pipefail
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
|
||||
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
|
||||
SLUG="${SLUG:-unknown}"
|
||||
PROJECT_DIR="$GSTACK_HOME/projects/$SLUG"
|
||||
LOG_FILE="$PROJECT_DIR/question-log.jsonl"
|
||||
PROPOSAL_FILE="$PROJECT_DIR/distillation-proposals.json"
|
||||
COST_LOG="$GSTACK_HOME/distill-cost.jsonl"
|
||||
mkdir -p "$PROJECT_DIR"
|
||||
|
||||
MODE="sync"
|
||||
case "${1:-}" in
|
||||
--background) MODE="background" ;;
|
||||
--dry-run) MODE="dry-run" ;;
|
||||
--status) MODE="status" ;;
|
||||
--help|-h)
|
||||
sed -n '1,/^set -euo/p' "$0" | sed 's|^# \?||'
|
||||
exit 0
|
||||
;;
|
||||
'') ;;
|
||||
*) echo "unknown arg: $1" >&2; exit 1 ;;
|
||||
esac
|
||||
|
||||
# --- Status subcommand --------------------------------------------------
|
||||
|
||||
if [ "$MODE" = "status" ]; then
|
||||
COST_LOG_PATH="$COST_LOG" SLUG_PATH="$SLUG" bun -e '
|
||||
const fs = require("fs");
|
||||
const slug = process.env.SLUG_PATH;
|
||||
const path = process.env.COST_LOG_PATH;
|
||||
if (!fs.existsSync(path)) { console.log("no distill runs yet"); process.exit(0); }
|
||||
const lines = fs.readFileSync(path, "utf-8").trim().split("\n").filter(Boolean);
|
||||
const mine = lines.map((l) => JSON.parse(l)).filter((e) => e.slug === slug);
|
||||
if (mine.length === 0) { console.log("no distill runs yet for slug=" + slug); process.exit(0); }
|
||||
const totalUsd = mine.reduce((a, e) => a + (e.cost_usd_est || 0), 0);
|
||||
const todayIso = new Date().toISOString().slice(0, 10);
|
||||
const today = mine.filter((e) => (e.ts || "").startsWith(todayIso));
|
||||
const todayUsd = today.reduce((a, e) => a + (e.cost_usd_est || 0), 0);
|
||||
console.log("RUNS: " + mine.length);
|
||||
console.log("TODAY: " + today.length + " run(s), $" + todayUsd.toFixed(4));
|
||||
console.log("ESTIMATED_TOTAL_USD: $" + totalUsd.toFixed(4));
|
||||
const last = mine[mine.length - 1];
|
||||
console.log("LAST_RUN: " + (last.ts || "?") + " | " + (last.proposals_count || 0) + " proposals");
|
||||
'
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# --- Background mode: detach + invoke self synchronously ---------------
|
||||
|
||||
if [ "$MODE" = "background" ]; then
|
||||
nohup "$0" >/dev/null 2>&1 &
|
||||
echo "DISTILL_SPAWNED: pid=$!"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# No rate cap. Natural input rate (free-text events are rare) + Haiku price
|
||||
# (~$0.01/run) keep this bounded. Use --status to audit spend.
|
||||
|
||||
# --- Gather unprocessed auq-other events from this project -------------
|
||||
|
||||
if [ ! -f "$LOG_FILE" ]; then
|
||||
echo "NO_LOG: no question-log.jsonl in $PROJECT_DIR"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
EVENTS_JSON=$(LOG_FILE_PATH="$LOG_FILE" bun -e '
|
||||
const fs = require("fs");
|
||||
const lines = fs.readFileSync(process.env.LOG_FILE_PATH, "utf-8").trim().split("\n").filter(Boolean);
|
||||
const out = [];
|
||||
for (const l of lines) {
|
||||
try {
|
||||
const e = JSON.parse(l);
|
||||
if (e.source === "auq-other" && !e.distilled_at && e.free_text) {
|
||||
out.push({
|
||||
ts: e.ts,
|
||||
question_id: e.question_id,
|
||||
question_summary: e.question_summary,
|
||||
free_text: e.free_text,
|
||||
session_id: e.session_id,
|
||||
});
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
process.stdout.write(JSON.stringify(out));
|
||||
')
|
||||
|
||||
EVENT_COUNT=$(printf '%s' "$EVENTS_JSON" | bun -e 'const a = JSON.parse(await Bun.stdin.text()); console.log(a.length);')
|
||||
if [ "$EVENT_COUNT" -eq 0 ]; then
|
||||
echo "NO_FREE_TEXT: nothing to distill"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# --- Build distill prompt ---------------------------------------------
|
||||
|
||||
# Heredoc into temp file (avoids $(cat <<'PROMPT'...) which choked the
|
||||
# bash parser on apostrophes elsewhere in the script).
|
||||
DISTILL_PROMPT_FILE=$(mktemp)
|
||||
trap 'rm -f "$DISTILL_PROMPT_FILE"' EXIT
|
||||
cat > "$DISTILL_PROMPT_FILE" <<'PROMPT'
|
||||
You are gstack dream-cycle distiller. Below are free-text responses the
|
||||
user typed into AskUserQuestion prompts (option "Other") across recent gstack
|
||||
sessions. For each response, extract structured signal that should update the
|
||||
user plan-tune profile or preferences.
|
||||
|
||||
Return strict JSON with this shape:
|
||||
{
|
||||
"proposals": [
|
||||
{
|
||||
"kind": "preference" | "declared-nudge" | "memory-nugget",
|
||||
"confidence": 0.0-1.0,
|
||||
"source_quotes": ["<verbatim quote 1>", "<verbatim quote 2>"],
|
||||
"question_id": "<id>",
|
||||
"preference": "never-ask" | "always-ask" | "ask-only-for-one-way",
|
||||
"dimension": "scope_appetite | risk_tolerance | detail_preference | autonomy | architecture_care",
|
||||
"direction": "up | down",
|
||||
"magnitude": "small | medium | large",
|
||||
"rationale": "<one sentence>",
|
||||
"nugget": "<one-line memory>",
|
||||
"applies_to_signal_keys": ["scope-appetite", "..."]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
Rules:
|
||||
- Reject any proposal where confidence < 0.7.
|
||||
- Quote VERBATIM from the user free_text. Never paraphrase a source quote.
|
||||
- A single user response may produce multiple proposals.
|
||||
- If nothing meaningful to extract, return {"proposals": []}.
|
||||
- No commentary outside the JSON.
|
||||
PROMPT
|
||||
DISTILL_PROMPT=$(cat "$DISTILL_PROMPT_FILE")
|
||||
|
||||
# --- Dry-run: emit prompt + events, exit ------------------------------
|
||||
|
||||
if [ "$MODE" = "dry-run" ]; then
|
||||
echo "=== DISTILL PROMPT ==="
|
||||
echo "$DISTILL_PROMPT"
|
||||
echo
|
||||
echo "=== EVENTS ($EVENT_COUNT) ==="
|
||||
echo "$EVENTS_JSON" | bun -e 'console.log(JSON.stringify(JSON.parse(await Bun.stdin.text()), null, 2));'
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# --- SDK call: fail-loud on missing key -------------------------------
|
||||
|
||||
if [ -z "${ANTHROPIC_API_KEY:-}" ]; then
|
||||
cat <<EOF >&2
|
||||
gstack-distill-free-text: ANTHROPIC_API_KEY not set.
|
||||
|
||||
Dream-cycle distillation needs an API key for the SDK call. Set
|
||||
ANTHROPIC_API_KEY in your environment, or run with --dry-run to see
|
||||
what would be sent without actually calling.
|
||||
|
||||
Note: this is a separate billing/auth surface from your interactive
|
||||
Claude Code session (per Codex correction in D6).
|
||||
EOF
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Run the SDK call in bun. Emits JSON: {proposals_count, cost_usd_est}.
|
||||
RESULT=$(EVENTS_JSON="$EVENTS_JSON" DISTILL_PROMPT="$DISTILL_PROMPT" \
|
||||
PROPOSAL_FILE_PATH="$PROPOSAL_FILE" LOG_FILE_PATH="$LOG_FILE" \
|
||||
ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \
|
||||
bun --cwd "$ROOT_DIR" -e '
|
||||
const fs = require("fs");
|
||||
const Anthropic = require("@anthropic-ai/sdk").default;
|
||||
const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
|
||||
|
||||
const events = JSON.parse(process.env.EVENTS_JSON);
|
||||
const prompt = process.env.DISTILL_PROMPT + "\n\nFREE-TEXT RESPONSES (JSON array):\n" + JSON.stringify(events, null, 2);
|
||||
|
||||
// Pricing (Haiku 4.5 — cheap, fast, sufficient for structured extraction).
|
||||
// Per token, USD: input $0.001/1k = 1e-6, output $0.005/1k = 5e-6.
|
||||
const INPUT_PER_TOKEN = 1e-6;
|
||||
const OUTPUT_PER_TOKEN = 5e-6;
|
||||
|
||||
const resp = await client.messages.create({
|
||||
model: "claude-haiku-4-5-20251001",
|
||||
max_tokens: 4096,
|
||||
messages: [{ role: "user", content: prompt }],
|
||||
});
|
||||
|
||||
const text = resp.content.map((b) => (b.type === "text" ? b.text : "")).join("");
|
||||
|
||||
// Strip optional fenced code blocks the model may wrap JSON in.
|
||||
const stripped = text.replace(/^```(?:json)?\s*/i, "").replace(/```\s*$/i, "").trim();
|
||||
let parsed;
|
||||
try { parsed = JSON.parse(stripped); } catch (e) {
|
||||
process.stderr.write("DISTILL: model returned non-JSON: " + text.slice(0, 200) + "\n");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const proposals = Array.isArray(parsed.proposals) ? parsed.proposals : [];
|
||||
// Keep only proposals with confidence >= 0.7 (model is told this rule;
|
||||
// double-check in case it slipped).
|
||||
const filtered = proposals.filter((p) => typeof p.confidence === "number" && p.confidence >= 0.7);
|
||||
|
||||
// Write proposals file (overwrite — only the latest run is reviewable).
|
||||
fs.writeFileSync(process.env.PROPOSAL_FILE_PATH, JSON.stringify({
|
||||
generated_at: new Date().toISOString(),
|
||||
source_event_count: events.length,
|
||||
proposals: filtered,
|
||||
}, null, 2));
|
||||
|
||||
// Mark source events as distilled_at so they do not re-propose.
|
||||
// Update question-log.jsonl in place: read all, rewrite with distilled_at
|
||||
// set on the matching events. Match by ts + question_id.
|
||||
const logPath = process.env.LOG_FILE_PATH;
|
||||
const distilledAt = new Date().toISOString();
|
||||
const matchKeys = new Set(events.map((e) => (e.ts || "") + "::" + (e.question_id || "")));
|
||||
const lines = fs.readFileSync(logPath, "utf-8").split("\n");
|
||||
const out = [];
|
||||
for (const ln of lines) {
|
||||
if (!ln.trim()) { out.push(ln); continue; }
|
||||
try {
|
||||
const e = JSON.parse(ln);
|
||||
const key = (e.ts || "") + "::" + (e.question_id || "");
|
||||
if (matchKeys.has(key)) {
|
||||
e.distilled_at = distilledAt;
|
||||
out.push(JSON.stringify(e));
|
||||
} else {
|
||||
out.push(ln);
|
||||
}
|
||||
} catch { out.push(ln); }
|
||||
}
|
||||
fs.writeFileSync(logPath, out.join("\n"));
|
||||
|
||||
// Cost estimate from usage tokens.
|
||||
const usage = resp.usage || {};
|
||||
const inTok = usage.input_tokens || 0;
|
||||
const outTok = usage.output_tokens || 0;
|
||||
const cost = inTok * INPUT_PER_TOKEN + outTok * OUTPUT_PER_TOKEN;
|
||||
|
||||
process.stdout.write(JSON.stringify({
|
||||
proposals_count: filtered.length,
|
||||
rejected_low_confidence: proposals.length - filtered.length,
|
||||
input_tokens: inTok,
|
||||
output_tokens: outTok,
|
||||
cost_usd_est: cost,
|
||||
}));
|
||||
')
|
||||
|
||||
# Append cost log line.
|
||||
TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
|
||||
echo "{\"ts\":\"$TS\",\"slug\":\"$SLUG\",$(echo "$RESULT" | sed 's/^{//; s/}$//')}" >> "$COST_LOG"
|
||||
|
||||
echo "DISTILL_COMPLETE:"
|
||||
echo " proposals_file: $PROPOSAL_FILE"
|
||||
echo " $RESULT"
|
||||
|
|
@ -18,8 +18,7 @@
|
|||
* "gstack_brain_sync_mode": "off"|"artifacts-only"|"full",
|
||||
* "gstack_brain_git": true|false,
|
||||
* "gstack_artifacts_remote": "https://..." | "",
|
||||
* "gbrain_local_status": "ok"|"no-cli"|"missing-config"|"broken-config"|"broken-db",
|
||||
* "gbrain_pooler_mode": "transaction"|"session"|null
|
||||
* "gbrain_local_status": "ok"|"no-cli"|"missing-config"|"broken-config"|"broken-db"
|
||||
* }
|
||||
*
|
||||
* Backward compatibility (per plan codex #5): the 9 pre-existing fields stay
|
||||
|
|
@ -43,7 +42,6 @@ import {
|
|||
resolveGbrainBin,
|
||||
readGbrainVersion,
|
||||
} from "../lib/gbrain-local-status";
|
||||
import { isTransactionModePooler } from "../lib/gbrain-exec";
|
||||
|
||||
const STATE_DIR = process.env.GSTACK_HOME || join(userHome(), ".gstack");
|
||||
const SCRIPT_DIR = __dirname;
|
||||
|
|
@ -100,17 +98,6 @@ function detectConfig(): { exists: boolean; engine: "pglite" | "postgres" | null
|
|||
return { exists: true, engine: null };
|
||||
}
|
||||
|
||||
// --- pooler mode detection (#1435) ---
//
// Looks up `database_url` in the parsed gbrain config (GBRAIN_CONFIG) and asks
// isTransactionModePooler whether it points at a PgBouncer transaction-mode
// pooler (port 6543). The result is surfaced so /sync-gbrain and /setup-gbrain
// can advise users when search may require GBRAIN_PREPARE.
//
// Returns null when the config cannot be read or carries no database_url,
// "transaction" when the helper matches, and "session" otherwise. The declared
// return type also lists "unknown", but no path in this function produces it.
function detectPoolerMode(): "transaction" | "session" | "unknown" | null {
  const config = tryReadJSON(GBRAIN_CONFIG) as { database_url?: string } | null;
  const databaseUrl = config?.database_url;
  if (!databaseUrl) {
    return null;
  }
  if (isTransactionModePooler(databaseUrl)) {
    return "transaction";
  }
  return "session";
}
|
||||
|
||||
// --- gbrain doctor health (any nonzero exit or non-"ok"/"warnings" status → false) ---
|
||||
//
|
||||
// Uses --fast to avoid hanging on a dead DB. Per the local-status classifier
|
||||
|
|
@ -228,7 +215,6 @@ function main(): void {
|
|||
gstack_brain_git: detectBrainGit(),
|
||||
gstack_artifacts_remote: detectArtifactsRemote(),
|
||||
gbrain_local_status: localEngineStatus({ noCache }),
|
||||
gbrain_pooler_mode: detectPoolerMode(),
|
||||
};
|
||||
|
||||
process.stdout.write(JSON.stringify(out, null, 2) + "\n");
|
||||
|
|
|
|||
|
|
@ -19,14 +19,9 @@
|
|||
# - git
|
||||
# - network reachability to https://github.com
|
||||
#
|
||||
# gbrain installs at the latest default-branch HEAD by default — the hard pin
|
||||
# was removed in #1744 (it had drifted ~23 versions behind). Pass
|
||||
# --pinned-commit <sha> to install a specific commit for reproducibility. A
|
||||
# minimum-version floor (MIN_GBRAIN_VERSION) hard-fails the install when the
|
||||
# resulting gbrain is too old for gstack's sync integration, and a fast
|
||||
# `gbrain doctor` self-test hard-fails a broken install when gbrain is already
|
||||
# configured. This keeps the version gate that the pin used to provide without
|
||||
# freezing users 23 releases behind.
|
||||
# The pinned commit is declared here rather than resolved dynamically so
|
||||
# upgrades are explicit and reviewable. Update PINNED_COMMIT when gstack
|
||||
# verifies compatibility with a new gbrain release.
|
||||
#
|
||||
# Env:
|
||||
# GBRAIN_INSTALL_DIR — override default install path (~/gbrain)
|
||||
|
|
@ -38,14 +33,8 @@
|
|||
set -euo pipefail
|
||||
|
||||
# --- defaults ---
|
||||
# No version pin by default — install the latest default-branch HEAD (#1744).
|
||||
# --pinned-commit <sha> overrides for reproducibility.
|
||||
PINNED_COMMIT=""
|
||||
PINNED_TAG=""
|
||||
# Minimum gbrain version gstack's integration is known to work with. The
|
||||
# `sources list --json` wrapped-object shape + federated sources landed by 0.20;
|
||||
# older predates the surface gstack drives. Hard-fail below this floor (#1744).
|
||||
MIN_GBRAIN_VERSION="0.20.0"
|
||||
PINNED_COMMIT="08b3698e90532b7b66c445e6b1d8cdfe71822802" # gbrain v0.18.2
|
||||
PINNED_TAG="v0.18.2"
|
||||
GBRAIN_REPO_URL="https://github.com/garrytan/gbrain.git"
|
||||
DEFAULT_INSTALL_DIR="${GBRAIN_INSTALL_DIR:-$HOME/gbrain}"
|
||||
INSTALL_DIR="$DEFAULT_INSTALL_DIR"
|
||||
|
|
@ -124,7 +113,7 @@ elif [ -n "$DETECTED_CLONE" ]; then
|
|||
else
|
||||
# Fresh clone path.
|
||||
if $DRY_RUN; then
|
||||
log "DRY RUN: would clone $GBRAIN_REPO_URL ${PINNED_COMMIT:+@ $PINNED_COMMIT }→ $INSTALL_DIR (latest HEAD unless --pinned-commit)"
|
||||
log "DRY RUN: would clone $GBRAIN_REPO_URL @ $PINNED_COMMIT → $INSTALL_DIR"
|
||||
exit 0
|
||||
fi
|
||||
if [ -d "$INSTALL_DIR" ]; then
|
||||
|
|
@ -132,12 +121,8 @@ else
|
|||
fi
|
||||
log "cloning $GBRAIN_REPO_URL → $INSTALL_DIR"
|
||||
git clone --quiet "$GBRAIN_REPO_URL" "$INSTALL_DIR"
|
||||
if [ -n "$PINNED_COMMIT" ]; then
|
||||
( cd "$INSTALL_DIR" && git checkout --quiet "$PINNED_COMMIT" )
|
||||
log "checked out pinned commit $PINNED_COMMIT${PINNED_TAG:+ ($PINNED_TAG)}"
|
||||
else
|
||||
log "installed latest gbrain (default-branch HEAD)"
|
||||
fi
|
||||
( cd "$INSTALL_DIR" && git checkout --quiet "$PINNED_COMMIT" )
|
||||
log "pinned to $PINNED_COMMIT${PINNED_TAG:+ ($PINNED_TAG)}"
|
||||
fi
|
||||
|
||||
if $DRY_RUN; then
|
||||
|
|
@ -210,44 +195,6 @@ fi
|
|||
|
||||
log "installed gbrain $actual_version from $INSTALL_DIR"
|
||||
|
||||
# --- minimum-version floor (#1744) ---
|
||||
# Unpinning means new installs track gbrain HEAD. Hard-fail if the resulting
|
||||
# version is below the floor gstack's sync integration needs — same exit-3 posture
|
||||
# as the PATH-shadow / version-mismatch failures above. A warning here is exactly
|
||||
# how the data-loss class slipped through, so this gate fails closed.
|
||||
version_lt() {
  # Succeeds (exit 0) only when version $1 sorts strictly before version $2
  # under `sort -V`; identical versions are never "less than".
  if [ "$1" = "$2" ]; then
    return 1
  fi
  # The smaller of the two versions is the first line of the version sort.
  [ "$(printf '%s\n' "$1" "$2" | sort -V | head -n 1)" = "$1" ]
}
|
||||
if version_lt "$actual_norm" "$MIN_GBRAIN_VERSION"; then
|
||||
echo "" >&2
|
||||
echo "gstack-gbrain-install: gbrain $actual_version is below the minimum gstack-tested version ($MIN_GBRAIN_VERSION)." >&2
|
||||
echo " gstack's sync integration needs the v0.20+ source/list surface." >&2
|
||||
echo " Fix: update the gbrain clone at $INSTALL_DIR to a newer release (git pull), then" >&2
|
||||
echo " re-run /setup-gbrain. Or pass --pinned-commit <sha> to install a specific newer commit." >&2
|
||||
echo "" >&2
|
||||
exit 3
|
||||
fi
|
||||
|
||||
# --- functional self-test when gbrain is already configured (#1744) ---
|
||||
# When a brain config exists (re-install / detected clone), run a fast doctor as
|
||||
# a hard gate so a broken gbrain is caught at setup, not at data-loss time.
|
||||
# Pre-init installs skip this (config not written yet); the full
|
||||
# `/sync-gbrain --dry-run` self-test runs from /setup-gbrain after `gbrain init`.
|
||||
_GBRAIN_HOME_CHECK="${GBRAIN_HOME:-$HOME/.gbrain}"
|
||||
if [ -f "$_GBRAIN_HOME_CHECK/config.json" ]; then
|
||||
if ! gbrain doctor --fast >/dev/null 2>&1; then
|
||||
echo "" >&2
|
||||
echo "gstack-gbrain-install: gbrain $actual_version installed but 'gbrain doctor --fast' failed." >&2
|
||||
echo " Refusing to leave a broken gbrain in place. Run 'gbrain doctor' to see what's wrong," >&2
|
||||
echo " fix it, then re-run /setup-gbrain." >&2
|
||||
echo "" >&2
|
||||
exit 3
|
||||
fi
|
||||
log "gbrain doctor --fast passed"
|
||||
fi
|
||||
|
||||
# v1.40.0.0 post-install validation (T6 / codex review #19): --ignore-scripts
|
||||
# may skip artifacts gbrain needs at runtime, especially on Windows
|
||||
# MSYS/MINGW where we DID pass --ignore-scripts. `gbrain --version` above
|
||||
|
|
@ -270,13 +217,4 @@ if ! gbrain sources --help >/dev/null 2>&1; then
|
|||
fi
|
||||
|
||||
echo ""
|
||||
if [ -n "${VOYAGE_API_KEY:-}" ]; then
|
||||
echo "Next: gbrain init --pglite --embedding-model voyage:voyage-code-3 --embedding-dimensions 1024"
|
||||
echo " (or run /setup-gbrain for the full setup flow)"
|
||||
else
|
||||
echo "Next: gbrain init --pglite (or run /setup-gbrain for the full setup flow)"
|
||||
echo ""
|
||||
echo "Tip: set VOYAGE_API_KEY before init to use voyage-code-3 (best embedding"
|
||||
echo "model for code retrieval on Voyage). Without it, gbrain falls back to its"
|
||||
echo "auto-selected provider (OpenAI when OPENAI_API_KEY is set, etc.)."
|
||||
fi
|
||||
echo "Next: gbrain init --pglite (or run /setup-gbrain for the full setup flow)"
|
||||
|
|
|
|||
|
|
@ -27,22 +27,8 @@
|
|||
# restore), D16 (pooler URL paste hygiene with redacted preview).
|
||||
|
||||
# _gstack_gbrain_validate_varname <name> — returns 0 if usable, 2 otherwise.
|
||||
# `local LC_ALL=C` is load-bearing twice over:
|
||||
# 1. In many macOS shells the default locale (e.g. en_US.UTF-8) makes `case`
|
||||
# glob brackets like `[A-Z]` match lowercase letters too. Without the
|
||||
# LC_ALL=C pin, names like `lower-case` pass validation and then trip
|
||||
# `printf -v "$varname"` and `export "$varname"` with "not a valid
|
||||
# identifier" errors the caller can't easily distinguish from other
|
||||
# failures.
|
||||
# 2. `local` is required because this file is documented as a sourced helper
|
||||
# (see header), so a bare `LC_ALL=C` would mutate the caller's locale for
|
||||
# the rest of the process — silently affecting downstream `sort`, `tr`,
|
||||
# and any locale-aware glob in the same shell.
|
||||
# Together they give ASCII-only bracket semantics on both macOS and Linux
|
||||
# (matching the documented `[A-Z_][A-Z0-9_]*` contract) without leaking.
|
||||
_gstack_gbrain_validate_varname() {
|
||||
local name="$1"
|
||||
local LC_ALL=C
|
||||
case "$name" in
|
||||
[A-Z_][A-Z0-9_]*) return 0 ;;
|
||||
*) return 2 ;;
|
||||
|
|
|
|||
|
|
@ -339,7 +339,7 @@ cmd_pooler_url() {
|
|||
# Prefer the singular Session Pooler config when Supabase returns an
|
||||
# array (response shape can vary by project state). Fall back to the
|
||||
# first PRIMARY entry if no "session" pool_mode is present.
|
||||
local db_user db_host db_port db_name pool_mode
|
||||
local db_user db_host db_port db_name
|
||||
local first_or_session
|
||||
if printf '%s' "$resp" | jq -e 'type == "array"' >/dev/null 2>&1; then
|
||||
first_or_session=$(printf '%s' "$resp" | jq '[.[] | select(.pool_mode == "session")][0] // .[0]')
|
||||
|
|
@ -351,27 +351,11 @@ cmd_pooler_url() {
|
|||
db_host=$(printf '%s' "$first_or_session" | jq -r '.db_host // empty')
|
||||
db_port=$(printf '%s' "$first_or_session" | jq -r '.db_port // empty')
|
||||
db_name=$(printf '%s' "$first_or_session" | jq -r '.db_name // empty')
|
||||
pool_mode=$(printf '%s' "$first_or_session" | jq -r '.pool_mode // empty')
|
||||
|
||||
if [ -z "$db_user" ] || [ -z "$db_host" ] || [ -z "$db_port" ] || [ -z "$db_name" ]; then
|
||||
die "pooler-url: missing pooler config fields (db_user/db_host/db_port/db_name); re-poll or check project state"
|
||||
fi
|
||||
|
||||
# Issue #1301: New Supabase projects' Management API returns a single
|
||||
# transaction-mode pooler at port 6543, but the shared pooler tenant
|
||||
# for fresh projects only listens on the session port 5432. Trusting
|
||||
# db_port verbatim makes `gbrain init` hang to TCP timeout (transaction
|
||||
# port unreachable) before falling into "tenant not found"-style errors
|
||||
# that look like auth bugs. Rewrite transaction/6543 -> session/5432.
|
||||
# Override with GSTACK_SUPABASE_TRUST_API_PORT=1 if a future API version
|
||||
# starts returning a working transaction port and this rewrite is wrong.
|
||||
if [ "${GSTACK_SUPABASE_TRUST_API_PORT:-0}" != "1" ] \
|
||||
&& [ "$pool_mode" = "transaction" ] && [ "$db_port" = "6543" ]; then
|
||||
echo "pooler-url: API returned transaction pooler (port 6543); shared pooler for new projects listens on session port 5432 — rewriting (set GSTACK_SUPABASE_TRUST_API_PORT=1 to disable)" >&2
|
||||
db_port=5432
|
||||
pool_mode="session"
|
||||
fi
|
||||
|
||||
local url="postgresql://${db_user}:${DB_PASS}@${db_host}:${db_port}/${db_name}"
|
||||
|
||||
if $json_mode; then
|
||||
|
|
|
|||
|
|
@ -37,10 +37,9 @@ import { createHash } from "crypto";
|
|||
|
||||
import "../lib/conductor-env-shim";
|
||||
import { detectEngineTier, withErrorContext, canonicalizeRemote } from "../lib/gstack-memory-helpers";
|
||||
import { ensureSourceRegistered, sourcePageCount, parseSourcesList } from "../lib/gbrain-sources";
|
||||
import { detectAutopilot, decideSourceRemove, decideCodeSync } from "../lib/gbrain-guards";
|
||||
import { ensureSourceRegistered, sourcePageCount } from "../lib/gbrain-sources";
|
||||
import { localEngineStatus, type LocalEngineStatus } from "../lib/gbrain-local-status";
|
||||
import { buildGbrainEnv, spawnGbrain, execGbrainJson, NEEDS_SHELL_ON_WINDOWS } from "../lib/gbrain-exec";
|
||||
import { buildGbrainEnv, spawnGbrain, execGbrainJson } from "../lib/gbrain-exec";
|
||||
|
||||
// ── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
|
@ -53,8 +52,6 @@ interface CliArgs {
|
|||
noMemory: boolean;
|
||||
noBrainSync: boolean;
|
||||
codeOnly: boolean;
|
||||
/** #1734: opt-in to sync a URL-managed source whose code walk may auto-reclone. */
|
||||
allowReclone: boolean;
|
||||
}
|
||||
|
||||
interface CodeStageDetail {
|
||||
|
|
@ -62,7 +59,7 @@ interface CodeStageDetail {
|
|||
source_path?: string;
|
||||
page_count?: number | null;
|
||||
last_imported?: string;
|
||||
status?: "ok" | "skipped" | "failed" | "refused-autopilot" | "refused-reclone";
|
||||
status?: "ok" | "skipped" | "failed";
|
||||
}
|
||||
|
||||
interface StageResult {
|
||||
|
|
@ -83,115 +80,6 @@ const STATE_PATH = join(GSTACK_HOME, ".gbrain-sync-state.json");
|
|||
const LOCK_PATH = join(GSTACK_HOME, ".sync-gbrain.lock");
|
||||
const STALE_LOCK_MS = 5 * 60 * 1000;
|
||||
|
||||
// Default 35-minute timeout for code-walk + memory-ingest stages. Override via
// GSTACK_SYNC_CODE_TIMEOUT_MS / GSTACK_SYNC_MEMORY_TIMEOUT_MS. Bounds-checked
// in resolveStageTimeoutMs below so wildly-low values don't make resume
// useless and wildly-high values don't mask config typos. See #1611.
const DEFAULT_STAGE_TIMEOUT_MS = 35 * 60 * 1000; // 2_100_000ms = 35min
const MIN_STAGE_TIMEOUT_MS = 60_000; // 1 minute floor
const MAX_STAGE_TIMEOUT_MS = 86_400_000; // 24 hour ceiling

/**
 * Parse a stage-timeout env value with bounds validation. Exported for the
 * regression test.
 *
 * @param envValue Raw environment value; `undefined` or `""` means "not set".
 * @param envName  Name of the variable, used only in warning messages.
 * @returns The parsed timeout in milliseconds when it is a plain positive
 *   integer within [MIN_STAGE_TIMEOUT_MS, MAX_STAGE_TIMEOUT_MS]; otherwise
 *   DEFAULT_STAGE_TIMEOUT_MS (with a `console.warn` unless the value was unset).
 */
export function resolveStageTimeoutMs(
  envValue: string | undefined,
  envName: string,
): number {
  if (envValue === undefined || envValue === "") return DEFAULT_STAGE_TIMEOUT_MS;

  // Require the whole value to be decimal digits. Number.parseInt stops at the
  // first non-digit, so typos like "3600000x" or "120000.5" used to be accepted
  // silently and "35m" was misreported as "below the floor".
  const trimmed = envValue.trim();
  const n = /^\+?\d+$/.test(trimmed) ? Number(trimmed) : Number.NaN;

  if (Number.isNaN(n) || n <= 0) {
    console.warn(
      `[sync] ${envName}="${envValue}" is not a positive integer; falling back to ${DEFAULT_STAGE_TIMEOUT_MS}ms`,
    );
    return DEFAULT_STAGE_TIMEOUT_MS;
  }
  if (n < MIN_STAGE_TIMEOUT_MS) {
    console.warn(
      `[sync] ${envName}=${n} is below the ${MIN_STAGE_TIMEOUT_MS}ms (1min) floor; falling back to ${DEFAULT_STAGE_TIMEOUT_MS}ms`,
    );
    return DEFAULT_STAGE_TIMEOUT_MS;
  }
  if (n > MAX_STAGE_TIMEOUT_MS) {
    console.warn(
      `[sync] ${envName}=${n} is above the ${MAX_STAGE_TIMEOUT_MS}ms (24h) ceiling; falling back to ${DEFAULT_STAGE_TIMEOUT_MS}ms`,
    );
    return DEFAULT_STAGE_TIMEOUT_MS;
  }
  return n;
}
|
||||
|
||||
/**
 * gbrain writes ~/.gbrain/import-checkpoint.json on every import run. If a
 * previous /sync-gbrain hit the timeout (SIGTERM = exit 143), the checkpoint
 * + its staging dir survive on disk. Detect both and let gbrain resume from
 * processedIndex+1 on the next run. If the staging dir is missing/empty/
 * unreadable, fall through to a fresh restage with a one-line warning so the
 * user sees we noticed. See #1611 + plan D1/C1.
 */
interface GbrainCheckpoint {
  dir?: string;
  totalFiles?: number;
  processedIndex?: number;
  completedFiles?: number;
  timestamp?: string;
}

/**
 * Read and validate gbrain's import checkpoint file.
 *
 * @returns The checkpoint with only its correctly-typed known fields, or
 *   `null` when the file is absent, unreadable, not valid JSON, or not a JSON
 *   object (arrays and primitives are rejected).
 */
export function readGbrainCheckpoint(): GbrainCheckpoint | null {
  // Read HOME from env so tests can redirect via process.env.HOME = ...
  // (Node/Bun's os.homedir() caches at process start and ignores later
  // mutations.)
  const home = process.env.HOME || homedir();
  const cpPath = join(home, ".gbrain", "import-checkpoint.json");
  if (!existsSync(cpPath)) return null;

  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(cpPath, "utf-8"));
  } catch {
    // Unreadable file or corrupt JSON — treat as no checkpoint and fall
    // through to fresh restage.
    return null;
  }
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) return null;

  // Copy over only fields whose runtime type matches the interface, so a
  // malformed checkpoint (e.g. numeric `dir`) cannot leak into path handling.
  const raw = parsed as Record<string, unknown>;
  const checkpoint: GbrainCheckpoint = {};
  if (typeof raw.dir === "string") checkpoint.dir = raw.dir;
  if (typeof raw.totalFiles === "number" && Number.isFinite(raw.totalFiles)) {
    checkpoint.totalFiles = raw.totalFiles;
  }
  if (typeof raw.processedIndex === "number" && Number.isFinite(raw.processedIndex)) {
    checkpoint.processedIndex = raw.processedIndex;
  }
  if (typeof raw.completedFiles === "number" && Number.isFinite(raw.completedFiles)) {
    checkpoint.completedFiles = raw.completedFiles;
  }
  if (typeof raw.timestamp === "string") checkpoint.timestamp = raw.timestamp;
  return checkpoint;
}
|
||||
|
||||
export type ResumeVerdict =
|
||||
| { kind: "no-checkpoint" }
|
||||
| { kind: "resume"; stagingDir: string; processedIndex: number; totalFiles: number }
|
||||
| { kind: "stale-staging-missing"; stagingDir: string };
|
||||
|
||||
/**
 * Choose between resuming gbrain's interrupted import and restaging from
 * scratch, based on the checkpoint returned by readGbrainCheckpoint().
 *
 * - no checkpoint, or checkpoint without `dir` → `no-checkpoint`
 * - `dir` cannot be stat'ed or is not a directory → `stale-staging-missing`
 * - `dir` is an existing directory → `resume`, with `processedIndex` and
 *   `totalFiles` defaulting to 0 when the checkpoint omits them
 *
 * NOTE(review): only the directory's existence is checked here, not whether
 * it still contains staged files — confirm whether an empty staging dir
 * should also count as stale.
 */
export function decideResume(): ResumeVerdict {
  const checkpoint = readGbrainCheckpoint();
  if (checkpoint === null || !checkpoint.dir) {
    return { kind: "no-checkpoint" };
  }

  const stagingDir = checkpoint.dir;

  // A single stat covers every stale case: a missing or unreadable path
  // throws, and a path that is a regular file fails the directory check.
  let stagingIsDirectory: boolean;
  try {
    stagingIsDirectory = statSync(stagingDir).isDirectory();
  } catch {
    stagingIsDirectory = false;
  }
  if (!stagingIsDirectory) {
    return { kind: "stale-staging-missing", stagingDir };
  }

  const processedIndex = checkpoint.processedIndex ?? 0;
  const totalFiles = checkpoint.totalFiles ?? 0;
  return { kind: "resume", stagingDir, processedIndex, totalFiles };
}
|
||||
|
||||
// ── CLI ────────────────────────────────────────────────────────────────────
|
||||
|
||||
function printUsage(): void {
|
||||
|
|
@ -208,8 +96,6 @@ Options:
|
|||
--no-memory Skip the gstack-memory-ingest stage (transcripts + artifacts).
|
||||
--no-brain-sync Skip the gstack-brain-sync git pipeline stage.
|
||||
--code-only Only run the code-import stage (alias for --no-memory --no-brain-sync).
|
||||
--allow-reclone Permit the code walk for URL-managed sources (remote_url set)
|
||||
even though gbrain may auto-reclone the working tree (#1734).
|
||||
--help This text.
|
||||
|
||||
Stages run in order: code → memory ingest → curated git push.
|
||||
|
|
@ -225,7 +111,6 @@ function parseArgs(): CliArgs {
|
|||
let noMemory = false;
|
||||
let noBrainSync = false;
|
||||
let codeOnly = false;
|
||||
let allowReclone = false;
|
||||
|
||||
for (let i = 0; i < args.length; i++) {
|
||||
const a = args[i];
|
||||
|
|
@ -237,7 +122,6 @@ function parseArgs(): CliArgs {
|
|||
case "--no-code": noCode = true; break;
|
||||
case "--no-memory": noMemory = true; break;
|
||||
case "--no-brain-sync": noBrainSync = true; break;
|
||||
case "--allow-reclone": allowReclone = true; break;
|
||||
case "--code-only":
|
||||
codeOnly = true;
|
||||
noMemory = true;
|
||||
|
|
@ -254,7 +138,7 @@ function parseArgs(): CliArgs {
|
|||
}
|
||||
}
|
||||
|
||||
return { mode, quiet, noCode, noMemory, noBrainSync, codeOnly, allowReclone };
|
||||
return { mode, quiet, noCode, noMemory, noBrainSync, codeOnly };
|
||||
}
|
||||
|
||||
// ── Helpers ────────────────────────────────────────────────────────────────
|
||||
|
|
@ -403,18 +287,14 @@ function gbrainSupportsSourcesRename(env?: NodeJS.ProcessEnv): boolean {
|
|||
* `env` is the environment passed to the spawned `gbrain` process; defaults
|
||||
* to `process.env`. Tests inject a PATH that points at a gbrain shim so the
|
||||
* helper can be exercised without a real gbrain CLI.
|
||||
*
|
||||
* Shape note: `gbrain sources list --json` returns `{sources: [...]}` (v0.20+);
|
||||
* older versions returned a flat array. Accept both for forward/backward compat
|
||||
* (mirrors `probeSource`/`sourcePageCount` in lib/gbrain-sources.ts).
|
||||
*/
|
||||
export function sourceLocalPath(sourceId: string, env?: NodeJS.ProcessEnv): string | null {
|
||||
const raw = execGbrainJson<unknown>(
|
||||
const list = execGbrainJson<Array<{ id: string; local_path?: string }>>(
|
||||
["sources", "list", "--json"],
|
||||
{ baseEnv: env },
|
||||
);
|
||||
if (!raw) return null;
|
||||
const found = parseSourcesList(raw).find((s) => s.id === sourceId);
|
||||
if (!list) return null;
|
||||
const found = list.find((s) => s.id === sourceId);
|
||||
return found?.local_path ?? null;
|
||||
}
|
||||
|
||||
|
|
@ -473,50 +353,20 @@ export function planHostnameFoldMigration(
|
|||
return { kind: "pending-cleanup", oldId: legacyPathHashId };
|
||||
}
|
||||
|
||||
export interface GuardedRemoveResult {
|
||||
removed: boolean;
|
||||
/** True when a guard refused the remove (autopilot active or unsafe source). */
|
||||
skipped: boolean;
|
||||
reason: string;
|
||||
}
|
||||
|
||||
/**
 * #1734: guarded wrapper around `gbrain sources remove <id> --confirm-destructive`.
 *
 * The guards run immediately before the destructive command (E8: as late as
 * possible) to keep the autopilot race window small without a gbrain-side
 * lease:
 *   1. detectAutopilot(env) reports active → refuse, nothing is spawned.
 *   2. decideSourceRemove(sourceId, env) disallows → refuse with its reason.
 *   3. otherwise spawn the remove, appending the decision's extra args.
 *
 * @param sourceId gbrain source id to remove.
 * @param env      Environment forwarded to the guards and the spawned gbrain.
 * @returns `skipped: true` when a guard refused; otherwise `removed` reflects
 *   whether gbrain exited 0. Callers decide whether a skip is fatal.
 */
export function safeSourcesRemove(sourceId: string, env?: NodeJS.ProcessEnv): GuardedRemoveResult {
  const autopilot = detectAutopilot(env);
  if (autopilot.active) {
    const reason =
      `autopilot active (${autopilot.signal}); refusing destructive remove of ${sourceId}. Stop autopilot, then re-run /sync-gbrain.`;
    return { removed: false, skipped: true, reason };
  }

  const verdict = decideSourceRemove(sourceId, env);
  if (!verdict.allow) {
    return { removed: false, skipped: true, reason: verdict.reason };
  }

  const removeArgs = ["sources", "remove", sourceId, "--confirm-destructive", ...verdict.extraArgs];
  const { status } = spawnGbrain(removeArgs, { baseEnv: env });
  return { removed: status === 0, skipped: false, reason: verdict.reason };
}
|
||||
|
||||
/**
|
||||
* Remove an orphaned source. Called only after new-source sync verifies pages
|
||||
* exist, so the old source is provably redundant before deletion. Routed through
|
||||
* safeSourcesRemove for the #1734 guards.
|
||||
* exist, so the old source is provably redundant before deletion.
|
||||
*
|
||||
* Flag note: existing call sites used `--confirm-destructive` here and
|
||||
* `--yes` in `lib/gbrain-sources.ts` — gbrain 0.35.0.0 accepts neither
|
||||
* deterministically (the subcommand surface help is generic). We pass
|
||||
* `--confirm-destructive` to match the existing call site convention; the
|
||||
* flag-helper centralization in commit 4 (lib/gbrain-exec.ts) will resolve
|
||||
* the inconsistency across the codebase.
|
||||
*/
|
||||
export function removeOrphanedSource(oldId: string, env?: NodeJS.ProcessEnv): boolean {
|
||||
return safeSourcesRemove(oldId, env).removed;
|
||||
const r = spawnGbrain(["sources", "remove", oldId, "--confirm-destructive"], { baseEnv: env });
|
||||
return r.status === 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -695,12 +545,13 @@ async function runCodeImport(args: CliArgs): Promise<StageResult> {
|
|||
const legacyId = deriveLegacyCodeSourceId(root);
|
||||
let legacyRemoved = false;
|
||||
if (legacyId !== sourceId) {
|
||||
// #1734: route through the data-loss guards (autopilot + source-safety).
|
||||
const rm = safeSourcesRemove(legacyId, gbrainEnv);
|
||||
if (rm.skipped && !args.quiet) {
|
||||
console.error(`[sync:code] legacy-source cleanup skipped: ${rm.reason}`);
|
||||
}
|
||||
if (rm.removed) legacyRemoved = true;
|
||||
const rm = spawnGbrain(["sources", "remove", legacyId, "--confirm-destructive"], {
|
||||
timeout: 30_000,
|
||||
baseEnv: gbrainEnv,
|
||||
});
|
||||
// Treat absent-source as success (clean state). gbrain emits "not found" on
|
||||
// missing id; treat any non-zero exit without "not found" as a soft fail.
|
||||
if (rm.status === 0) legacyRemoved = true;
|
||||
}
|
||||
|
||||
// Step 0b: Hostname-fold migration (#1414).
|
||||
|
|
@ -738,80 +589,28 @@ async function runCodeImport(args: CliArgs): Promise<StageResult> {
|
|||
};
|
||||
}
|
||||
|
||||
// Step 2: Always run the page-creating file walk first, then (for --full)
|
||||
// a full re-embed.
|
||||
//
|
||||
// `gbrain reindex-code` only RE-EMBEDS pages that already exist; it never
|
||||
// walks the filesystem. On a freshly-registered source (0 pages) a --full
|
||||
// run that called reindex-code alone found nothing ("No code pages to
|
||||
// reindex"), finished in ~1s, and left the code index permanently empty
|
||||
// while still reporting OK. The page-creating walk is `sync --strategy
|
||||
// code`, so --full must run it FIRST, then reindex-code, to honor the
|
||||
// documented "full walk + reindex" contract for both fresh and populated
|
||||
// sources.
|
||||
const codeTimeoutMs = resolveStageTimeoutMs(
|
||||
process.env.GSTACK_SYNC_CODE_TIMEOUT_MS,
|
||||
"GSTACK_SYNC_CODE_TIMEOUT_MS",
|
||||
);
|
||||
// Step 2: Run sync or reindex.
|
||||
const syncArgs = args.mode === "full"
|
||||
? ["reindex-code", "--source", sourceId, "--yes"]
|
||||
: ["sync", "--strategy", "code", "--source", sourceId];
|
||||
|
||||
// #1734 guards, checked immediately before the destructive walk (E8):
|
||||
// - autopilot active → refuse (the race that wiped a working tree).
|
||||
// - URL-managed source → the walk can auto-reclone (rm-rf); require
|
||||
// --allow-reclone. Both surface a visible reason and fail the stage so the
|
||||
// verdict shows ERR rather than silently skipping protection.
|
||||
const apBeforeWalk = detectAutopilot(gbrainEnv);
|
||||
if (apBeforeWalk.active) {
|
||||
return {
|
||||
name: "code", ran: true, ok: false, duration_ms: Date.now() - t0,
|
||||
summary: `refused: gbrain autopilot active (${apBeforeWalk.signal}). Stop autopilot, then re-run /sync-gbrain.`,
|
||||
detail: { source_id: sourceId, source_path: root, status: "refused-autopilot" },
|
||||
};
|
||||
}
|
||||
const reclone = decideCodeSync(sourceId, gbrainEnv, args.allowReclone);
|
||||
if (!reclone.allow) {
|
||||
return {
|
||||
name: "code", ran: true, ok: false, duration_ms: Date.now() - t0,
|
||||
summary: `refused: ${reclone.reason}`,
|
||||
detail: { source_id: sourceId, source_path: root, status: "refused-reclone" },
|
||||
};
|
||||
}
|
||||
|
||||
const walkResult = spawnGbrain(["sync", "--strategy", "code", "--source", sourceId], {
|
||||
const syncResult = spawnGbrain(syncArgs, {
|
||||
stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"],
|
||||
timeout: codeTimeoutMs,
|
||||
timeout: 35 * 60 * 1000,
|
||||
baseEnv: gbrainEnv,
|
||||
});
|
||||
|
||||
if (walkResult.status !== 0) {
|
||||
if (syncResult.status !== 0) {
|
||||
return {
|
||||
name: "code",
|
||||
ran: true,
|
||||
ok: false,
|
||||
duration_ms: Date.now() - t0,
|
||||
summary: `gbrain sync --strategy code --source ${sourceId} exited ${walkResult.status}`,
|
||||
summary: `gbrain ${syncArgs.join(" ")} exited ${syncResult.status}`,
|
||||
detail: { source_id: sourceId, source_path: root, status: "failed" },
|
||||
};
|
||||
}
|
||||
|
||||
if (args.mode === "full") {
|
||||
const reindexResult = spawnGbrain(["reindex-code", "--source", sourceId, "--yes"], {
|
||||
stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"],
|
||||
timeout: codeTimeoutMs,
|
||||
baseEnv: gbrainEnv,
|
||||
});
|
||||
|
||||
if (reindexResult.status !== 0) {
|
||||
return {
|
||||
name: "code",
|
||||
ran: true,
|
||||
ok: false,
|
||||
duration_ms: Date.now() - t0,
|
||||
summary: `gbrain reindex-code --source ${sourceId} exited ${reindexResult.status}`,
|
||||
detail: { source_id: sourceId, source_path: root, status: "failed" },
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Step 3: Pin this worktree's CWD to the source via .gbrain-source. Subsequent
|
||||
// gbrain code-def / code-refs / code-callers calls from anywhere under <root>
|
||||
// route to this source by default — no --source flag needed.
|
||||
|
|
@ -939,25 +738,6 @@ function runMemoryIngest(args: CliArgs): StageResult {
|
|||
return skipStageForLocalStatus("memory", localStatus, t0);
|
||||
}
|
||||
|
||||
// Resume detection (#1611 / plan D1 + C1). If a previous run hit the
|
||||
// timeout and gbrain left ~/.gbrain/import-checkpoint.json plus its staging
|
||||
// dir on disk, signal the grandchild via env so it skips the prepare phase
|
||||
// and lets `gbrain import` resume from processedIndex+1 against the same
|
||||
// staging dir. If the staging dir is gone (disk pressure cleanup, OS
|
||||
// reboot, user manual cleanup), warn and fall through to a fresh restage.
|
||||
const resume = decideResume();
|
||||
const childEnv = buildGbrainEnv({ announce: false });
|
||||
if (resume.kind === "resume") {
|
||||
console.error(
|
||||
`[sync:memory] resuming from gbrain checkpoint (${resume.processedIndex}/${resume.totalFiles} files staged at ${resume.stagingDir})`,
|
||||
);
|
||||
childEnv.GSTACK_INGEST_RESUME_DIR = resume.stagingDir;
|
||||
} else if (resume.kind === "stale-staging-missing") {
|
||||
console.error(
|
||||
`[sync:memory] previous checkpoint stale (staging dir ${resume.stagingDir} gone), restaging from scratch`,
|
||||
);
|
||||
}
|
||||
|
||||
const ingestPath = join(import.meta.dir, "gstack-memory-ingest.ts");
|
||||
const ingestArgs = ["run", ingestPath];
|
||||
if (args.mode === "full") ingestArgs.push("--bulk");
|
||||
|
|
@ -968,14 +748,10 @@ function runMemoryIngest(args: CliArgs): StageResult {
|
|||
// .env.local footgun affects gstack-memory-ingest.ts too, not just the
|
||||
// direct gbrain spawns in this file). The grandchild calls gbrain import
|
||||
// internally and must see the DATABASE_URL from gbrain's own config.
|
||||
const memoryTimeoutMs = resolveStageTimeoutMs(
|
||||
process.env.GSTACK_SYNC_MEMORY_TIMEOUT_MS,
|
||||
"GSTACK_SYNC_MEMORY_TIMEOUT_MS",
|
||||
);
|
||||
const result = spawnSync("bun", ingestArgs, {
|
||||
encoding: "utf-8",
|
||||
timeout: memoryTimeoutMs,
|
||||
env: childEnv,
|
||||
timeout: 35 * 60 * 1000,
|
||||
env: buildGbrainEnv({ announce: false }),
|
||||
});
|
||||
|
||||
// D6: parse [memory-ingest] lines from the child's stderr. ERR-prefixed
|
||||
|
|
@ -1017,17 +793,13 @@ function runBrainSyncPush(args: CliArgs): StageResult {
|
|||
return { name: "brain-sync", ran: false, ok: true, duration_ms: 0, summary: "skipped (gstack-brain-sync not installed)" };
|
||||
}
|
||||
|
||||
// #1731: gstack-brain-sync is a bash shebang script; Windows can't spawn it
|
||||
// without a shell, which surfaced as "brain-sync exited undefined".
|
||||
spawnSync(brainSyncPath, ["--discover-new"], {
|
||||
stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"],
|
||||
timeout: 60 * 1000,
|
||||
shell: NEEDS_SHELL_ON_WINDOWS,
|
||||
});
|
||||
const result = spawnSync(brainSyncPath, ["--once"], {
|
||||
stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"],
|
||||
timeout: 60 * 1000,
|
||||
shell: NEEDS_SHELL_ON_WINDOWS,
|
||||
});
|
||||
|
||||
return {
|
||||
|
|
|
|||
|
|
@ -273,23 +273,16 @@ function resolveClaudeCodeCwd(
|
|||
return null;
|
||||
}
|
||||
|
||||
export function extractCwdFromJsonl(filePath: string): string | null {
|
||||
// Read a capped prefix so huge JSONL files don't blow up memory. 64KB
|
||||
// comfortably fits the largest observed session headers; the old 8KB cap
|
||||
// would sometimes fall inside a single long line and silently drop the
|
||||
// project (JSON.parse failure on the truncated tail).
|
||||
const MAX_BYTES = 64 * 1024;
|
||||
const MAX_LINES = 30;
|
||||
function extractCwdFromJsonl(filePath: string): string | null {
|
||||
try {
|
||||
// Read only the first 8KB to avoid loading huge JSONL files into memory
|
||||
const fd = openSync(filePath, "r");
|
||||
const buf = Buffer.alloc(MAX_BYTES);
|
||||
const bytesRead = readSync(fd, buf, 0, MAX_BYTES, 0);
|
||||
const buf = Buffer.alloc(8192);
|
||||
const bytesRead = readSync(fd, buf, 0, 8192, 0);
|
||||
closeSync(fd);
|
||||
const text = buf.toString("utf-8", 0, bytesRead);
|
||||
// Drop the final segment — it may be an incomplete line at the cap boundary.
|
||||
const parts = text.split("\n");
|
||||
const completeLines = parts.length > 1 ? parts.slice(0, -1) : parts;
|
||||
for (const line of completeLines.slice(0, MAX_LINES)) {
|
||||
const lines = text.split("\n").slice(0, 15);
|
||||
for (const line of lines) {
|
||||
if (!line.trim()) continue;
|
||||
try {
|
||||
const obj = JSON.parse(line);
|
||||
|
|
|
|||
|
|
@ -1,39 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# gstack-ios-qa-daemon — Mac-side daemon that brokers tailnet/loopback traffic
|
||||
# to a connected iPhone running the in-app StateServer over the CoreDevice USB
|
||||
# tunnel. Single-instance via flock on ~/.gstack/ios-qa-daemon.pid.
|
||||
#
|
||||
# Usage:
|
||||
# gstack-ios-qa-daemon # loopback-only (local USB)
|
||||
# gstack-ios-qa-daemon --tailnet # additionally open tailnet listener
|
||||
#
|
||||
# Environment:
|
||||
# GSTACK_IOS_DAEMON_PORT — loopback listener port (default 9099)
|
||||
# GSTACK_IOS_TARGET_UDID — target iOS device UDID (optional; otherwise
|
||||
# the first paired connected device is used)
|
||||
# GSTACK_IOS_TARGET_BUNDLE_ID — bundle ID of the iOS app hosting StateServer
|
||||
# (default com.gstack.iosqa.fixture)
|
||||
#
|
||||
# Readiness protocol: prints `READY: port=<n> pid=<pid>` to stdout once both
|
||||
# listeners are bound. Spawners read the daemon's stdout with a ~5s timeout to confirm.
|
||||
#
|
||||
# Exits cleanly when no active loopback clients are connected AND no remote
|
||||
# session tokens are outstanding.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
GSTACK_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
ENTRY="$GSTACK_DIR/ios-qa/daemon/src/index.ts"
|
||||
|
||||
if [ ! -f "$ENTRY" ]; then
|
||||
echo "gstack-ios-qa-daemon: missing $ENTRY (gstack install incomplete?)" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! command -v bun >/dev/null 2>&1; then
|
||||
echo "gstack-ios-qa-daemon: bun runtime not on PATH — install from https://bun.sh" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
exec bun run "$ENTRY" "$@"
|
||||
|
|
@ -1,28 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# gstack-ios-qa-mint — manage the tailnet allowlist for remote iOS QA agents.
|
||||
#
|
||||
# This is the owner-grant path: it writes identities into the local allowlist
|
||||
# so a remote agent on the tailnet can self-service mint a session token via
|
||||
# POST /auth/mint against the daemon.
|
||||
#
|
||||
# Run `gstack-ios-qa-mint --help` for full usage.
|
||||
#
|
||||
# Allowlist file: ~/.gstack/ios-qa-allowlist.json (mode 0600).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
GSTACK_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
ENTRY="$GSTACK_DIR/ios-qa/daemon/src/cli-mint.ts"
|
||||
|
||||
if [ ! -f "$ENTRY" ]; then
|
||||
echo "gstack-ios-qa-mint: missing $ENTRY (gstack install incomplete?)" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! command -v bun >/dev/null 2>&1; then
|
||||
echo "gstack-ios-qa-mint: bun runtime not on PATH — install from https://bun.sh" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
exec bun run "$ENTRY" "$@"
|
||||
|
|
@ -53,25 +53,18 @@ for path in paths:
|
|||
continue
|
||||
if line in seen:
|
||||
continue
|
||||
# Prefer ISO ts field for sort; fall back to SHA-256. The line
|
||||
# content is the final tiebreaker so the order is total: two
|
||||
# entries sharing a ts must resolve identically regardless of
|
||||
# which side they arrive on. Without it, equal-ts entries fall
|
||||
# back to insertion order (base, ours, theirs), and since ours
|
||||
# and theirs are swapped depending on which machine runs the
|
||||
# merge, the two sides produce divergent files that never
|
||||
# converge.
|
||||
# Prefer ISO ts field for sort; fall back to SHA-256.
|
||||
sort_key = None
|
||||
try:
|
||||
obj = json.loads(line)
|
||||
ts = obj.get('ts') or obj.get('timestamp')
|
||||
if isinstance(ts, str):
|
||||
sort_key = (0, ts, line)
|
||||
sort_key = (0, ts)
|
||||
except (json.JSONDecodeError, ValueError, TypeError):
|
||||
pass
|
||||
if sort_key is None:
|
||||
h = hashlib.sha256(line.encode('utf-8')).hexdigest()
|
||||
sort_key = (1, h, line)
|
||||
sort_key = (1, h)
|
||||
seen[line] = sort_key
|
||||
except FileNotFoundError:
|
||||
# Absent base / absent ours / absent theirs are all valid.
|
||||
|
|
|
|||
|
|
@ -27,53 +27,35 @@ done
|
|||
|
||||
LEARNINGS_FILE="$GSTACK_HOME/projects/$SLUG/learnings.jsonl"
|
||||
|
||||
# Collect cross-project JSONL files separately so the trust gate can distinguish
|
||||
# current-project rows from rows loaded from other projects.
|
||||
CROSS_FILES=()
|
||||
# Collect all JSONL files to search
|
||||
FILES=()
|
||||
[ -f "$LEARNINGS_FILE" ] && FILES+=("$LEARNINGS_FILE")
|
||||
|
||||
if [ "$CROSS_PROJECT" = true ]; then
|
||||
# Add other projects' learnings (max 5)
|
||||
while IFS= read -r f; do
|
||||
CROSS_FILES+=("$f")
|
||||
[ ${#CROSS_FILES[@]} -ge 5 ] && break
|
||||
done < <(find "$GSTACK_HOME/projects" -name "learnings.jsonl" -not -path "*/$SLUG/*" 2>/dev/null)
|
||||
# Add other projects' learnings (max 5, in find's output order — not sorted)
|
||||
for f in $(find "$GSTACK_HOME/projects" -name "learnings.jsonl" -not -path "*/$SLUG/*" 2>/dev/null | head -5); do
|
||||
FILES+=("$f")
|
||||
done
|
||||
fi
|
||||
|
||||
if [ ! -f "$LEARNINGS_FILE" ] && [ ${#CROSS_FILES[@]} -eq 0 ]; then
|
||||
if [ ${#FILES[@]} -eq 0 ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# emit_tagged_file TAG FILE
#
# Writes every non-empty line of FILE to stdout as "TAG<TAB>line".
#   $1  tag prepended to each emitted line
#   $2  path of the file to read
#
# The `|| [ -n "$line" ]` clause keeps a final line that lacks a trailing
# newline. The blank-line check is an `if` rather than `[ ... ] && printf`:
# with the `&&` form a file whose last line is blank leaves status 1 as the
# loop's (and therefore the function's) exit status, which can abort a caller
# running under `set -e`. Output is unchanged; the function now returns 0
# whenever FILE could be opened.
emit_tagged_file() {
  local tag="$1"
  local file="$2"
  local line
  while IFS= read -r line || [ -n "$line" ]; do
    if [ -n "$line" ]; then
      printf '%s\t%s\n' "$tag" "$line"
    fi
  done < "$file"
}
|
||||
|
||||
# Process all files through bun for JSON parsing, decay, dedup, filtering
|
||||
{
|
||||
[ -f "$LEARNINGS_FILE" ] && emit_tagged_file current "$LEARNINGS_FILE"
|
||||
if [ ${#CROSS_FILES[@]} -gt 0 ]; then
|
||||
for f in "${CROSS_FILES[@]}"; do
|
||||
emit_tagged_file cross "$f"
|
||||
done
|
||||
fi
|
||||
} | GSTACK_SEARCH_TYPE="$TYPE" GSTACK_SEARCH_QUERY="$QUERY" GSTACK_SEARCH_LIMIT="$LIMIT" GSTACK_SEARCH_CROSS="$CROSS_PROJECT" bun -e "
|
||||
GSTACK_SEARCH_TYPE="$TYPE" GSTACK_SEARCH_QUERY="$QUERY" GSTACK_SEARCH_LIMIT="$LIMIT" GSTACK_SEARCH_SLUG="$SLUG" GSTACK_SEARCH_CROSS="$CROSS_PROJECT" \
|
||||
cat "${FILES[@]}" 2>/dev/null | GSTACK_SEARCH_TYPE="$TYPE" GSTACK_SEARCH_QUERY="$QUERY" GSTACK_SEARCH_LIMIT="$LIMIT" GSTACK_SEARCH_SLUG="$SLUG" GSTACK_SEARCH_CROSS="$CROSS_PROJECT" bun -e "
|
||||
const lines = (await Bun.stdin.text()).trim().split('\n').filter(Boolean);
|
||||
const now = Date.now();
|
||||
const type = process.env.GSTACK_SEARCH_TYPE || '';
|
||||
const queryRaw = (process.env.GSTACK_SEARCH_QUERY || '').toLowerCase();
|
||||
const queryTokens = queryRaw.split(/\s+/).filter(Boolean);
|
||||
const limit = parseInt(process.env.GSTACK_SEARCH_LIMIT || '10', 10);
|
||||
const slug = process.env.GSTACK_SEARCH_SLUG || '';
|
||||
|
||||
const entries = [];
|
||||
for (const taggedLine of lines) {
|
||||
for (const line of lines) {
|
||||
try {
|
||||
const tabIndex = taggedLine.indexOf('\t');
|
||||
const sourceTag = tabIndex === -1 ? 'current' : taggedLine.slice(0, tabIndex);
|
||||
const line = tabIndex === -1 ? taggedLine : taggedLine.slice(tabIndex + 1);
|
||||
const e = JSON.parse(line);
|
||||
if (!e.key || !e.type) continue;
|
||||
|
||||
|
|
@ -87,7 +69,7 @@ for (const taggedLine of lines) {
|
|||
|
||||
// Determine if this is from the current project or cross-project
|
||||
// Cross-project entries are tagged for display
|
||||
const isCrossProject = sourceTag === 'cross';
|
||||
const isCrossProject = !line.includes(slug) && process.env.GSTACK_SEARCH_CROSS === 'true';
|
||||
e._crossProject = isCrossProject;
|
||||
|
||||
// Trust gate: cross-project learnings only loaded if trusted (user-stated)
|
||||
|
|
|
|||
|
|
@ -194,7 +194,7 @@ Options:
|
|||
--all-history Walk transcripts older than 90 days too.
|
||||
--sources <list> Comma-separated subset: ${ALL_TYPES.join(",")}
|
||||
--limit <N> Stop after N pages written (smoke testing).
|
||||
--no-write Skip gbrain put calls (still updates state file).
|
||||
--no-write Skip gbrain put_page calls (still updates state file).
|
||||
Used by tests + dry runs without actual ingest.
|
||||
--scan-secrets Opt-in per-file gitleaks scan during prepare. Off by
|
||||
default; gstack-brain-sync already gates the git-push
|
||||
|
|
@ -1061,7 +1061,7 @@ async function probeMode(args: CliArgs): Promise<ProbeReport> {
|
|||
}
|
||||
|
||||
// Per ED2: ~25-35 min for ~11.7K transcripts = ~150ms/page synchronous
|
||||
// (gitleaks + render + put + embedding). Scale linearly.
|
||||
// (gitleaks + render + put_page + embedding). Scale linearly.
|
||||
const estimateMinutes = Math.max(1, Math.round((newCount + updatedCount) * 0.15 / 60));
|
||||
|
||||
return {
|
||||
|
|
@ -1272,39 +1272,13 @@ function cleanupStagingDir(dir: string): void {
|
|||
* 1. forward the signal to the child (otherwise gbrain orphans, holds the
|
||||
* PGLite write lock, and burns CPU — observed during 2026-05-10 cold-run
|
||||
* testing)
|
||||
* 2. PRESERVE the staging dir when gbrain has written an import-checkpoint
|
||||
* pointing at it (the next /sync-gbrain run can resume from
|
||||
* processedIndex+1). Otherwise synchronously clean up before
|
||||
* process.exit, since `finally` blocks in ingestPass never run after
|
||||
* process.exit fires from inside a signal handler.
|
||||
*
|
||||
* Resume semantics added for #1611: prior behavior unconditionally cleaned
|
||||
* up the staging dir on SIGTERM, so the gbrain checkpoint always pointed at
|
||||
* a missing dir and the next run had to restage from scratch.
|
||||
* 2. synchronously clean up the staging dir BEFORE process.exit (otherwise
|
||||
* finally blocks in async callers don't run after process.exit from
|
||||
* inside a signal handler, leaking the staging dir on every interrupt)
|
||||
*/
|
||||
let _activeImportChild: ChildProcess | null = null;
|
||||
let _activeStagingDir: string | null = null;
|
||||
let _signalHandlersInstalled = false;
|
||||
|
||||
/**
 * Reports whether gbrain's import checkpoint refers to `stagingDir`.
 *
 * Looks for `<home>/.gbrain/import-checkpoint.json` and compares its `dir`
 * field to `stagingDir` with strict string equality. A missing, unreadable,
 * or malformed checkpoint file yields `false`.
 *
 * @param stagingDir staging directory to look for in the checkpoint
 * @returns true only when the checkpoint's `dir` equals `stagingDir`
 */
function stagingDirIsCheckpointed(stagingDir: string): boolean {
  try {
    // HOME from the environment takes precedence so tests can redirect it.
    const homeDir = process.env.HOME || homedir();
    const checkpointPath = join(homeDir, ".gbrain", "import-checkpoint.json");
    if (!existsSync(checkpointPath)) {
      return false;
    }
    const checkpoint = JSON.parse(readFileSync(checkpointPath, "utf-8")) as { dir?: string };
    return checkpoint.dir === stagingDir;
  } catch {
    // Any read or parse failure is treated as "not checkpointed".
    return false;
  }
}
|
||||
|
||||
function installSignalForwarder(): void {
|
||||
if (_signalHandlersInstalled) return;
|
||||
_signalHandlersInstalled = true;
|
||||
|
|
@ -1316,24 +1290,11 @@ function installSignalForwarder(): void {
|
|||
// child may have already exited between the alive-check and the kill
|
||||
}
|
||||
}
|
||||
// Synchronously clean up the active staging dir before exiting. The async
|
||||
// `finally` blocks in ingestPass never run after process.exit fires from
|
||||
// inside this handler, so cleanup has to happen here.
|
||||
if (_activeStagingDir) {
|
||||
if (stagingDirIsCheckpointed(_activeStagingDir)) {
|
||||
// Preserve for next-run resume. The orchestrator's decideResume()
|
||||
// (in gstack-gbrain-sync.ts) will see the checkpoint + dir and
|
||||
// re-invoke gbrain import against this same staging dir, picking
|
||||
// up from processedIndex+1. See #1611.
|
||||
try {
|
||||
process.stderr.write(
|
||||
`[memory-ingest] ${signal} received — preserving staging dir for resume: ${_activeStagingDir}\n`,
|
||||
);
|
||||
} catch {
|
||||
// best-effort: stderr may be closed already
|
||||
}
|
||||
} else {
|
||||
// No checkpoint pointing here — the import never reached gbrain or
|
||||
// crashed before writing one. Clean up so we don't leak the dir.
|
||||
cleanupStagingDir(_activeStagingDir);
|
||||
}
|
||||
cleanupStagingDir(_activeStagingDir);
|
||||
_activeStagingDir = null;
|
||||
}
|
||||
// Re-raise to default action so the parent actually exits. Without this,
|
||||
|
|
@ -1349,32 +1310,10 @@ function installSignalForwarder(): void {
|
|||
* that kill the child on parent SIGTERM/SIGINT. Returns the same shape as
|
||||
* spawnSync's result so the caller doesn't care which mode was used.
|
||||
*/
|
||||
/**
 * #1611: the `gbrain import` is the long pole on big brains. Its timeout is
 * configurable via GSTACK_INGEST_TIMEOUT_MS (default 30 min, 1min–24h) so large
 * memory corpora aren't SIGTERM'd mid-import.
 */
const DEFAULT_IMPORT_TIMEOUT_MS = 30 * 60 * 1000;

/**
 * Resolves the import timeout in milliseconds from a raw string setting.
 *
 * @param raw raw setting; defaults to `process.env.GSTACK_INGEST_TIMEOUT_MS`
 * @returns the parsed value when it is a whole number of milliseconds within
 *   60000–86400000; otherwise `DEFAULT_IMPORT_TIMEOUT_MS`. An unset or empty
 *   value returns the default silently; any other rejected value is reported
 *   on stderr first.
 */
export function resolveImportTimeoutMs(
  raw: string | undefined = process.env.GSTACK_INGEST_TIMEOUT_MS,
): number {
  if (raw === undefined || raw === "") return DEFAULT_IMPORT_TIMEOUT_MS;

  const minMs = 60_000;
  const maxMs = 86_400_000;

  // Parse strictly: parseInt would accept a numeric prefix and ignore the rest
  // ("120000ms" -> 120000, "90000.9" -> 90000), letting malformed settings
  // through validation. Only an optionally signed run of digits is accepted.
  const trimmed = raw.trim();
  const n = /^\+?\d+$/.test(trimmed) ? Number(trimmed) : Number.NaN;

  if (!Number.isSafeInteger(n) || n < minMs || n > maxMs) {
    console.error(
      `[memory-ingest] GSTACK_INGEST_TIMEOUT_MS="${raw}" invalid (need 60000–86400000ms); using ${DEFAULT_IMPORT_TIMEOUT_MS}ms`,
    );
    return DEFAULT_IMPORT_TIMEOUT_MS;
  }
  return n;
}
|
||||
|
||||
function runGbrainImport(
|
||||
stagingDir: string,
|
||||
timeoutMs: number,
|
||||
): Promise<{ status: number | null; stdout: string; stderr: string; timedOut: boolean }> {
|
||||
): Promise<{ status: number | null; stdout: string; stderr: string }> {
|
||||
installSignalForwarder();
|
||||
return new Promise((resolve) => {
|
||||
// Seed DATABASE_URL from gbrain's own config so this stage works
|
||||
|
|
@ -1407,7 +1346,6 @@ function runGbrainImport(
|
|||
status: timedOut ? null : status,
|
||||
stdout,
|
||||
stderr,
|
||||
timedOut,
|
||||
});
|
||||
});
|
||||
child.on("error", (err) => {
|
||||
|
|
@ -1417,7 +1355,6 @@ function runGbrainImport(
|
|||
status: null,
|
||||
stdout,
|
||||
stderr: stderr + `\n[spawn-error] ${(err as Error).message}`,
|
||||
timedOut,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -1437,7 +1374,7 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
|
|||
if (args.noWrite) {
|
||||
// --no-write: skip the gbrain import call but still record state for
|
||||
// prepared pages (treat them as ingested for dedup purposes). Matches
|
||||
// the prior contract from --help: "Skip gbrain put calls (still
|
||||
// the prior contract from --help: "Skip gbrain put_page calls (still
|
||||
// updates state file)".
|
||||
const nowIso = new Date().toISOString();
|
||||
for (const p of prep.prepared) {
|
||||
|
|
@ -1507,46 +1444,19 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
|
|||
// entirely. gstack-brain-sync push will pick the dir up via its allowlist
|
||||
// and the brain admin's pull job will index transcripts into the remote
|
||||
// brain. Local PGLite (if any) stays code-only.
|
||||
//
|
||||
// Resume branch for #1611: when the orchestrator sets
|
||||
// GSTACK_INGEST_RESUME_DIR (because gbrain's import-checkpoint.json points
|
||||
// at an existing dir from a prior SIGTERM'd run), reuse that staging dir
|
||||
// and skip the prepare/writeStaged phase entirely. gbrain's checkpoint
|
||||
// tells it where to resume.
|
||||
const remoteHttpMode = isRemoteHttpMcpMode();
|
||||
const resumeDir = process.env.GSTACK_INGEST_RESUME_DIR;
|
||||
const resuming = !remoteHttpMode
|
||||
&& typeof resumeDir === "string"
|
||||
&& resumeDir.length > 0
|
||||
&& existsSync(resumeDir);
|
||||
const stagingDir = resuming
|
||||
? resumeDir!
|
||||
: remoteHttpMode
|
||||
? makePersistentTranscriptDir()
|
||||
: makeStagingDir();
|
||||
const stagingDir = remoteHttpMode
|
||||
? makePersistentTranscriptDir()
|
||||
: makeStagingDir();
|
||||
// Register staging dir with the signal forwarder so SIGTERM/SIGINT can
|
||||
// either preserve (when gbrain checkpointed it) or synchronously clean up.
|
||||
// The async finally block below does NOT run after a signal-handler exit.
|
||||
// In remote-http mode we skip registration — the dir is meant to persist.
|
||||
// synchronously clean it up before process.exit (the async finally block
|
||||
// below does NOT run after a signal-handler exit). In remote-http mode we
|
||||
// skip registration — the dir is meant to persist.
|
||||
if (!remoteHttpMode) {
|
||||
_activeStagingDir = stagingDir;
|
||||
}
|
||||
try {
|
||||
let staging: StagingResult;
|
||||
if (resuming) {
|
||||
// Pages are already on disk from the previous run. Skip writeStaged.
|
||||
// The "written" count for the verdict reflects what's on disk now;
|
||||
// gbrain's import will skip already-completed entries via its own
|
||||
// checkpoint (processedIndex+1).
|
||||
if (!args.quiet) {
|
||||
console.error(
|
||||
`[memory-ingest] resuming previous staging dir ${stagingDir} (skipping prepare phase)`,
|
||||
);
|
||||
}
|
||||
staging = { staging_dir: stagingDir, written: prep.prepared.length, errors: [], stagedPathToSource: new Map() };
|
||||
} else {
|
||||
staging = writeStaged(prep.prepared, stagingDir);
|
||||
}
|
||||
const staging = writeStaged(prep.prepared, stagingDir);
|
||||
failed += staging.errors.length;
|
||||
if (!args.quiet && staging.errors.length > 0) {
|
||||
for (const e of staging.errors.slice(0, 5)) {
|
||||
|
|
@ -1632,33 +1542,13 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
|
|||
// spawn, parent termination orphans the gbrain process (observed
|
||||
// during 2026-05-10 cold-run testing — gbrain kept running 15 min
|
||||
// after the orchestrator timed out).
|
||||
const importResult = await runGbrainImport(stagingDir, resolveImportTimeoutMs());
|
||||
const importResult = await runGbrainImport(stagingDir, 30 * 60 * 1000);
|
||||
|
||||
const stdout = importResult.stdout || "";
|
||||
const stderr = importResult.stderr || "";
|
||||
const importJson = parseImportJson(stdout);
|
||||
|
||||
if (importResult.status !== 0) {
|
||||
// #1611: on timeout, gbrain's import-checkpoint.json is preserved (the
|
||||
// SIGTERM forwarder keeps the staging dir), so the next /sync-gbrain
|
||||
// resumes rather than restarting. Tell the user instead of looking failed.
|
||||
if (importResult.timedOut) {
|
||||
const mins = Math.round(resolveImportTimeoutMs() / 60000);
|
||||
const msg =
|
||||
`gbrain import timed out after ${mins}min; checkpoint preserved — re-run ` +
|
||||
`/sync-gbrain to resume (raise GSTACK_INGEST_TIMEOUT_MS for big brains)`;
|
||||
console.error(`[memory-ingest] ${msg}`);
|
||||
return {
|
||||
written: 0,
|
||||
skipped_secret: prep.skippedSecret,
|
||||
skipped_dedup: prep.skippedDedup,
|
||||
skipped_unattributed: prep.skippedUnattributed,
|
||||
failed,
|
||||
duration_ms: Date.now() - t0,
|
||||
partial_pages: prep.partialPages,
|
||||
system_error: msg,
|
||||
};
|
||||
}
|
||||
const tail = (stderr.trim().split("\n").pop() || "").slice(0, 300);
|
||||
const msg = `gbrain import exited ${importResult.status}: ${tail}`;
|
||||
console.error(`[memory-ingest] ERR: ${msg}`);
|
||||
|
|
@ -1854,12 +1744,7 @@ async function main(): Promise<void> {
|
|||
if (result.system_error) process.exit(1);
|
||||
}
|
||||
|
||||
// Guard so the module is import-safe for unit tests (e.g. resolveImportTimeoutMs).
|
||||
// The orchestrator runs it as `bun gstack-memory-ingest.ts ...`, where
|
||||
// import.meta.main is true, so the CLI path is unaffected.
|
||||
if (import.meta.main) {
|
||||
main().catch((err) => {
|
||||
console.error(`gstack-memory-ingest fatal: ${err instanceof Error ? err.message : String(err)}`);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
main().catch((err) => {
|
||||
console.error(`gstack-memory-ingest fatal: ${err instanceof Error ? err.message : String(err)}`);
|
||||
process.exit(1);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -40,40 +40,16 @@ const ADAPTER_FACTORIES = {
|
|||
|
||||
type OutputFormat = 'table' | 'json' | 'markdown';
|
||||
|
||||
const CLI_ARGS = process.argv.slice(2);
|
||||
const VALUE_FLAGS = new Set(['--models', '--prompt', '--workdir', '--timeout-ms', '--output']);
|
||||
|
||||
function arg(name: string, def?: string): string | undefined {
|
||||
const idx = CLI_ARGS.findIndex(a => a === name || a.startsWith(name + '='));
|
||||
const idx = process.argv.findIndex(a => a === name || a.startsWith(name + '='));
|
||||
if (idx < 0) return def;
|
||||
const eqIdx = CLI_ARGS[idx].indexOf('=');
|
||||
if (eqIdx >= 0) return CLI_ARGS[idx].slice(eqIdx + 1);
|
||||
return CLI_ARGS[idx + 1];
|
||||
const eqIdx = process.argv[idx].indexOf('=');
|
||||
if (eqIdx >= 0) return process.argv[idx].slice(eqIdx + 1);
|
||||
return process.argv[idx + 1];
|
||||
}
|
||||
|
||||
function flag(name: string): boolean {
|
||||
return CLI_ARGS.includes(name);
|
||||
}
|
||||
|
||||
function positionalArgs(args: string[]): string[] {
|
||||
const positional: string[] = [];
|
||||
for (let i = 0; i < args.length; i++) {
|
||||
const current = args[i];
|
||||
if (current === '--') {
|
||||
positional.push(...args.slice(i + 1));
|
||||
break;
|
||||
}
|
||||
if (current.startsWith('--')) {
|
||||
const eqIdx = current.indexOf('=');
|
||||
const flagName = eqIdx >= 0 ? current.slice(0, eqIdx) : current;
|
||||
if (eqIdx < 0 && VALUE_FLAGS.has(flagName) && i + 1 < args.length) {
|
||||
i++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
positional.push(current);
|
||||
}
|
||||
return positional;
|
||||
return process.argv.includes(name);
|
||||
}
|
||||
|
||||
function parseProviders(s: string | undefined): Array<'claude' | 'gpt' | 'gemini'> {
|
||||
|
|
@ -103,7 +79,7 @@ function resolvePrompt(positional: string | undefined): string {
|
|||
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
const positional = positionalArgs(CLI_ARGS)[0];
|
||||
const positional = process.argv.slice(2).find(a => !a.startsWith('--'));
|
||||
const prompt = resolvePrompt(positional);
|
||||
const providers = parseProviders(arg('--models'));
|
||||
const workdir = arg('--workdir', process.cwd())!;
|
||||
|
|
|
|||
|
|
@ -10,14 +10,7 @@
|
|||
//
|
||||
// Usage:
|
||||
// gstack-next-version --base <branch> --bump <major|minor|patch|micro> \
|
||||
// --current-version <X.Y.Z.W> [--workspace-root <path>|null] \
|
||||
// [--version-path <path>] [--json]
|
||||
//
|
||||
// VERSION path resolution (monorepo support):
|
||||
// 1. --version-path <path> CLI flag (highest priority)
|
||||
// 2. .gstack/version-path file at the repo root (single-line relative path,
|
||||
// committed so all collaborators benefit)
|
||||
// 3. "VERSION" at the repo root (default, backward-compatible)
|
||||
// --current-version <X.Y.Z.W> [--workspace-root <path>|null] [--json]
|
||||
//
|
||||
// Exit codes:
|
||||
// 0 — emitted JSON successfully (may include "offline":true or "host":"unknown")
|
||||
|
|
@ -52,7 +45,6 @@ type Output = {
|
|||
version: string;
|
||||
current_version: string;
|
||||
base_version: string;
|
||||
version_path: string;
|
||||
bump: Bump;
|
||||
host: "github" | "gitlab" | "unknown";
|
||||
offline: boolean;
|
||||
|
|
@ -122,28 +114,6 @@ function runCommand(cmd: string, args: string[], timeoutMs = 15000): { ok: boole
|
|||
};
|
||||
}
|
||||
|
||||
// VERSION-path resolution for monorepos. Priority: CLI flag > .gstack/version-path
// at repo root > "VERSION". Takes the repo root as an argument (no git calls) so
// tests can drive it with a fixture dir.
//
//   override — value of the CLI flag, if given; surrounding whitespace is
//              ignored, and a blank value is treated as "not given" so it can
//              never resolve to an empty path.
//   repoRoot — directory that may contain .gstack/version-path.
//
// Returns the trimmed override, else the trimmed first line of the config file
// when that line is non-empty, else "VERSION". A missing or unreadable config
// file falls back to the default.
function resolveVersionPath(override: string | undefined, repoRoot: string): string {
  const fromFlag = override?.trim();
  if (fromFlag) return fromFlag;

  const configFile = join(repoRoot, ".gstack", "version-path");
  try {
    // Only the first line counts; trim() also drops a trailing "\r".
    const firstLine = readFileSync(configFile, "utf8").split("\n", 1)[0]?.trim();
    if (firstLine) return firstLine;
  } catch {
    // Missing or unreadable config file: fall through to the default.
  }
  return "VERSION";
}
|
||||
|
||||
// Path printed by `git rev-parse --show-toplevel` (trimmed); falls back to the
// current working directory when that command does not succeed.
function repoToplevel(): string {
  const revParse = runCommand("git", ["rev-parse", "--show-toplevel"]);
  if (!revParse.ok) {
    return process.cwd();
  }
  return revParse.stdout.trim();
}
|
||||
|
||||
function detectHost(): "github" | "gitlab" | "unknown" {
|
||||
const remote = runCommand("git", ["remote", "get-url", "origin"]);
|
||||
if (remote.ok) {
|
||||
|
|
@ -158,19 +128,19 @@ function detectHost(): "github" | "gitlab" | "unknown" {
|
|||
return "unknown";
|
||||
}
|
||||
|
||||
function readBaseVersion(base: string, versionPath: string, warnings: string[]): string {
|
||||
function readBaseVersion(base: string, warnings: string[]): string {
|
||||
// git fetch is best-effort; we tolerate failure and fall back to whatever
|
||||
// origin/<base> currently points at.
|
||||
runCommand("git", ["fetch", "origin", base, "--quiet"], 10000);
|
||||
const r = runCommand("git", ["show", `origin/${base}:${versionPath}`]);
|
||||
const r = runCommand("git", ["show", `origin/${base}:VERSION`]);
|
||||
if (!r.ok) {
|
||||
warnings.push(`could not read ${versionPath} at origin/${base}; assuming 0.0.0.0`);
|
||||
warnings.push(`could not read VERSION at origin/${base}; assuming 0.0.0.0`);
|
||||
return "0.0.0.0";
|
||||
}
|
||||
return r.stdout.trim();
|
||||
}
|
||||
|
||||
async function fetchGithubClaimed(base: string, versionPath: string, excludePR: number | null, warnings: string[]): Promise<{ claimed: ClaimedPR[]; offline: boolean }> {
|
||||
async function fetchGithubClaimed(base: string, excludePR: number | null, warnings: string[]): Promise<{ claimed: ClaimedPR[]; offline: boolean }> {
|
||||
const list = runCommand("gh", [
|
||||
"pr",
|
||||
"list",
|
||||
|
|
@ -217,18 +187,14 @@ async function fetchGithubClaimed(base: string, versionPath: string, excludePR:
|
|||
const pr = queue.shift();
|
||||
if (!pr) return;
|
||||
// gh passes branch name via argv, not shell — safe.
|
||||
// encodeURI handles spaces in subproject paths (e.g. "Tinas Second Brain/...")
|
||||
// while leaving "/" untouched so the GitHub Contents API gets the path intact.
|
||||
const content = runCommand("gh", [
|
||||
"api",
|
||||
`repos/{owner}/{repo}/contents/${encodeURI(versionPath)}?ref=${encodeURIComponent(pr.headRefName)}`,
|
||||
`repos/{owner}/{repo}/contents/VERSION?ref=${encodeURIComponent(pr.headRefName)}`,
|
||||
"-q",
|
||||
".content",
|
||||
]);
|
||||
if (!content.ok) {
|
||||
warnings.push(
|
||||
`PR #${pr.number}: could not fetch ${versionPath} (fork, private, or wrong path — try --version-path or .gstack/version-path)`,
|
||||
);
|
||||
warnings.push(`PR #${pr.number}: could not fetch VERSION (fork or private)`);
|
||||
continue;
|
||||
}
|
||||
let versionStr: string;
|
||||
|
|
@ -249,7 +215,7 @@ async function fetchGithubClaimed(base: string, versionPath: string, excludePR:
|
|||
return { claimed: results, offline: false };
|
||||
}
|
||||
|
||||
async function fetchGitlabClaimed(base: string, versionPath: string, excludePR: number | null, warnings: string[]): Promise<{ claimed: ClaimedPR[]; offline: boolean }> {
|
||||
async function fetchGitlabClaimed(base: string, excludePR: number | null, warnings: string[]): Promise<{ claimed: ClaimedPR[]; offline: boolean }> {
|
||||
const list = runCommand("glab", [
|
||||
"mr",
|
||||
"list",
|
||||
|
|
@ -277,15 +243,12 @@ async function fetchGitlabClaimed(base: string, versionPath: string, excludePR:
|
|||
}
|
||||
const results: ClaimedPR[] = [];
|
||||
for (const mr of mrs) {
|
||||
// GitLab files API takes the full path URL-encoded (slashes become %2F).
|
||||
const content = runCommand("glab", [
|
||||
"api",
|
||||
`projects/:id/repository/files/${encodeURIComponent(versionPath)}?ref=${encodeURIComponent(mr.source_branch)}`,
|
||||
`projects/:id/repository/files/VERSION?ref=${encodeURIComponent(mr.source_branch)}`,
|
||||
]);
|
||||
if (!content.ok) {
|
||||
warnings.push(
|
||||
`MR !${mr.iid}: could not fetch ${versionPath} (wrong path? — try --version-path or .gstack/version-path)`,
|
||||
);
|
||||
warnings.push(`MR !${mr.iid}: could not fetch VERSION`);
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
|
|
@ -322,7 +285,7 @@ function currentRepoSlug(): string {
|
|||
return m ? m[1] : "";
|
||||
}
|
||||
|
||||
function scanSiblings(root: string | null, versionPath: string, claimed: ClaimedPR[], warnings: string[]): Sibling[] {
|
||||
function scanSiblings(root: string | null, claimed: ClaimedPR[], warnings: string[]): Sibling[] {
|
||||
if (!root || !existsSync(root)) return [];
|
||||
const mySlug = currentRepoSlug();
|
||||
if (!mySlug) {
|
||||
|
|
@ -345,7 +308,7 @@ function scanSiblings(root: string | null, versionPath: string, claimed: Claimed
|
|||
continue;
|
||||
}
|
||||
if (!existsSync(join(p, ".git")) && !existsSync(join(p, ".git/HEAD"))) continue;
|
||||
const versionFile = join(p, versionPath);
|
||||
const versionFile = join(p, "VERSION");
|
||||
if (!existsSync(versionFile)) continue;
|
||||
let version: string;
|
||||
try {
|
||||
|
|
@ -383,13 +346,12 @@ function markActiveSiblings(siblings: Sibling[], baseVersion: Version): Sibling[
|
|||
});
|
||||
}
|
||||
|
||||
function parseArgs(argv: string[]): { base: string; bump: Bump; current: string; workspaceRoot?: string; excludePR: number | null; versionPath?: string; help: boolean } {
|
||||
function parseArgs(argv: string[]): { base: string; bump: Bump; current: string; workspaceRoot?: string; excludePR: number | null; help: boolean } {
|
||||
let base = "";
|
||||
let bump: Bump | "" = "";
|
||||
let current = "";
|
||||
let workspaceRoot: string | undefined;
|
||||
let excludePR: number | null = null;
|
||||
let versionPath: string | undefined;
|
||||
let help = false;
|
||||
for (let i = 0; i < argv.length; i++) {
|
||||
const a = argv[i];
|
||||
|
|
@ -397,7 +359,6 @@ function parseArgs(argv: string[]): { base: string; bump: Bump; current: string;
|
|||
else if (a === "--bump") bump = (argv[++i] ?? "") as Bump;
|
||||
else if (a === "--current-version") current = argv[++i] ?? "";
|
||||
else if (a === "--workspace-root") workspaceRoot = argv[++i];
|
||||
else if (a === "--version-path") versionPath = argv[++i];
|
||||
else if (a === "--exclude-pr") {
|
||||
const n = Number(argv[++i]);
|
||||
excludePR = Number.isFinite(n) && n > 0 ? n : null;
|
||||
|
|
@ -414,7 +375,7 @@ function parseArgs(argv: string[]): { base: string; bump: Bump; current: string;
|
|||
console.error(`Error: --bump must be major|minor|patch|micro (got ${bump})`);
|
||||
process.exit(2);
|
||||
}
|
||||
return { base, bump: bump as Bump, current, workspaceRoot, excludePR, versionPath, help: false };
|
||||
return { base, bump: bump as Bump, current, workspaceRoot, excludePR, help: false };
|
||||
}
|
||||
|
||||
// Auto-detect: if --exclude-pr wasn't passed, check whether the current branch
|
||||
|
|
@ -431,14 +392,13 @@ async function main() {
|
|||
const args = parseArgs(process.argv.slice(2));
|
||||
if (args.help) {
|
||||
console.log(
|
||||
"Usage: gstack-next-version --base <branch> --bump <level> --current-version <X.Y.Z.W> [--workspace-root <path|null>] [--version-path <path>]",
|
||||
"Usage: gstack-next-version --base <branch> --bump <level> --current-version <X.Y.Z.W> [--workspace-root <path|null>]",
|
||||
);
|
||||
process.exit(0);
|
||||
}
|
||||
const warnings: string[] = [];
|
||||
const host = detectHost();
|
||||
const versionPath = resolveVersionPath(args.versionPath, repoToplevel());
|
||||
const baseVersion = args.current || readBaseVersion(args.base, versionPath, warnings);
|
||||
const baseVersion = args.current || readBaseVersion(args.base, warnings);
|
||||
const baseParsed = parseVersion(baseVersion);
|
||||
if (!baseParsed) {
|
||||
console.error(`Error: could not parse base version '${baseVersion}'`);
|
||||
|
|
@ -453,9 +413,9 @@ async function main() {
|
|||
let claimed: ClaimedPR[] = [];
|
||||
let offline = false;
|
||||
if (host === "github") {
|
||||
({ claimed, offline } = await fetchGithubClaimed(args.base, versionPath, excludePR, warnings));
|
||||
({ claimed, offline } = await fetchGithubClaimed(args.base, excludePR, warnings));
|
||||
} else if (host === "gitlab") {
|
||||
({ claimed, offline } = await fetchGitlabClaimed(args.base, versionPath, excludePR, warnings));
|
||||
({ claimed, offline } = await fetchGitlabClaimed(args.base, excludePR, warnings));
|
||||
} else {
|
||||
warnings.push("host unknown; queue-awareness unavailable");
|
||||
}
|
||||
|
|
@ -473,7 +433,7 @@ async function main() {
|
|||
const { version: picked, reason } = pickNextSlot(baseParsed, claimedVersions, args.bump);
|
||||
|
||||
const workspaceRoot = resolveWorkspaceRoot(args.workspaceRoot);
|
||||
const siblings = markActiveSiblings(scanSiblings(workspaceRoot, versionPath, claimed, warnings), baseParsed);
|
||||
const siblings = markActiveSiblings(scanSiblings(workspaceRoot, claimed, warnings), baseParsed);
|
||||
const activeSiblings = siblings.filter((s) => s.is_active);
|
||||
|
||||
// If an active sibling outranks our pick, bump past it (same bump level).
|
||||
|
|
@ -493,7 +453,6 @@ async function main() {
|
|||
version: fmtVersion(finalVersion),
|
||||
current_version: args.current || baseVersion,
|
||||
base_version: baseVersion,
|
||||
version_path: versionPath,
|
||||
bump: args.bump,
|
||||
host,
|
||||
offline,
|
||||
|
|
@ -507,7 +466,7 @@ async function main() {
|
|||
}
|
||||
|
||||
// Pure-function exports for testing
|
||||
export { parseVersion, fmtVersion, bumpVersion, cmpVersion, pickNextSlot, markActiveSiblings, resolveVersionPath };
|
||||
export { parseVersion, fmtVersion, bumpVersion, cmpVersion, pickNextSlot, markActiveSiblings };
|
||||
|
||||
// Only run main() when invoked as a script, not when imported by tests.
|
||||
if (import.meta.main) {
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
# CI / container env where HOME may be unset.
|
||||
#
|
||||
# Chains:
|
||||
# GSTACK_STATE_ROOT: GSTACK_HOME -> CLAUDE_PLUGIN_DATA (only when CLAUDE_PLUGIN_ROOT=*gstack*) -> $HOME/.gstack -> .gstack
|
||||
# GSTACK_STATE_ROOT: GSTACK_HOME -> CLAUDE_PLUGIN_DATA -> $HOME/.gstack -> .gstack
|
||||
# PLAN_ROOT: GSTACK_PLAN_DIR -> CLAUDE_PLANS_DIR -> $HOME/.claude/plans -> .claude/plans
|
||||
# TMP_ROOT: TMPDIR -> TMP -> .gstack/tmp (and mkdir -p, best-effort)
|
||||
#
|
||||
|
|
@ -21,11 +21,7 @@ set -u
|
|||
# State root: where gstack writes projects/, sessions/, analytics/.
|
||||
if [ -n "${GSTACK_HOME:-}" ]; then
|
||||
_state_root="$GSTACK_HOME"
|
||||
elif [ -n "${CLAUDE_PLUGIN_DATA:-}" ] && echo "${CLAUDE_PLUGIN_ROOT:-}" | grep -qi "gstack"; then
|
||||
# Guard: only trust CLAUDE_PLUGIN_DATA when CLAUDE_PLUGIN_ROOT confirms we are
|
||||
# running as the gstack plugin. Without this, a CLAUDE_PLUGIN_DATA from another
|
||||
# plugin (e.g. codex) that leaked into the session env via CLAUDE_ENV_FILE would
|
||||
# be picked up, writing all gstack state into the wrong directory.
|
||||
elif [ -n "${CLAUDE_PLUGIN_DATA:-}" ]; then
|
||||
_state_root="$CLAUDE_PLUGIN_DATA"
|
||||
elif [ -n "${HOME:-}" ]; then
|
||||
_state_root="$HOME/.gstack"
|
||||
|
|
|
|||
|
|
@ -28,8 +28,7 @@
|
|||
set -euo pipefail
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null)"
|
||||
# GSTACK_STATE_ROOT takes precedence over GSTACK_HOME (test isolation per D16).
|
||||
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
|
||||
GSTACK_HOME="${GSTACK_HOME:-$HOME/.gstack}"
|
||||
mkdir -p "$GSTACK_HOME/projects/$SLUG"
|
||||
|
||||
INPUT="$1"
|
||||
|
|
@ -50,48 +49,12 @@ if (!j.skill || !/^[a-z0-9-]+\$/.test(j.skill)) {
|
|||
process.exit(1);
|
||||
}
|
||||
|
||||
// Required: question_id (kebab-case, <=64 chars).
|
||||
// Cathedral T5: hook-sourced events use 'hook-<10-char-hash>' which is
|
||||
// kebab-case-compatible and passes the same regex.
|
||||
// Required: question_id (kebab-case, <=64 chars)
|
||||
if (!j.question_id || !/^[a-z0-9-]+\$/.test(j.question_id) || j.question_id.length > 64) {
|
||||
process.stderr.write('gstack-question-log: invalid question_id, must be kebab-case <=64 chars\n');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Optional: source — tags which writer produced this event.
|
||||
// 'agent' (default) — preamble-driven write from inside the running agent
|
||||
// 'hook' — PostToolUse hook captured it deterministically (T5)
|
||||
// 'auq-other' — user picked 'Other' and typed free text (Layer 8)
|
||||
// 'auto-decided' — PreToolUse enforcement hook substituted the answer (T6)
|
||||
// 'codex-import-marker' / 'codex-import-pattern' — T9 backfill from Codex
|
||||
const ALLOWED_SOURCES = ['agent', 'hook', 'auq-other', 'auto-decided', 'codex-import-marker', 'codex-import-pattern'];
|
||||
if (j.source !== undefined) {
|
||||
if (!ALLOWED_SOURCES.includes(j.source)) {
|
||||
process.stderr.write('gstack-question-log: invalid source, must be one of: ' + ALLOWED_SOURCES.join(', ') + '\n');
|
||||
process.exit(1);
|
||||
}
|
||||
} else {
|
||||
j.source = 'agent';
|
||||
}
|
||||
|
||||
// Optional: tool_use_id — Claude Code hook stdin field; used for dedup.
|
||||
if (j.tool_use_id !== undefined) {
|
||||
if (typeof j.tool_use_id !== 'string' || j.tool_use_id.length > 128) {
|
||||
process.stderr.write('gstack-question-log: tool_use_id must be string <=128 chars\n');
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Optional: free_text — sanitize (no newlines, <=300 chars).
|
||||
if (j.free_text !== undefined) {
|
||||
if (typeof j.free_text !== 'string') {
|
||||
process.stderr.write('gstack-question-log: free_text must be string\n');
|
||||
process.exit(1);
|
||||
}
|
||||
if (j.free_text.length > 300) j.free_text = j.free_text.slice(0, 300);
|
||||
j.free_text = j.free_text.replace(/\n+/g, ' ');
|
||||
}
|
||||
|
||||
// Required: question_summary (non-empty, <=200 chars, no newlines)
|
||||
if (typeof j.question_summary !== 'string' || !j.question_summary.length) {
|
||||
process.stderr.write('gstack-question-log: question_summary required\n');
|
||||
|
|
@ -201,49 +164,7 @@ if [ $VALIDATE_RC -ne 0 ] || [ -z "$VALIDATED" ]; then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
LOG_FILE="$GSTACK_HOME/projects/$SLUG/question-log.jsonl"
|
||||
|
||||
# Cathedral T5: composite-source dedup. If this exact (source, tool_use_id)
|
||||
# was already logged within the last 100 lines, skip — protects against
|
||||
# hook + agent both writing the same fire (D3 plan-tune cathedral decision).
|
||||
# Lookup is bounded so the bin stays cheap on hot paths.
|
||||
DEDUP_SKIP=""
|
||||
if [ -f "$LOG_FILE" ]; then
|
||||
DEDUP_SKIP=$(VALIDATED_JSON="$VALIDATED" LOG_FILE_PATH="$LOG_FILE" bun -e '
|
||||
const fs = require("fs");
|
||||
const j = JSON.parse(process.env.VALIDATED_JSON);
|
||||
if (!j.tool_use_id) { console.log(""); process.exit(0); }
|
||||
const want = j.source + ":" + j.tool_use_id;
|
||||
const lines = fs.readFileSync(process.env.LOG_FILE_PATH, "utf-8").trim().split("\n").slice(-100);
|
||||
for (const ln of lines) {
|
||||
try {
|
||||
const p = JSON.parse(ln);
|
||||
if (p.source && p.tool_use_id && (p.source + ":" + p.tool_use_id) === want) {
|
||||
console.log("dup");
|
||||
process.exit(0);
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
console.log("");
|
||||
' 2>/dev/null)
|
||||
fi
|
||||
|
||||
if [ "$DEDUP_SKIP" = "dup" ]; then
|
||||
echo "DEDUP: skipped (source=$(echo "$VALIDATED" | bun -e 'const j=JSON.parse(await Bun.stdin.text()); console.log(j.source);'), tool_use_id duplicate)"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "$VALIDATED" >> "$LOG_FILE"
|
||||
|
||||
# Cathedral T5: fire-and-forget --derive so inferred dimensions stay current
|
||||
# without per-event latency (D17). Sub-second op; output suppressed; never
|
||||
# blocks the hook caller. Skipped via GSTACK_QUESTION_LOG_NO_DERIVE=1 for
|
||||
# tests that don't want the side effect.
|
||||
if [ -z "${GSTACK_QUESTION_LOG_NO_DERIVE:-}" ]; then
|
||||
(
|
||||
nohup "$SCRIPT_DIR/gstack-developer-profile" --derive >/dev/null 2>&1 &
|
||||
) >/dev/null 2>&1
|
||||
fi
|
||||
echo "$VALIDATED" >> "$GSTACK_HOME/projects/$SLUG/question-log.jsonl"
|
||||
|
||||
# NOTE: question-log.jsonl is deliberately NOT enqueued for gbrain-sync.
|
||||
# Per Codex v2 review, audit/derivation data stays local alongside the
|
||||
|
|
|
|||
|
|
@ -23,8 +23,7 @@ set -euo pipefail
|
|||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
# GSTACK_STATE_ROOT takes precedence over GSTACK_HOME (test isolation per D16).
|
||||
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
|
||||
GSTACK_HOME="${GSTACK_HOME:-$HOME/.gstack}"
|
||||
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
|
||||
SLUG="${SLUG:-unknown}"
|
||||
PREF_FILE="$GSTACK_HOME/projects/$SLUG/question-preferences.json"
|
||||
|
|
@ -69,21 +68,6 @@ do_check() {
|
|||
return;
|
||||
}
|
||||
|
||||
// Split-chain carve-out: per-option calls in N-option splits emit
|
||||
// question_ids of the form <skill>-split-<option-slug>. These are
|
||||
// NEVER AUTO_DECIDE-eligible regardless of stored preferences — the
|
||||
// whole point of splitting is restoring user sovereignty over the
|
||||
// option set. See scripts/resolvers/preamble/generate-ask-user-format.ts
|
||||
// \"Handling 5+ options — split, never drop\" for the surrounding
|
||||
// mechanism that generates these ids.
|
||||
if (/-split-/.test(qid)) {
|
||||
console.log('ASK_NORMALLY');
|
||||
if (pref === 'never-ask' || pref === 'ask-only-for-one-way') {
|
||||
console.log('NOTE: split-chain per-option calls always ASK_NORMALLY; your ' + pref + ' preference does not apply to options inside a sequential split.');
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
switch (pref) {
|
||||
case 'never-ask':
|
||||
console.log('AUTO_DECIDE');
|
||||
|
|
|
|||
|
|
@ -1,228 +0,0 @@
|
|||
#!/usr/bin/env bun
|
||||
/**
|
||||
* gstack-redact — scan text for secrets/PII/legal content via the shared engine.
|
||||
*
|
||||
* Skill-facing CLI over lib/redact-engine.ts. Reads from stdin (default) or
|
||||
* --from-file, scans, and prints findings as JSON (--json) or a human table.
|
||||
*
|
||||
* Exit codes (consumed by skill bash to gate dispatch/file/edit/commit):
|
||||
* 0 clean (no HIGH, no MEDIUM)
|
||||
* 2 MEDIUM present (no HIGH) — skill runs the per-finding AskUserQuestion
|
||||
* 3 HIGH present — skill blocks
|
||||
*
|
||||
* WARN findings (tool-fence-degraded credentials) never change the exit code.
|
||||
*
|
||||
* Flags:
|
||||
* --json Emit JSON {findings, counts, repoVisibility, oversize}
|
||||
* --repo-visibility V public | private | unknown (default unknown=public-strict wording)
|
||||
* --from-file PATH Read input from PATH instead of stdin
|
||||
* --allowlist PATH Newline-delimited exact spans to suppress
|
||||
* --self-email EMAIL Suppress this email (the invoking user's own)
|
||||
* --repo-public-emails PATH Newline-delimited repo-public emails to suppress
|
||||
* --auto-redact IDS Comma-separated finding ids to auto-redact;
|
||||
* prints the redacted body to stdout + diff to stderr.
|
||||
* --max-bytes N Override the fail-closed size cap (default 1 MiB).
|
||||
*
|
||||
* Security note: this is a GUARDRAIL, not airtight enforcement. A determined
|
||||
* user can always bypass it (direct gh/git). It catches accidents.
|
||||
*/
|
||||
import * as fs from "fs";
|
||||
import * as path from "path";
|
||||
import { spawnSync } from "child_process";
|
||||
import {
|
||||
scan,
|
||||
applyRedactions,
|
||||
exitCodeFor,
|
||||
type RepoVisibility,
|
||||
type ScanOptions,
|
||||
type Finding,
|
||||
} from "../lib/redact-engine";
|
||||
|
||||
const MAX_STDIN_BYTES = 16 * 1024 * 1024; // hard ceiling before the engine cap
|
||||
|
||||
// ── pre-push hook install/uninstall (chains any existing hook) ────────────────
|
||||
|
||||
const MANAGED_MARKER = "# gstack-redact pre-push (managed)";
|
||||
|
||||
/**
 * Ask git where the hooks directory of the current repository lives.
 *
 * Runs `git rev-parse --git-path hooks`. When git exits non-zero (or could not
 * be run at all, in which case the status is not 0 either) an error is written
 * to stderr and the process exits with status 1.
 *
 * @returns git's answer with surrounding whitespace removed.
 */
function hooksPath(): string {
  const result = spawnSync("git", ["rev-parse", "--git-path", "hooks"], { encoding: "utf8" });
  if (result.status === 0) {
    return result.stdout.trim();
  }
  process.stderr.write("gstack-redact: not in a git repo\n");
  process.exit(1);
}
|
||||
|
||||
/**
 * Install the managed `pre-push` hook in the current repository.
 *
 * - If the existing `pre-push` already carries MANAGED_MARKER, nothing is done.
 * - If an unmanaged `pre-push` exists, it is renamed to `pre-push.local`, made
 *   executable, and invoked first by the generated wrapper.
 * - The wrapper is a bash script that captures stdin once and feeds it to the
 *   chained hook (when present and executable) and then to
 *   `gstack-redact-prepush`, which is resolved next to this file.
 */
function installPrepushHook(): void {
  const dir = hooksPath();
  fs.mkdirSync(dir, { recursive: true });
  const hookPath = path.join(dir, "pre-push");
  const prepushBin = path.join(import.meta.dir, "gstack-redact-prepush");

  // The helper path is embedded in a generated bash script. Inside double quotes
  // bash still expands `$`, backticks and `\`, and a literal `"` would end the
  // string, so quote with single quotes (everything literal) and splice any
  // embedded single quote as '\''.
  const quotedPrepushBin = "'" + prepushBin.replace(/'/g, "'\\''") + "'";

  // If a non-managed hook exists, preserve it as pre-push.local and chain it.
  if (fs.existsSync(hookPath)) {
    const existing = fs.readFileSync(hookPath, "utf8");
    if (existing.includes(MANAGED_MARKER)) {
      process.stdout.write("gstack-redact: pre-push hook already installed.\n");
      return;
    }
    const localPath = path.join(dir, "pre-push.local");
    fs.renameSync(hookPath, localPath);
    fs.chmodSync(localPath, 0o755);
    process.stdout.write("gstack-redact: preserved existing hook as pre-push.local (chained).\n");
  }

  // stdin is single-consume: capture it once, feed both the chained hook and ours.
  // Only ${MANAGED_MARKER} and ${quotedPrepushBin} are JS interpolations; every
  // other `$…` below is literal bash.
  const wrapper = `#!/usr/bin/env bash
${MANAGED_MARKER}
set -euo pipefail
_input="$(cat)"
_local="$(git rev-parse --git-path hooks/pre-push.local)"
if [ -x "$_local" ]; then
printf '%s' "$_input" | "$_local" "$@" || exit $?
fi
printf '%s' "$_input" | bun ${quotedPrepushBin} "$@"
`;
  fs.writeFileSync(hookPath, wrapper, { mode: 0o755 });
  // The creation mode is filtered by the process umask; chmod makes it exact.
  fs.chmodSync(hookPath, 0o755);
  process.stdout.write(`gstack-redact: installed pre-push hook at ${hookPath}\n`);
}
|
||||
|
||||
/**
 * Remove the managed `pre-push` hook from the current repository.
 *
 * Does nothing (beyond a message) unless `pre-push` exists and contains
 * MANAGED_MARKER. When a chained `pre-push.local` is present it is moved back
 * to `pre-push`; otherwise the managed hook file is deleted.
 */
function uninstallPrepushHook(): void {
  const hooksDir = hooksPath();
  const managedHook = path.join(hooksDir, "pre-push");
  const chainedHook = path.join(hooksDir, "pre-push.local");

  const isManaged =
    fs.existsSync(managedHook) && fs.readFileSync(managedHook, "utf8").includes(MANAGED_MARKER);
  if (!isManaged) {
    process.stdout.write("gstack-redact: no managed pre-push hook to remove.\n");
    return;
  }

  if (!fs.existsSync(chainedHook)) {
    fs.unlinkSync(managedHook);
    process.stdout.write("gstack-redact: removed managed pre-push hook.\n");
    return;
  }

  // Renaming over the managed hook restores the chained original in one step.
  fs.renameSync(chainedHook, managedHook);
  process.stdout.write("gstack-redact: removed managed hook, restored pre-push.local.\n");
}
|
||||
|
||||
/**
 * Look up the value that follows a named option on the command line.
 *
 * @param name Option to search for in `process.argv` (exact match).
 * @returns The argv entry right after the first occurrence of `name`, or
 *   `undefined` when the option is absent or is the last argument.
 */
function arg(name: string): string | undefined {
  const position = process.argv.indexOf(name);
  if (position < 0) return undefined;
  return process.argv[position + 1];
}
|
||||
/**
 * Report whether a boolean switch is present on the command line.
 *
 * @param name Switch to look for in `process.argv` (exact match).
 */
function flag(name: string): boolean {
  return process.argv.indexOf(name) !== -1;
}
|
||||
|
||||
/**
 * Load the text to scan, either from `--from-file PATH` or from stdin.
 *
 * Both sources are capped at MAX_STDIN_BYTES: a larger file is rejected from
 * its stat size without being read, and stdin is rejected as soon as the
 * running byte count passes the cap. Either rejection exits with status 3.
 *
 * @returns The input decoded as UTF-8.
 */
function readInput(): string {
  const fromFile = arg("--from-file");
  if (fromFile) {
    const { size } = fs.statSync(fromFile);
    if (size > MAX_STDIN_BYTES) {
      // Don't even read it — fail closed at the CLI boundary.
      process.stderr.write(`gstack-redact: input file too large (${size} bytes)\n`);
      process.exit(3);
    }
    return fs.readFileSync(fromFile, "utf8");
  }

  // Synchronous read loop over file descriptor 0 (stdin).
  const STDIN_FD = 0;
  const scratch = Buffer.alloc(65536);
  const pieces: Buffer[] = [];
  let received = 0;
  for (;;) {
    let count: number;
    try {
      count = fs.readSync(STDIN_FD, scratch, 0, scratch.length, null);
    } catch (e: any) {
      if (e.code === "EAGAIN") continue; // retry the read immediately
      if (e.code === "EOF") break;
      throw e;
    }
    if (count === 0) break;
    received += count;
    if (received > MAX_STDIN_BYTES) {
      process.stderr.write("gstack-redact: stdin too large\n");
      process.exit(3);
    }
    // Copy the slice: the scratch buffer is overwritten by the next read.
    pieces.push(Buffer.from(scratch.subarray(0, count)));
  }
  return Buffer.concat(pieces).toString("utf8");
}
|
||||
|
||||
/**
 * Read a newline-delimited list file.
 *
 * @param path File to read; may be `undefined` when the option was not given.
 * @returns The trimmed, non-empty lines of the file, or `undefined` when no
 *   path was supplied or the file does not exist.
 */
function readLines(path: string | undefined): string[] | undefined {
  if (path === undefined || path === "" || !fs.existsSync(path)) return undefined;

  const entries: string[] = [];
  for (const raw of fs.readFileSync(path, "utf8").split("\n")) {
    const trimmed = raw.trim();
    if (trimmed) entries.push(trimmed);
  }
  return entries;
}
|
||||
|
||||
/**
 * Translate command-line options into the engine's ScanOptions.
 *
 * - `--repo-visibility`: only `public` and `private` are honoured; anything
 *   else (including a missing value) becomes `unknown`.
 * - `--allowlist`, `--repo-public-emails`: newline-delimited files, read via
 *   readLines (so a missing file yields `undefined`).
 * - `--self-email`: passed through unchanged.
 * - `--max-bytes`: parsed as a base-10 integer and included only when it
 *   parses. An unparsable value is reported on stderr and dropped so the
 *   engine's default cap applies.
 */
function buildOpts(): ScanOptions {
  const requestedVisibility = arg("--repo-visibility");
  const repoVisibility: RepoVisibility =
    requestedVisibility === "public" || requestedVisibility === "private"
      ? requestedVisibility
      : "unknown";

  // Previously an unparsable value was forwarded as `maxBytes: NaN`.
  // NOTE(review): presumably the engine compares the input size against this
  // cap; every comparison with NaN is false, which would silently switch the
  // fail-closed size limit off — confirm against lib/redact-engine.
  const rawMaxBytes = arg("--max-bytes");
  let maxBytes: number | undefined;
  if (rawMaxBytes) {
    const parsed = parseInt(rawMaxBytes, 10);
    if (Number.isNaN(parsed)) {
      process.stderr.write(
        `gstack-redact: ignoring invalid --max-bytes value '${rawMaxBytes}' (using default cap)\n`,
      );
    } else {
      maxBytes = parsed;
    }
  }

  return {
    repoVisibility,
    allowlist: readLines(arg("--allowlist")),
    selfEmail: arg("--self-email"),
    repoPublicEmails: readLines(arg("--repo-public-emails")),
    ...(maxBytes !== undefined ? { maxBytes } : {}),
  };
}
|
||||
|
||||
/**
 * Render findings as a fixed-width text table, one finding per line.
 *
 * Columns: severity (padded to 6), id (padded to 24), `line:col` (line
 * right-aligned to 4, col padded to 3), then the preview text.
 *
 * @returns The rows joined with newlines, or a placeholder line when there
 *   are no findings.
 */
function humanTable(findings: Finding[]): string {
  if (findings.length === 0) return " (no findings)";

  const lines: string[] = [];
  for (const finding of findings) {
    const severity = finding.severity.padEnd(6);
    const id = finding.id.padEnd(24);
    const line = String(finding.line).padStart(4);
    const col = String(finding.col).padEnd(3);
    lines.push(` ${severity} ${id} ${line}:${col} ${finding.preview}`);
  }
  return lines.join("\n");
}
|
||||
|
||||
/**
 * CLI entry point.
 *
 * 1. Positional subcommands `install-prepush-hook` / `uninstall-prepush-hook`
 *    are dispatched and return immediately.
 * 2. With `--auto-redact IDS`, the redacted body goes to stdout, the diff and
 *    any skipped findings go to stderr, and the process exits 0.
 * 3. Otherwise the input is scanned, the result is printed as JSON (`--json`)
 *    or as a human-readable table, and the process exits with the code from
 *    exitCodeFor.
 */
function main() {
  // Subcommands are positional, not flags.
  switch (process.argv[2]) {
    case "install-prepush-hook":
      installPrepushHook();
      return;
    case "uninstall-prepush-hook":
      uninstallPrepushHook();
      return;
  }

  const opts = buildOpts();
  const input = readInput();

  const autoIds = arg("--auto-redact");
  if (autoIds) {
    const redaction = applyRedactions(input, autoIds.split(","), opts);
    process.stdout.write(redaction.body);
    if (redaction.diff) process.stderr.write(redaction.diff + "\n");
    if (redaction.skipped.length) {
      const locations = redaction.skipped.map((f) => ` ${f.id} @ ${f.line}:${f.col}`).join("\n");
      process.stderr.write(
        `\ngstack-redact: ${redaction.skipped.length} finding(s) could not be auto-redacted (structural) — edit manually:\n` +
          locations +
          "\n",
      );
    }
    process.exit(0);
  }

  const result = scan(input, opts);
  // Computed before any output, as in the reporting order below.
  const code = exitCodeFor(result);

  if (!flag("--json")) {
    const vis = result.repoVisibility.toUpperCase();
    process.stdout.write(`gstack-redact scan — repo ${vis}\n`);
    if (!result.oversize) {
      process.stdout.write(humanTable(result.findings) + "\n");
      const { HIGH, MEDIUM, LOW, WARN } = result.counts;
      process.stdout.write(` HIGH=${HIGH} MEDIUM=${MEDIUM} LOW=${LOW} WARN=${WARN}\n`);
    } else {
      process.stdout.write(" BLOCKED — input too large to scan safely (fail-closed)\n");
    }
  } else {
    process.stdout.write(JSON.stringify(result, null, 2) + "\n");
  }
  process.exit(code);
}
|
||||
|
||||
main();
|
||||
|
|
@ -1,146 +0,0 @@
|
|||
#!/usr/bin/env bun
|
||||
/**
|
||||
* gstack-redact-prepush — git pre-push hook that scans the diff being pushed for
|
||||
* HIGH-severity credentials and blocks the push on a hit.
|
||||
*
|
||||
* THIS IS A GUARDRAIL, NOT ENFORCEMENT. `git push --no-verify` bypasses it, as
|
||||
* does `GSTACK_REDACT_PREPUSH=skip`. It catches accidental credential pushes,
|
||||
* the most common real-world leak. It does NOT scan history, binary/LFS/submodule
|
||||
* files, or non-added lines. History scanning is /cso's job.
|
||||
*
|
||||
* Git pre-push interface: refs are read from STDIN, one per line:
|
||||
* <local ref> <local sha> <remote ref> <remote sha>
|
||||
* We scan the ADDED lines of <remote sha>..<local sha> per ref (what's being
|
||||
* pushed). Special cases:
|
||||
* - remote sha all-zeroes → new branch: diff against merge-base with the
|
||||
* remote's default branch (fallback: scan all commits unique to local ref).
|
||||
* - local sha all-zeroes → branch delete: nothing to scan, skip.
|
||||
* - force-push → remote..local still gives the net new content.
|
||||
*
|
||||
* Behavior:
|
||||
* - HIGH finding in added lines → print + exit 1 (block), for public AND private.
|
||||
* - MEDIUM → warn (non-blocking). LOW/WARN → silent.
|
||||
* - GSTACK_REDACT_PREPUSH=skip → log + exit 0 (escape valve).
|
||||
*
|
||||
* Installed/uninstalled via `gstack-redact install-prepush-hook` (see the
|
||||
* gstack-redact CLI), which chains any pre-existing hook.
|
||||
*/
|
||||
import { spawnSync } from "child_process";
|
||||
import * as fs from "fs";
|
||||
import * as os from "os";
|
||||
import * as path from "path";
|
||||
import { scan, type Finding } from "../lib/redact-engine";
|
||||
|
||||
const ZERO = /^0+$/;
|
||||
// The canonical empty-tree object; diffing against it yields all content as added.
|
||||
const EMPTY_TREE = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";
|
||||
|
||||
/**
 * Run `git` with the given arguments and return its stdout.
 *
 * Any failure (non-zero exit, or no exit status at all) is reported as an
 * empty string, so callers cannot tell "failed" from "succeeded with no
 * output".
 *
 * @param args Arguments passed to git verbatim (no shell involved).
 */
function git(args: string[]): string {
  const { status, stdout } = spawnSync("git", args, {
    encoding: "utf8",
    maxBuffer: 64 * 1024 * 1024,
  });
  if (status !== 0) return "";
  return stdout ?? "";
}
|
||||
|
||||
/**
 * Guess the default branch of the `origin` remote.
 *
 * Resolution order: the target of `refs/remotes/origin/HEAD` (with the
 * `refs/remotes/` prefix removed), then the first of `origin/main` /
 * `origin/master` that `git rev-parse --verify` resolves, and finally the
 * literal `origin/main` even if it does not exist.
 */
function defaultRemoteBranch(): string {
  const headRef = git(["symbolic-ref", "refs/remotes/origin/HEAD"]).trim();
  if (headRef !== "") return headRef.replace("refs/remotes/", "");

  const known = ["origin/main", "origin/master"].find(
    (candidate) => git(["rev-parse", "--verify", candidate]).trim() !== "",
  );
  return known ?? "origin/main";
}
|
||||
|
||||
/**
 * Return the text of every line ADDED by a ref update being pushed.
 *
 * The diff base is chosen as follows:
 * - the remote sha, when it names a commit that exists in the local object
 *   database (ordinary push, or force-push over commits we have);
 * - otherwise (new branch, or a remote commit we never fetched) the merge-base
 *   of `localSha` and the remote default branch;
 * - otherwise the empty tree, so ALL content is treated as added — fail-safe
 *   (scans more, never less).
 *
 * @param localSha Commit being pushed.
 * @param remoteSha Commit currently on the remote; all zeroes for a new ref.
 * @returns The added lines, without their leading `+`, joined with newlines.
 */
function addedLinesFor(localSha: string, remoteSha: string): string {
  // `^{commit}` forces git to actually load the object. Without this check a
  // remote sha we do not have locally makes `git diff` fail, the git() helper
  // turns that failure into "", and the push would go out unscanned.
  const remoteKnownLocally =
    !ZERO.test(remoteSha) &&
    git(["rev-parse", "--verify", "--quiet", `${remoteSha}^{commit}`]).trim() !== "";

  let range: string;
  if (remoteKnownLocally) {
    // Existing branch (incl. force-push): net new content remote..local.
    range = `${remoteSha}..${localSha}`;
  } else {
    const base = git(["merge-base", localSha, defaultRemoteBranch()]).trim();
    range = base ? `${base}..${localSha}` : `${EMPTY_TREE}..${localSha}`;
  }

  // --unified=0: no context lines, only changed ones.
  const diff = git(["diff", "--unified=0", "--no-color", range]);

  // Per-file header lines (`diff …`, `index …`, `--- a/x`, `+++ b/x`) come before
  // that file's first `@@` hunk marker. Inside a hunk every line starting with
  // `+` is added content — including content that itself begins with `++`,
  // which a plain "skip lines starting with +++" filter would drop.
  const added: string[] = [];
  let inHunk = false;
  for (const line of diff.split("\n")) {
    if (line.startsWith("diff ")) {
      inHunk = false;
    } else if (line.startsWith("@@")) {
      inHunk = true;
    } else if (inHunk && line.startsWith("+")) {
      added.push(line.slice(1));
    }
  }
  return added.join("\n");
}
|
||||
|
||||
/**
 * Append a record of a skipped pre-push scan to
 * `<state root>/security/prepush-skip.jsonl`, where the state root is
 * `$GSTACK_HOME` when set and non-empty, else `~/.gstack`.
 *
 * Each record is one JSON line: `{ ts: <ISO timestamp>, reason }`.
 * Strictly best-effort: every error is swallowed.
 *
 * @param reason Free-form explanation stored with the record.
 */
function logSkip(reason: string): void {
  try {
    const stateRoot = process.env.GSTACK_HOME || path.join(os.homedir(), ".gstack");
    const securityDir = path.join(stateRoot, "security");
    fs.mkdirSync(securityDir, { recursive: true });

    const logFile = path.join(securityDir, "prepush-skip.jsonl");
    const record = { ts: new Date().toISOString(), reason };
    fs.appendFileSync(logFile, JSON.stringify(record) + "\n");
  } catch {
    // best-effort; never block a push because logging failed
  }
}
|
||||
|
||||
/**
 * Pre-push hook entry point.
 *
 * Reads the ref updates git supplies on stdin
 * (`<local ref> <local sha> <remote ref> <remote sha>`, one per line), scans
 * the added lines of each update, and decides whether to let the push through:
 *
 * - `GSTACK_REDACT_PREPUSH=skip` → record the skip, exit 0.
 * - any HIGH finding → list them on stderr, exit 1 (push blocked).
 * - a diff the engine reports as oversize → exit 1 (fail-closed, matching how
 *   the gstack-redact CLI reports an oversize input as BLOCKED).
 * - MEDIUM findings → warning on stderr only.
 * - otherwise exit 0.
 */
function main() {
  if ((process.env.GSTACK_REDACT_PREPUSH || "").toLowerCase() === "skip") {
    logSkip(process.env.GSTACK_REDACT_PREPUSH_REASON || "env-skip");
    process.stderr.write("gstack-redact-prepush: skipped via GSTACK_REDACT_PREPUSH=skip\n");
    process.exit(0);
  }

  const stdin = fs.readFileSync(0, "utf8");
  const refs = stdin
    .split("\n")
    .map((l) => l.trim())
    .filter(Boolean)
    .map((l) => l.split(/\s+/));

  const allHigh: Finding[] = [];
  let mediumCount = 0;
  let oversizeCount = 0;

  for (const [, localSha, , remoteSha] of refs) {
    if (!localSha || ZERO.test(localSha)) continue; // branch delete → nothing pushed
    const added = addedLinesFor(localSha, remoteSha || "0");
    if (!added.trim()) continue;
    // Visibility doesn't change HIGH behavior; pass private so nothing is treated
    // as public-strict (HIGH blocks regardless either way).
    const result = scan(added, { repoVisibility: "private" });
    if (result.oversize) {
      // NOTE(review): assumes an oversize result carries no usable findings, so
      // ignoring it would let an unscanned diff through — confirm against
      // lib/redact-engine.
      oversizeCount++;
      continue;
    }
    for (const f of result.findings) {
      if (f.severity === "HIGH") allHigh.push(f);
      else if (f.severity === "MEDIUM") mediumCount++;
    }
  }

  if (mediumCount > 0) {
    process.stderr.write(
      `gstack-redact-prepush: ${mediumCount} MEDIUM finding(s) in pushed diff (PII/internal). ` +
        "Not blocking. Review before this becomes public.\n",
    );
  }

  if (oversizeCount > 0) {
    process.stderr.write(
      `\n⛔ gstack-redact-prepush BLOCKED the push — ${oversizeCount} ref update(s) too large to scan safely (fail-closed).\n` +
        "This is a guardrail: `git push --no-verify` or `GSTACK_REDACT_PREPUSH=skip git push` bypass it.\n",
    );
  }

  if (allHigh.length > 0) {
    process.stderr.write(
      "\n⛔ gstack-redact-prepush BLOCKED the push — credential(s) in the pushed diff:\n\n",
    );
    for (const f of allHigh) {
      process.stderr.write(` HIGH ${f.id} ${f.preview}\n`);
    }
    process.stderr.write(
      "\nRotate the credential (a pushed secret is compromised) and remove it from the diff.\n" +
        "This is a guardrail: `git push --no-verify` or `GSTACK_REDACT_PREPUSH=skip git push` bypass it.\n",
    );
  }

  process.exit(allHigh.length > 0 || oversizeCount > 0 ? 1 : 0);
}
|
||||
|
||||
main();
|
||||
|
|
@ -46,17 +46,6 @@ _cleanup_skill_entry() {
|
|||
fi
|
||||
}
|
||||
|
||||
_link_root_skill_alias() {
|
||||
local target="$SKILLS_DIR/_gstack-command"
|
||||
|
||||
[ -f "$INSTALL_DIR/SKILL.md" ] || return 0
|
||||
[ -L "$target" ] && rm -f "$target"
|
||||
mkdir -p "$target"
|
||||
ln -snf "$INSTALL_DIR/SKILL.md" "$target/SKILL.md"
|
||||
}
|
||||
|
||||
_link_root_skill_alias
|
||||
|
||||
# Discover skills (directories with SKILL.md, excluding meta dirs)
|
||||
SKILL_COUNT=0
|
||||
for skill_dir in "$INSTALL_DIR"/*/; do
|
||||
|
|
|
|||
|
|
@ -1,44 +1,21 @@
|
|||
#!/usr/bin/env bash
|
||||
# gstack-settings-hook — manage Claude Code hooks in ~/.claude/settings.json
|
||||
# gstack-settings-hook — add/remove SessionStart hooks in Claude Code settings.json
|
||||
#
|
||||
# Two shapes:
|
||||
#
|
||||
# 1. Legacy (SessionStart only — used by setup --team and gstack-uninstall):
|
||||
# gstack-settings-hook add <cmd> # adds SessionStart hook
|
||||
# gstack-settings-hook remove <cmd> # removes matching SessionStart hook
|
||||
#
|
||||
# 2. Schema-aware (plan-tune cathedral T3 — supports PreToolUse + PostToolUse):
|
||||
# gstack-settings-hook add-event --event <SessionStart|PreToolUse|PostToolUse> \
|
||||
# --command <cmd> --source <tag> [--matcher <regex>] [--timeout <s>]
|
||||
# gstack-settings-hook remove-source --source <tag>
|
||||
# gstack-settings-hook diff-event --event ... --command ... --source ... [--matcher ...]
|
||||
# gstack-settings-hook rollback # restore latest backup
|
||||
# gstack-settings-hook list-sources # show all gstack-tagged hook entries
|
||||
#
|
||||
# Every add-event/remove-source writes a backup to ~/.claude/settings.json.bak.<ts>
|
||||
# before mutating (Codex correction — silent settings.json mutation is wrong).
|
||||
#
|
||||
# Dedup: legacy `add`/`remove` dedupe by the historical `gstack-session-update`
|
||||
# substring. Schema-aware `add-event` dedupes by (event, matcher, _gstack_source) so
|
||||
# multiple gstack registrations (plan-tune, ...) don't collide.
|
||||
# Usage:
|
||||
# gstack-settings-hook add <hook-command> # add SessionStart hook
|
||||
# gstack-settings-hook remove <hook-command> # remove SessionStart hook
|
||||
#
|
||||
# Requires: bun (already a gstack hard dependency)
|
||||
# Writes atomically: .tmp + rename to prevent corruption on crash/disk-full.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
ACTION="${1:-}"
|
||||
HOOK_CMD="${2:-}"
|
||||
SETTINGS_FILE="${GSTACK_SETTINGS_FILE:-$HOME/.claude/settings.json}"
|
||||
|
||||
if [ -z "$ACTION" ]; then
|
||||
cat <<EOF >&2
|
||||
Usage:
|
||||
gstack-settings-hook add <hook-command> # legacy SessionStart add
|
||||
gstack-settings-hook remove <hook-command> # legacy SessionStart remove
|
||||
gstack-settings-hook add-event --event <name> --command <cmd> --source <tag> [--matcher <re>] [--timeout <s>]
|
||||
gstack-settings-hook remove-source --source <tag>
|
||||
gstack-settings-hook diff-event --event <name> --command <cmd> --source <tag> [--matcher <re>] [--timeout <s>]
|
||||
gstack-settings-hook rollback
|
||||
gstack-settings-hook list-sources
|
||||
EOF
|
||||
if [ -z "$ACTION" ] || [ -z "$HOOK_CMD" ]; then
|
||||
echo "Usage: gstack-settings-hook {add|remove} <hook-command>" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
|
@ -47,239 +24,59 @@ if ! command -v bun >/dev/null 2>&1; then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
backup_settings() {
|
||||
if [ -f "$SETTINGS_FILE" ]; then
|
||||
local ts
|
||||
ts=$(date +%Y%m%d-%H%M%S)
|
||||
cp "$SETTINGS_FILE" "$SETTINGS_FILE.bak.$ts"
|
||||
echo "$SETTINGS_FILE.bak.$ts" > "$SETTINGS_FILE.bak-latest"
|
||||
fi
|
||||
}
|
||||
|
||||
# --- legacy SessionStart add/remove (backwards compat) -----------------
|
||||
|
||||
case "$ACTION" in
|
||||
add)
|
||||
HOOK_CMD="${2:-}"
|
||||
if [ -z "$HOOK_CMD" ]; then
|
||||
echo "Usage: gstack-settings-hook add <hook-command>" >&2
|
||||
exit 1
|
||||
fi
|
||||
backup_settings
|
||||
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" GSTACK_HOOK_CMD="$HOOK_CMD" bun -e '
|
||||
const fs = require("fs");
|
||||
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" GSTACK_HOOK_CMD="$HOOK_CMD" bun -e "
|
||||
const fs = require('fs');
|
||||
const settingsPath = process.env.GSTACK_SETTINGS_PATH;
|
||||
const hookCmd = process.env.GSTACK_HOOK_CMD;
|
||||
|
||||
let settings = {};
|
||||
try { settings = JSON.parse(fs.readFileSync(settingsPath, "utf8")); } catch {}
|
||||
try { settings = JSON.parse(fs.readFileSync(settingsPath, 'utf8')); } catch {}
|
||||
|
||||
if (!settings.hooks) settings.hooks = {};
|
||||
if (!settings.hooks.SessionStart) settings.hooks.SessionStart = [];
|
||||
|
||||
// Dedup: check if hook command already registered
|
||||
const exists = settings.hooks.SessionStart.some(entry =>
|
||||
entry.hooks && entry.hooks.some(h => h.command && h.command.includes("gstack-session-update"))
|
||||
entry.hooks && entry.hooks.some(h => h.command && h.command.includes('gstack-session-update'))
|
||||
);
|
||||
|
||||
if (!exists) {
|
||||
settings.hooks.SessionStart.push({
|
||||
hooks: [{ type: "command", command: hookCmd }]
|
||||
hooks: [{ type: 'command', command: hookCmd }]
|
||||
});
|
||||
}
|
||||
const tmp = settingsPath + ".tmp";
|
||||
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + "\n");
|
||||
fs.renameSync(tmp, settingsPath);
|
||||
' 2>/dev/null
|
||||
;;
|
||||
|
||||
const tmp = settingsPath + '.tmp';
|
||||
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + '\n');
|
||||
fs.renameSync(tmp, settingsPath);
|
||||
" 2>/dev/null
|
||||
;;
|
||||
remove)
|
||||
HOOK_CMD="${2:-}"
|
||||
if [ -z "$HOOK_CMD" ]; then
|
||||
echo "Usage: gstack-settings-hook remove <hook-command>" >&2
|
||||
exit 1
|
||||
fi
|
||||
[ -f "$SETTINGS_FILE" ] || exit 1
|
||||
backup_settings
|
||||
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" bun -e '
|
||||
const fs = require("fs");
|
||||
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" bun -e "
|
||||
const fs = require('fs');
|
||||
const settingsPath = process.env.GSTACK_SETTINGS_PATH;
|
||||
|
||||
let settings = {};
|
||||
try { settings = JSON.parse(fs.readFileSync(settingsPath, "utf8")); } catch { process.exit(0); }
|
||||
try { settings = JSON.parse(fs.readFileSync(settingsPath, 'utf8')); } catch { process.exit(0); }
|
||||
|
||||
if (settings.hooks && settings.hooks.SessionStart) {
|
||||
settings.hooks.SessionStart = settings.hooks.SessionStart.filter(entry =>
|
||||
!(entry.hooks && entry.hooks.some(h => h.command && h.command.includes("gstack-session-update")))
|
||||
!(entry.hooks && entry.hooks.some(h => h.command && h.command.includes('gstack-session-update')))
|
||||
);
|
||||
if (settings.hooks.SessionStart.length === 0) delete settings.hooks.SessionStart;
|
||||
if (Object.keys(settings.hooks).length === 0) delete settings.hooks;
|
||||
}
|
||||
const tmp = settingsPath + ".tmp";
|
||||
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + "\n");
|
||||
|
||||
const tmp = settingsPath + '.tmp';
|
||||
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + '\n');
|
||||
fs.renameSync(tmp, settingsPath);
|
||||
' 2>/dev/null
|
||||
" 2>/dev/null
|
||||
;;
|
||||
|
||||
add-event|diff-event)
|
||||
EVENT=""
|
||||
COMMAND=""
|
||||
SOURCE=""
|
||||
MATCHER=""
|
||||
TIMEOUT=""
|
||||
shift
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--event) EVENT="$2"; shift 2 ;;
|
||||
--command) COMMAND="$2"; shift 2 ;;
|
||||
--source) SOURCE="$2"; shift 2 ;;
|
||||
--matcher) MATCHER="$2"; shift 2 ;;
|
||||
--timeout) TIMEOUT="$2"; shift 2 ;;
|
||||
*) echo "unknown flag: $1" >&2; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
if [ -z "$EVENT" ] || [ -z "$COMMAND" ] || [ -z "$SOURCE" ]; then
|
||||
echo "add-event/diff-event require --event, --command, --source" >&2
|
||||
exit 1
|
||||
fi
|
||||
case "$EVENT" in
|
||||
SessionStart|PreToolUse|PostToolUse|UserPromptSubmit|Stop|Notification) ;;
|
||||
*) echo "invalid --event '$EVENT'; must be one of SessionStart|PreToolUse|PostToolUse|UserPromptSubmit|Stop|Notification" >&2; exit 1 ;;
|
||||
esac
|
||||
if [ "$ACTION" = "add-event" ]; then
|
||||
backup_settings
|
||||
fi
|
||||
DIFF_ONLY=""
|
||||
if [ "$ACTION" = "diff-event" ]; then DIFF_ONLY=1; fi
|
||||
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" \
|
||||
GSTACK_EVENT="$EVENT" \
|
||||
GSTACK_COMMAND="$COMMAND" \
|
||||
GSTACK_SOURCE="$SOURCE" \
|
||||
GSTACK_MATCHER="$MATCHER" \
|
||||
GSTACK_TIMEOUT="$TIMEOUT" \
|
||||
GSTACK_DIFF_ONLY="$DIFF_ONLY" \
|
||||
bun -e '
|
||||
const fs = require("fs");
|
||||
const settingsPath = process.env.GSTACK_SETTINGS_PATH;
|
||||
const event = process.env.GSTACK_EVENT;
|
||||
const cmd = process.env.GSTACK_COMMAND;
|
||||
const source = process.env.GSTACK_SOURCE;
|
||||
const matcher = process.env.GSTACK_MATCHER || "";
|
||||
const timeoutRaw = process.env.GSTACK_TIMEOUT || "";
|
||||
const diffOnly = process.env.GSTACK_DIFF_ONLY === "1";
|
||||
|
||||
let settings = {};
|
||||
try { settings = JSON.parse(fs.readFileSync(settingsPath, "utf8")); } catch {}
|
||||
|
||||
const before = JSON.stringify(settings, null, 2);
|
||||
|
||||
if (!settings.hooks) settings.hooks = {};
|
||||
if (!settings.hooks[event]) settings.hooks[event] = [];
|
||||
|
||||
const matchesEntry = (entry) => {
|
||||
const sameMatcher = (entry.matcher || "") === matcher;
|
||||
const sameSource = entry._gstack_source === source;
|
||||
return sameMatcher && sameSource;
|
||||
};
|
||||
|
||||
let existing = settings.hooks[event].find(matchesEntry);
|
||||
const hookEntry = { type: "command", command: cmd };
|
||||
if (timeoutRaw) {
|
||||
const n = Number(timeoutRaw);
|
||||
if (Number.isFinite(n) && n > 0) hookEntry.timeout = n;
|
||||
}
|
||||
|
||||
if (existing) {
|
||||
existing.hooks = [hookEntry];
|
||||
} else {
|
||||
const newEntry = { _gstack_source: source, hooks: [hookEntry] };
|
||||
if (matcher) newEntry.matcher = matcher;
|
||||
settings.hooks[event].push(newEntry);
|
||||
}
|
||||
|
||||
const after = JSON.stringify(settings, null, 2);
|
||||
|
||||
if (diffOnly) {
|
||||
console.log("--- BEFORE");
|
||||
console.log(before);
|
||||
console.log("--- AFTER");
|
||||
console.log(after);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
const tmp = settingsPath + ".tmp";
|
||||
fs.writeFileSync(tmp, after + "\n");
|
||||
fs.renameSync(tmp, settingsPath);
|
||||
console.log("OK: " + event + " hook registered (source: " + source + ")");
|
||||
'
|
||||
;;
|
||||
|
||||
remove-source)
|
||||
SOURCE=""
|
||||
shift
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--source) SOURCE="$2"; shift 2 ;;
|
||||
*) echo "unknown flag: $1" >&2; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
if [ -z "$SOURCE" ]; then
|
||||
echo "remove-source requires --source <tag>" >&2
|
||||
exit 1
|
||||
fi
|
||||
[ -f "$SETTINGS_FILE" ] || exit 0
|
||||
backup_settings
|
||||
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" GSTACK_SOURCE="$SOURCE" bun -e '
|
||||
const fs = require("fs");
|
||||
const settingsPath = process.env.GSTACK_SETTINGS_PATH;
|
||||
const source = process.env.GSTACK_SOURCE;
|
||||
let settings = {};
|
||||
try { settings = JSON.parse(fs.readFileSync(settingsPath, "utf8")); } catch { process.exit(0); }
|
||||
if (!settings.hooks) { process.exit(0); }
|
||||
let removed = 0;
|
||||
for (const event of Object.keys(settings.hooks)) {
|
||||
const before = settings.hooks[event].length;
|
||||
settings.hooks[event] = settings.hooks[event].filter(entry => entry._gstack_source !== source);
|
||||
removed += before - settings.hooks[event].length;
|
||||
if (settings.hooks[event].length === 0) delete settings.hooks[event];
|
||||
}
|
||||
if (Object.keys(settings.hooks).length === 0) delete settings.hooks;
|
||||
const tmp = settingsPath + ".tmp";
|
||||
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + "\n");
|
||||
fs.renameSync(tmp, settingsPath);
|
||||
console.log("OK: removed " + removed + " hook entry/entries tagged source=" + source);
|
||||
'
|
||||
;;
|
||||
|
||||
rollback)
|
||||
if [ ! -f "$SETTINGS_FILE.bak-latest" ]; then
|
||||
echo "rollback: no backup pointer at $SETTINGS_FILE.bak-latest" >&2
|
||||
exit 1
|
||||
fi
|
||||
LATEST=$(cat "$SETTINGS_FILE.bak-latest")
|
||||
if [ ! -f "$LATEST" ]; then
|
||||
echo "rollback: pointer references missing backup $LATEST" >&2
|
||||
exit 1
|
||||
fi
|
||||
cp "$LATEST" "$SETTINGS_FILE"
|
||||
echo "OK: restored $SETTINGS_FILE from $LATEST"
|
||||
;;
|
||||
|
||||
list-sources)
|
||||
[ -f "$SETTINGS_FILE" ] || { echo "(no settings file)"; exit 0; }
|
||||
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" bun -e '
|
||||
const fs = require("fs");
|
||||
let settings = {};
|
||||
try { settings = JSON.parse(fs.readFileSync(process.env.GSTACK_SETTINGS_PATH, "utf8")); } catch { process.exit(0); }
|
||||
const hooks = settings.hooks || {};
|
||||
let any = false;
|
||||
for (const event of Object.keys(hooks)) {
|
||||
for (const entry of hooks[event]) {
|
||||
if (entry._gstack_source) {
|
||||
any = true;
|
||||
console.log(event + "\t" + entry._gstack_source + "\t" + (entry.matcher || "(no matcher)"));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!any) console.log("(no gstack-tagged hooks)");
|
||||
'
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Unknown action: $ACTION" >&2
|
||||
echo "Unknown action: $ACTION (expected add or remove)" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
|
|
|||
|
|
@ -64,14 +64,6 @@ fi
|
|||
# 4. Fallback to basename only when there is no usable override, repo, or cache.
|
||||
SLUG="${SLUG:-$(sanitize_slug "$(basename "$PROJECT_DIR")")}"
|
||||
|
||||
# 4b. Unconditional final sanitize before the value is echoed into `eval`/`source`
|
||||
# output or written to cache. Every source above (override, remote, basename,
|
||||
# and the cache read at step 3) already runs sanitize_slug, but filtering here
|
||||
# too keeps the [a-zA-Z0-9._-] invariant promised in the header on every path —
|
||||
# preserving the defense against a poisoned ~/.gstack/slug-cache/<key> injecting
|
||||
# shell into `eval "$(gstack-slug)"` — and heals such a cache on the next write.
|
||||
SLUG=$(sanitize_slug "${SLUG:-}")
|
||||
|
||||
# 5. Cache the slug for future sessions (atomic write, fail silently)
|
||||
if [[ -n "$SLUG" ]]; then
|
||||
mkdir -p "$CACHE_DIR" 2>/dev/null || true
|
||||
|
|
|
|||
|
|
@ -107,13 +107,7 @@ BATCH="$BATCH]"
|
|||
[ "$COUNT" -eq 0 ] && exit 0
|
||||
|
||||
# ─── POST to edge function ───────────────────────────────────
|
||||
# Create response file atomically. If mktemp fails, refuse to continue rather
|
||||
# than fall back to a predictable $$-based path (race + overwrite footgun).
|
||||
RESP_FILE="$(mktemp "${TMPDIR:-/tmp}/gstack-sync-XXXXXX")" || {
|
||||
echo "gstack-telemetry-sync: mktemp failed — skipping this run" >&2
|
||||
exit 0
|
||||
}
|
||||
trap 'rm -f "$RESP_FILE"' EXIT
|
||||
RESP_FILE="$(mktemp /tmp/gstack-sync-XXXXXX 2>/dev/null || echo "/tmp/gstack-sync-$$")"
|
||||
HTTP_CODE="$(curl -s -w '%{http_code}' --max-time 10 \
|
||||
-X POST "${SUPABASE_URL}/functions/v1/telemetry-ingest" \
|
||||
-H "Content-Type: application/json" \
|
||||
|
|
|
|||
|
|
@ -29,13 +29,11 @@ if [ ! -f "$TIMELINE_FILE" ]; then
|
|||
exit 0
|
||||
fi
|
||||
|
||||
cat "$TIMELINE_FILE" 2>/dev/null | GSTACK_TIMELINE_SINCE="$SINCE" GSTACK_TIMELINE_BRANCH="$BRANCH" GSTACK_TIMELINE_LIMIT="$LIMIT" bun -e "
|
||||
cat "$TIMELINE_FILE" 2>/dev/null | bun -e "
|
||||
const lines = (await Bun.stdin.text()).trim().split('\n').filter(Boolean);
|
||||
const since = process.env.GSTACK_TIMELINE_SINCE || '';
|
||||
const branch = process.env.GSTACK_TIMELINE_BRANCH || '';
|
||||
const limitRaw = process.env.GSTACK_TIMELINE_LIMIT || '20';
|
||||
const parsedLimit = Number.parseInt(limitRaw, 10);
|
||||
const limit = Number.isSafeInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : 20;
|
||||
const since = '${SINCE}';
|
||||
const branch = '${BRANCH}';
|
||||
const limit = ${LIMIT};
|
||||
|
||||
let sinceMs = 0;
|
||||
if (since) {
|
||||
|
|
|
|||
|
|
@ -232,10 +232,6 @@ SETTINGS_HOOK="$(dirname "$0")/gstack-settings-hook"
|
|||
SESSION_UPDATE="$(dirname "$0")/gstack-session-update"
|
||||
if [ -x "$SETTINGS_HOOK" ]; then
|
||||
"$SETTINGS_HOOK" remove "$SESSION_UPDATE" 2>/dev/null && REMOVED+=("SessionStart hook") || true
|
||||
# Cathedral T8 cleanup: also remove plan-tune PreToolUse + PostToolUse hooks.
|
||||
if "$SETTINGS_HOOK" remove-source --source plan-tune-cathedral 2>/dev/null | grep -q "removed [1-9]"; then
|
||||
REMOVED+=("plan-tune cathedral hooks")
|
||||
fi
|
||||
fi
|
||||
|
||||
# ─── Remove global state ────────────────────────────────────
|
||||
|
|
|
|||
|
|
@ -1,212 +0,0 @@
|
|||
#!/usr/bin/env bun
|
||||
// gstack-version-bump — deterministic version-state classifier + writer for /ship.
|
||||
//
|
||||
// Extracted from ship Step 12 prose (v2 plan T9, hybrid CLI extraction). The
|
||||
// idempotency classification and the dual-write to VERSION + package.json are
|
||||
// pure deterministic logic; running them as tested code removes the single
|
||||
// worst /ship footgun — re-bumping an already-shipped branch — from prose the
|
||||
// agent could skip or misread when the step lives in a lazy-loaded section.
|
||||
//
|
||||
// What STAYS agent judgment (NOT here): the bump-LEVEL decision (micro/patch vs
|
||||
// minor/major, which may AskUserQuestion on feature signals) and the queue
|
||||
// collision prompt. The slot pick itself is bin/gstack-next-version. This CLI
|
||||
// only answers "what state am I in?" and "write this exact version".
|
||||
//
|
||||
// Subcommands:
|
||||
// classify --base <branch> [--version-path <p>]
|
||||
// Compares VERSION vs origin/<base>:VERSION vs package.json.version.
|
||||
// Emits JSON: { state, baseVersion, currentVersion, pkgVersion, pkgExists }
|
||||
// state ∈ FRESH | ALREADY_BUMPED | DRIFT_STALE_PKG | DRIFT_UNEXPECTED
|
||||
// Exit 0 on a decidable state (incl. DRIFT_UNEXPECTED — it's a real state
|
||||
// the caller must handle), exit 2 on bad args / unresolvable base.
|
||||
//
|
||||
// write --version <X.Y.Z.W> [--version-path <p>]
|
||||
// Validates the 4-digit pattern, writes VERSION + package.json.version.
|
||||
// Use for the FRESH bump (or an approved queue rebump). Exit 3 on a
|
||||
// half-write (VERSION written, package.json failed) so the caller knows
|
||||
// drift exists; the next classify() will report DRIFT_STALE_PKG.
|
||||
//
|
||||
// repair [--version-path <p>]
|
||||
// DRIFT_STALE_PKG path: sync package.json.version to the current VERSION
|
||||
// file. No bump. Validates the VERSION pattern first.
|
||||
//
|
||||
// Contract: classify NEVER writes. write/repair mutate VERSION + package.json
|
||||
// only. No git mutation, no network. Mirrors gstack-next-version's reader/writer
|
||||
// split so /ship composes them.
|
||||
|
||||
import { existsSync, readFileSync, writeFileSync } from "node:fs";
import { execFileSync } from "node:child_process";
import { join, relative, sep } from "node:path";
|
||||
|
||||
/** Accepts exactly four dot-separated runs of digits: MAJOR.MINOR.PATCH.MICRO. */
const VERSION_RE = /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/;

/** Stand-in version used when a VERSION file is missing, unreadable, or empty. */
const DEFAULT = "0.0.0.0";

/** Result of comparing VERSION, the base branch's VERSION, and package.json. */
type State = "FRESH" | "ALREADY_BUMPED" | "DRIFT_STALE_PKG" | "DRIFT_UNEXPECTED";
|
||||
|
||||
/**
 * Report a fatal error and terminate the process.
 *
 * @param msg  Reason for the failure; written to stderr as
 *             `gstack-version-bump: <msg>` followed by a newline.
 * @param code Process exit status. Defaults to 2.
 */
function fail(msg: string, code = 2): never {
  const line = `gstack-version-bump: ${msg}\n`;
  process.stderr.write(line);
  process.exit(code);
}
|
||||
|
||||
/**
 * Look up the value that follows a command-line flag.
 *
 * @param args Argument list to search.
 * @param flag Flag to look for, e.g. `--base`.
 * @returns The token immediately after the first occurrence of `flag`, or
 *          `undefined` when the flag is absent or is the last token. The
 *          token is returned as-is, even if it looks like another flag.
 */
function argVal(args: string[], flag: string): string | undefined {
  const at = args.indexOf(flag);
  if (at < 0) return undefined;

  const next = at + 1;
  return next < args.length ? args[next] : undefined;
}
|
||||
|
||||
/**
 * Work out which file holds the version string.
 *
 * Precedence:
 *   1. `explicit` (the `--version-path` value), when non-empty;
 *   2. the trimmed, non-empty contents of `<cwd>/.gstack/version-path`;
 *   3. `<cwd>/VERSION`.
 *
 * Whichever candidate wins is joined onto `cwd`.
 */
function resolveVersionPath(cwd: string, explicit?: string): string {
  if (explicit) return join(cwd, explicit);

  const pinFile = join(cwd, ".gstack", "version-path");
  const pinned = existsSync(pinFile) ? readFileSync(pinFile, "utf-8").trim() : "";
  return join(cwd, pinned ? pinned : "VERSION");
}
|
||||
|
||||
/**
 * Read a version file and strip every whitespace character from its contents.
 *
 * @param p Path of the file to read.
 * @returns The compacted contents, or `DEFAULT` when the file cannot be read
 *          or holds nothing but whitespace.
 */
function readVersionFile(p: string): string {
  let compact = "";
  try {
    compact = readFileSync(p, "utf-8").replace(/[\r\n\s]/g, "");
  } catch {
    return DEFAULT;
  }
  return compact || DEFAULT;
}
|
||||
|
||||
/**
 * Report whether `<cwd>/package.json` exists and what its `version` field holds.
 * The file is parsed in-process; no child process is spawned.
 *
 * - Missing file: `{ exists: false, version: "" }`.
 * - Present but unreadable, or `version` is not a string:
 *   `{ exists: true, version: "" }`.
 * - Present but not valid JSON: aborts through `fail` with exit code 2.
 */
function readPkgVersion(cwd: string): { exists: boolean; version: string } {
  const pkgPath = join(cwd, "package.json");
  if (!existsSync(pkgPath)) return { exists: false, version: "" };

  let text: string;
  try {
    text = readFileSync(pkgPath, "utf-8");
  } catch {
    return { exists: true, version: "" };
  }

  let doc: unknown;
  try {
    doc = JSON.parse(text);
  } catch {
    fail("package.json is not valid JSON. Fix the file before re-running /ship.", 2);
  }

  // Optional chaining covers a document whose top-level value is `null`.
  const field = (doc as { version?: unknown })?.version;
  if (typeof field !== "string") return { exists: true, version: "" };
  return { exists: true, version: field };
}
|
||||
|
||||
/**
 * Set the `version` field of `<cwd>/package.json` and rewrite the file.
 *
 * The document is re-serialized with two-space indentation and a trailing
 * newline. Throws when the file cannot be read, parsed, or written; the
 * callers in this file catch that and turn it into an exit code.
 */
function writePkgVersion(cwd: string, version: string): void {
  const target = join(cwd, "package.json");
  const manifest = JSON.parse(readFileSync(target, "utf-8")) as Record<string, unknown>;
  manifest.version = version;

  const serialized = JSON.stringify(manifest, null, 2) + "\n";
  writeFileSync(target, serialized);
}
|
||||
|
||||
/**
 * Read the version recorded on `origin/<base>`.
 *
 * @param cwd        Directory in which git is run.
 * @param base       Base branch name; looked up as `origin/<base>`.
 * @param versionRel Path of the version file as addressed inside the git tree.
 * @returns The file's contents with all whitespace removed, or `DEFAULT` when
 *          the file is absent on the base (new repo / new file) or is empty.
 *          Aborts through `fail` (exit 2) when `origin/<base>` does not resolve.
 */
function baseVersion(cwd: string, base: string, versionRel: string): string {
  const ref = `origin/${base}`;

  // Verify the base ref resolves, mirroring the Step 12 guard.
  try {
    execFileSync("git", ["rev-parse", "--verify", ref], { cwd, stdio: "ignore" });
  } catch {
    fail(`Unable to resolve origin/${base}. Run 'git fetch origin' or verify the base branch exists.`, 2);
  }

  try {
    // Capture stdout only. With default stdio the child's stderr is forwarded
    // to ours, so the expected "file absent on base" case would print git's
    // "fatal: path ... does not exist" even though we recover from it.
    const out = execFileSync("git", ["show", `${ref}:${versionRel}`], {
      cwd,
      encoding: "utf-8",
      stdio: ["ignore", "pipe", "ignore"],
    });
    return out.replace(/[\r\n\s]/g, "") || DEFAULT;
  } catch {
    // VERSION absent on base (new repo / new file) → treat as 0.0.0.0.
    return DEFAULT;
  }
}
|
||||
|
||||
/**
 * Classify the repository's version state. Pure: no I/O.
 *
 * @param current    Version in the working tree's VERSION file.
 * @param base       Version on the base branch.
 * @param pkgExists  Whether a package.json is present.
 * @param pkgVersion package.json's version, or `""` when it has none.
 * @returns
 *   - `FRESH`            — VERSION matches base and package.json does not disagree.
 *   - `DRIFT_UNEXPECTED` — VERSION matches base but package.json differs, i.e.
 *                          package.json was edited outside /ship and it is
 *                          unsafe to guess which value is authoritative.
 *   - `ALREADY_BUMPED`   — VERSION moved past base and package.json does not disagree.
 *   - `DRIFT_STALE_PKG`  — VERSION moved past base but package.json lags behind.
 *
 * A missing package.json, or one with an empty version, never counts as drift.
 */
function classifyState(current: string, base: string, pkgExists: boolean, pkgVersion: string): State {
  const pkgDisagrees = pkgExists && pkgVersion !== "" && pkgVersion !== current;
  const bumped = current !== base;

  if (pkgDisagrees) return bumped ? "DRIFT_STALE_PKG" : "DRIFT_UNEXPECTED";
  return bumped ? "ALREADY_BUMPED" : "FRESH";
}
|
||||
|
||||
/**
 * `classify` subcommand: compare the working-tree version, the base branch's
 * version and package.json's version, then print one JSON line
 * `{ state, baseVersion, currentVersion, pkgVersion, pkgExists }` to stdout.
 *
 * Never writes to disk. Aborts through `fail` (exit 2) when `--base` is
 * missing or `origin/<base>` cannot be resolved.
 *
 * @param args Arguments after the subcommand (`--base`, optional `--version-path`).
 * @param cwd  Directory whose VERSION / package.json are inspected.
 */
function cmdClassify(args: string[], cwd: string): void {
  const base = argVal(args, "--base") ?? "";
  if (!base) fail("classify requires --base <branch>", 2);

  const explicit = argVal(args, "--version-path");
  const versionPath = resolveVersionPath(cwd, explicit);
  // Read the SAME file on the base branch that we read locally. Without an
  // explicit --version-path the local path may come from the
  // .gstack/version-path pin, so derive the in-tree path from the resolved
  // location instead of assuming "VERSION". Git addresses tree paths with
  // forward slashes regardless of platform.
  const versionRel = explicit || relative(cwd, versionPath).split(sep).join("/");

  const current = readVersionFile(versionPath);
  const baseV = baseVersion(cwd, base, versionRel);
  const pkg = readPkgVersion(cwd);
  const state = classifyState(current, baseV, pkg.exists, pkg.version);

  process.stdout.write(
    JSON.stringify({
      state,
      baseVersion: baseV,
      currentVersion: current,
      pkgVersion: pkg.version || null,
      pkgExists: pkg.exists,
    }) + "\n",
  );
  // DRIFT_UNEXPECTED is a real, decidable state — the caller stops on it, but the
  // classification itself succeeded, so exit 0. (Bad args / unresolvable base are
  // the only exit-2 cases.)
}
|
||||
|
||||
/**
 * `write` subcommand: validate `--version` against MAJOR.MINOR.PATCH.MICRO,
 * write it (plus a newline) to the resolved VERSION file, then mirror it into
 * package.json when one exists. Prints `{ wrote, packageJson }` as one JSON line.
 *
 * Exit codes (through `fail`): 2 for a missing or malformed `--version`; 3 when
 * VERSION was written but package.json could not be updated, leaving drift
 * for a later classify/repair to pick up.
 *
 * @param args Arguments after the subcommand (`--version`, optional `--version-path`).
 * @param cwd  Directory containing VERSION / package.json.
 */
function cmdWrite(args: string[], cwd: string): void {
  const version = argVal(args, "--version") ?? "";
  if (version === "") fail("write requires --version <X.Y.Z.W>", 2);
  if (!VERSION_RE.test(version)) {
    fail(`NEW_VERSION (${version}) does not match MAJOR.MINOR.PATCH.MICRO. Aborting.`, 2);
  }

  const versionPath = resolveVersionPath(cwd, argVal(args, "--version-path"));
  writeFileSync(versionPath, version + "\n");

  const pkgPath = join(cwd, "package.json");
  if (existsSync(pkgPath)) {
    try {
      writePkgVersion(cwd, version);
    } catch {
      fail(
        "failed to update package.json. VERSION was written but package.json is now stale. " +
          "Re-run — classify will report DRIFT_STALE_PKG and repair will sync it.",
        3,
      );
    }
  }

  const report = { wrote: version, packageJson: existsSync(pkgPath) };
  process.stdout.write(JSON.stringify(report) + "\n");
}
|
||||
|
||||
/**
 * `repair` subcommand: copy the current VERSION value into package.json
 * without bumping anything. Prints `{ repaired }` as one JSON line.
 *
 * The VERSION file is read directly rather than through `readVersionFile`:
 * that helper substitutes "0.0.0.0" for a missing or empty file, and that
 * placeholder would pass the pattern check and be written into package.json.
 * Here a missing, unreadable or empty VERSION is rejected like any other
 * malformed value.
 *
 * Exit codes (through `fail`): 2 when VERSION is absent/malformed or there is
 * no package.json; 3 when package.json could not be updated.
 *
 * @param args Arguments after the subcommand (optional `--version-path`).
 * @param cwd  Directory containing VERSION / package.json.
 */
function cmdRepair(args: string[], cwd: string): void {
  const versionPath = resolveVersionPath(cwd, argVal(args, "--version-path"));

  let current = "";
  try {
    current = readFileSync(versionPath, "utf-8").replace(/\s/g, "");
  } catch {
    // Leave `current` empty so the pattern check below rejects it.
  }
  if (!VERSION_RE.test(current)) {
    fail(
      `VERSION file contents (${current}) do not match MAJOR.MINOR.PATCH.MICRO. ` +
        "Refusing to propagate invalid semver into package.json. Fix VERSION, then re-run /ship.",
      2,
    );
  }

  if (!existsSync(join(cwd, "package.json"))) {
    fail("repair: no package.json to sync.", 2);
  }

  try {
    writePkgVersion(cwd, current);
  } catch {
    fail("drift repair failed — could not update package.json.", 3);
  }
  process.stdout.write(JSON.stringify({ repaired: current }) + "\n");
}
|
||||
|
||||
// Exported for unit tests (pure logic, no I/O).
|
||||
export { classifyState, VERSION_RE, type State };
|
||||
|
||||
// CLI entry point: runs only when this file is the program being executed,
// not when it is imported (e.g. by unit tests using the exports above).
if (import.meta.main) {
  const argv = process.argv.slice(2);
  const sub = argv[0];
  const rest = argv.slice(1);
  const cwd = process.cwd();

  if (sub === "classify") {
    cmdClassify(rest, cwd);
  } else if (sub === "write") {
    cmdWrite(rest, cwd);
  } else if (sub === "repair") {
    cmdRepair(rest, cwd);
  } else {
    // Unknown or missing subcommand.
    fail("usage: gstack-version-bump <classify|write|repair> [flags]", 2);
  }
}
|
||||
|
|
@ -2,7 +2,13 @@
|
|||
name: browse
|
||||
preamble-tier: 1
|
||||
version: 1.1.0
|
||||
description: Fast headless browser for QA testing and site dogfooding. (gstack)
|
||||
description: |
|
||||
Fast headless browser for QA testing and site dogfooding. Navigate any URL, interact with
|
||||
elements, verify page state, diff before/after actions, take annotated screenshots, check
|
||||
responsive layouts, test forms and uploads, handle dialogs, and assert element states.
|
||||
~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a
|
||||
user flow, or file a bug with evidence. Use when asked to "open in browser", "test the
|
||||
site", "take a screenshot", or "dogfood this". (gstack)
|
||||
triggers:
|
||||
- browse a page
|
||||
- headless browser
|
||||
|
|
@ -16,16 +22,6 @@ allowed-tools:
|
|||
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
|
||||
<!-- Regenerate: bun run gen:skill-docs -->
|
||||
|
||||
|
||||
## When to invoke this skill
|
||||
|
||||
Navigate any URL, interact with
|
||||
elements, verify page state, diff before/after actions, take annotated screenshots, check
|
||||
responsive layouts, test forms and uploads, handle dialogs, and assert element states.
|
||||
~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a
|
||||
user flow, or file a bug with evidence. Use when asked to "open in browser", "test the
|
||||
site", "take a screenshot", or "dogfood this".
|
||||
|
||||
## Preamble (run first)
|
||||
|
||||
```bash
|
||||
|
|
@ -61,7 +57,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
|
|||
echo "QUESTION_TUNING: $_QUESTION_TUNING"
|
||||
mkdir -p ~/.gstack/analytics
|
||||
if [ "$_TEL" != "off" ]; then
|
||||
echo '{"skill":"browse","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
echo '{"skill":"browse","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
fi
|
||||
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
|
||||
if [ -f "$_PF" ]; then
|
||||
|
|
@ -103,19 +99,6 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
|
|||
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
|
||||
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
|
||||
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
|
||||
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
|
||||
# Claude Code exposes plan mode via system reminders; we detect best-effort
|
||||
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
|
||||
# fall back to "inactive". Codex hosts and Claude execution mode both end up
|
||||
# inactive, which is the safe default (defaults to file+execute pipeline).
|
||||
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
else
|
||||
export GSTACK_PLAN_MODE="inactive"
|
||||
fi
|
||||
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
|
||||
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
|
||||
```
|
||||
|
||||
|
|
@ -171,7 +154,7 @@ Only run `open` if yes. Always run `touch`.
|
|||
|
||||
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
|
||||
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
|
||||
|
||||
Options:
|
||||
- A) Help gstack get better! (recommended)
|
||||
|
|
@ -247,7 +230,6 @@ Key routing rules:
|
|||
- Ship/deploy/PR → invoke /ship or /land-and-deploy
|
||||
- Save progress → invoke /context-save
|
||||
- Resume context → invoke /context-restore
|
||||
- Author a backlog-ready spec/issue → invoke /spec
|
||||
```
|
||||
|
||||
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
|
||||
|
|
@ -921,7 +903,6 @@ $B prettyscreenshot --cleanup --scroll-to ".pricing" --width 1440 ~/Desktop/hero
|
|||
| `disconnect` | Disconnect headed browser, return to headless mode |
|
||||
| `focus [@ref]` | Bring headed browser window to foreground (macOS) |
|
||||
| `handoff [message]` | Open visible Chrome at current page for user takeover |
|
||||
| `memory [--json]` | Snapshot Bun heap + per-tab JS heap + Chromium process tree + bounded buffer sizes. JSON output with --json. |
|
||||
| `restart` | Restart server |
|
||||
| `resume` | Re-snapshot after user takeover, return control to AI |
|
||||
| `state save|load <name>` | Save/load browser state (cookies + URLs) |
|
||||
|
|
|
|||
|
|
@ -18,12 +18,9 @@
|
|||
import { chromium, type Browser, type BrowserContext, type BrowserContextOptions, type Page, type Locator, type Cookie } from 'playwright';
|
||||
import { writeSecureFile, mkdirSecure } from './file-permissions';
|
||||
import { addConsoleEntry, addNetworkEntry, addDialogEntry, networkBuffer, type DialogEntry } from './buffers';
|
||||
import { emitActivity } from './activity';
|
||||
import { validateNavigationUrl } from './url-validation';
|
||||
import { TabSession, type RefEntry } from './tab-session';
|
||||
import { resolveChromiumProfile, cleanSingletonLocks } from './config';
|
||||
import { withCdpSession } from './cdp-bridge';
|
||||
import type { MemorySnapshot, MemoryStructureStats, MemoryTabSnapshot, MemoryProcess } from './memory-snapshot';
|
||||
|
||||
/**
|
||||
* Detect whether GSTACK_CHROMIUM_PATH points at a custom Chromium build that
|
||||
|
|
@ -43,83 +40,6 @@ export function isCustomChromium(): boolean {
|
|||
return p.includes('GBrowser') || p.includes('gbrowser');
|
||||
}
|
||||
|
||||
/**
 * Decide whether Playwright should request Chromium's sandbox.
 *
 * Returns false when any of the following holds:
 *   - the platform is Windows (the Bun→Node→Chromium chain breaks the
 *     sandbox, GitHub #276);
 *   - `GSTACK_CHROMIUM_NO_SANDBOX=1` is set — an explicit opt-out for hosts
 *     such as Ubuntu/AppArmor where unprivileged user namespaces are denied
 *     even for normal users and /qa hangs without --no-sandbox (#1562);
 *   - `CI` or `CONTAINER` is set to a non-empty value, or the process runs
 *     as root (the sandbox needs unprivileged user namespaces, which are
 *     missing for root and typically disabled in containers).
 *
 * When this returns false, Playwright auto-adds --no-sandbox to the launch
 * args. When it returns true, Playwright does not, which keeps Chromium's
 * "unsupported command-line flag" infobar off headed launches.
 *
 * The headless launch path also pushes an explicit '--no-sandbox' when
 * CI/CONTAINER/root is set; that push is redundant with this result and
 * harmless.
 */
export function shouldEnableChromiumSandbox(): boolean {
  const { platform, env } = process;

  if (platform === 'win32') return false;
  if (env.GSTACK_CHROMIUM_NO_SANDBOX === '1') return false;
  if (env.CI || env.CONTAINER) return false;

  // process.getuid is not defined on every platform, so probe before calling.
  const runningAsRoot = typeof process.getuid === 'function' && process.getuid() === 0;
  return !runningAsRoot;
}
|
||||
|
||||
/**
 * Work out why the underlying Chromium child process is going away.
 *
 * Playwright's 'disconnected' event usually fires before the child emits its
 * own 'exit', so `exitCode` is still null at that moment. If the child has
 * neither an exit code nor a signal yet, wait for its 'exit' event, capped at
 * one second, and then inspect the result:
 *
 *   exitCode === 0 and no signal → 'clean' (user Cmd+Q, normal shutdown)
 *   anything else                → 'crash' (signal-kill, SIGSEGV, OOM,
 *                                  non-zero exit, or no process handle)
 *
 * Process supervisors (gbrowser's gbd HealthMonitor in cmd/gbd/health.go) read
 * this server's exit code to decide whether to restart it: callers exit 0 on
 * 'clean' so the supervisor treats it as user intent, and keep their per-path
 * non-zero codes on 'crash' so it restarts on backoff.
 *
 * @param browser Browser whose child process is inspected; may be null.
 */
export async function resolveDisconnectCause(browser: Browser | null): Promise<'clean' | 'crash'> {
  const child = browser?.process();

  if (child && child.exitCode === null && child.signalCode === null) {
    await new Promise<void>((done) => {
      const cap = setTimeout(done, 1000);
      child.once('exit', () => {
        clearTimeout(cap);
        done();
      });
    });
  }

  const exitedZero = child?.exitCode === 0;
  const noSignal = child?.signalCode == null;
  return exitedZero && noSignal ? 'clean' : 'crash';
}
|
||||
|
||||
/**
 * Disconnect handler for the headless `launch()` path.
 *
 * Resolves the disconnect cause, logs it to stderr and terminates the
 * process: exit 0 for a clean, user-initiated quit; exit 1 for a crash or
 * kill. Kept as a standalone function so `launch()` can dispatch to it in a
 * single line.
 *
 * @param browser Browser that just disconnected; may be null.
 */
export async function handleChromiumDisconnect(browser: Browser | null): Promise<void> {
  const quitCleanly = (await resolveDisconnectCause(browser)) === 'clean';

  if (quitCleanly) {
    console.error('[browse] Chromium closed cleanly (user-initiated quit). Server exiting (0).');
    process.exit(0);
  }

  console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting (1).');
  console.error('[browse] Console/network logs flushed to .gstack/browse-*.log');
  process.exit(1);
}
|
||||
|
||||
export type { RefEntry };
|
||||
|
||||
// Re-export TabSession for consumers
|
||||
|
|
@ -197,60 +117,11 @@ export class BrowserManager {
|
|||
private connectionMode: 'launched' | 'headed' = 'launched';
|
||||
private intentionalDisconnect = false;
|
||||
|
||||
// ─── Tab Count Guardrail (D5 + Codex single-tab flag) ───────
|
||||
// Idempotent threshold trackers: each guardrail fires exactly once per
|
||||
// upward crossing of its threshold and re-arms when the tab count drops
|
||||
// back below. Pre-guardrail, nothing tracked tab count growth and a
|
||||
// user could accumulate hundreds of tabs (each holding 50–300 MB of
|
||||
// Chromium-side RSS) without warning until the OS OOM-killer fired.
|
||||
// The toast UX lives in the sidebar (extension/sidepanel.js); the
|
||||
// server-side responsibility is the audit-trail activity entry that
|
||||
// appears in the activity feed even when the sidebar is closed.
|
||||
private static readonly TAB_GUARDRAIL_SOFT = 50;
|
||||
private static readonly TAB_GUARDRAIL_HARD = 200;
|
||||
private tabGuardrailSoftHit = false;
|
||||
private tabGuardrailHardHit = false;
|
||||
|
||||
/**
 * Tab-count guardrail check, run from context.on('page') once the new tab
 * has been added to `this.pages`. Each threshold (soft, hard) produces at
 * most one warning + activity entry per upward crossing; the latch flags
 * are cleared elsewhere when the count drops back below the threshold.
 */
private checkTabGuardrails(): void {
  const openTabs = this.pages.size;
  const softLimit = BrowserManager.TAB_GUARDRAIL_SOFT;
  const hardLimit = BrowserManager.TAB_GUARDRAIL_HARD;

  const softCrossed = openTabs >= softLimit && !this.tabGuardrailSoftHit;
  if (softCrossed) {
    // Latch first so a repeat call at the same count stays silent.
    this.tabGuardrailSoftHit = true;
    const softMsg = `Tab count crossed ${softLimit} (now ${openTabs}). Consider closing unused tabs — each Chromium tab holds 50–300 MB.`;
    console.warn(`[browse] ${softMsg}`);
    emitActivity({ type: 'error', command: 'tab-guardrail', error: softMsg, tabs: openTabs });
  }

  const hardCrossed = openTabs >= hardLimit && !this.tabGuardrailHardHit;
  if (hardCrossed) {
    this.tabGuardrailHardHit = true;
    const hardMsg = `Tab count crossed ${hardLimit} (now ${openTabs}). OOM risk imminent. Open the sidebar to see top RAM consumers.`;
    console.error(`[browse] ${hardMsg}`);
    emitActivity({ type: 'error', command: 'tab-guardrail', error: hardMsg, tabs: openTabs });
  }
}
|
||||
|
||||
/**
 * Re-arm the tab guardrails. Called from page.on('close'): a latch stays
 * set only while the tab count is still at or above its threshold.
 */
private recheckTabGuardrailsOnClose(): void {
  const openTabs = this.pages.size;
  this.tabGuardrailSoftHit =
    this.tabGuardrailSoftHit && openTabs >= BrowserManager.TAB_GUARDRAIL_SOFT;
  this.tabGuardrailHardHit =
    this.tabGuardrailHardHit && openTabs >= BrowserManager.TAB_GUARDRAIL_HARD;
}
|
||||
|
||||
// Called when the headed browser disconnects without intentional teardown
|
||||
// (user closed the window). Wired up by server.ts to run full cleanup
|
||||
// (sidebar-agent, state file, profile locks) before exiting with code 2.
|
||||
// Returns void or a Promise; rejections are caught and fall back to exit(2).
|
||||
// `exitCode` is the resolved process exit code from the disconnect cause:
|
||||
// 0 on clean user-initiated quit (e.g., Cmd+Q on headed Chromium), 2 on
|
||||
// crash/signal-kill. Callers (server.ts) forward it to their shutdown
|
||||
// pipeline so process supervisors (gbrowser's gbd) read the right signal.
|
||||
public onDisconnect: ((exitCode?: number) => void | Promise<void>) | null = null;
|
||||
public onDisconnect: (() => void | Promise<void>) | null = null;
|
||||
|
||||
getConnectionMode(): 'launched' | 'headed' { return this.connectionMode; }
|
||||
|
||||
|
|
@ -355,16 +226,12 @@ export class BrowserManager {
|
|||
}
|
||||
|
||||
if (extensionsDir) {
|
||||
// Skip --load-extension when running against a custom Chromium build that
|
||||
// already bakes the extension in (e.g., GBrowser / GStack Browser.app).
|
||||
// Loading it twice causes a ServiceWorkerState::SetWorkerId DCHECK crash.
|
||||
if (!isCustomChromium()) {
|
||||
launchArgs.push(
|
||||
`--disable-extensions-except=${extensionsDir}`,
|
||||
`--load-extension=${extensionsDir}`,
|
||||
);
|
||||
}
|
||||
launchArgs.push('--window-position=-9999,-9999', '--window-size=1,1');
|
||||
launchArgs.push(
|
||||
`--disable-extensions-except=${extensionsDir}`,
|
||||
`--load-extension=${extensionsDir}`,
|
||||
'--window-position=-9999,-9999',
|
||||
'--window-size=1,1',
|
||||
);
|
||||
useHeadless = false; // extensions require headed mode; off-screen window simulates headless
|
||||
console.log(`[browse] Extensions loaded from: ${extensionsDir}`);
|
||||
}
|
||||
|
|
@ -373,25 +240,17 @@ export class BrowserManager {
|
|||
headless: useHeadless,
|
||||
// On Windows, Chromium's sandbox fails when the server is spawned through
|
||||
// the Bun→Node process chain (GitHub #276). Disable it — local daemon
|
||||
// browsing user-specified URLs has marginal sandbox benefit. Also disabled
|
||||
// on Linux root/CI/container, where the sandbox requires unprivileged user
|
||||
// namespaces that aren't available.
|
||||
chromiumSandbox: shouldEnableChromiumSandbox(),
|
||||
// browsing user-specified URLs has marginal sandbox benefit.
|
||||
chromiumSandbox: process.platform !== 'win32',
|
||||
...(launchArgs.length > 0 ? { args: launchArgs } : {}),
|
||||
...(this.proxyConfig ? { proxy: this.proxyConfig } : {}),
|
||||
});
|
||||
|
||||
// Chromium disconnect → distinguish clean user-quit from crash. Both
|
||||
// events look identical to Playwright (one 'disconnected' fires), but
|
||||
// the underlying ChildProcess exit code separates them:
|
||||
// exitCode === 0 → clean quit (user Cmd+Q on macOS, normal shutdown)
|
||||
// exitCode !== 0 → crash, signal-kill, or OOM
|
||||
// Process supervisors (gbrowser's gbd) consume our exit code: code 0
|
||||
// means "user wanted this, don't restart"; non-zero means "crash, please
|
||||
// bring me back." Without this distinction every Cmd+Q gets treated as
|
||||
// a crash and the user-visible window keeps respawning.
|
||||
// Chromium crash → exit with clear message
|
||||
this.browser.on('disconnected', () => {
|
||||
void handleChromiumDisconnect(this.browser);
|
||||
console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting.');
|
||||
console.error('[browse] Console/network logs flushed to .gstack/browse-*.log');
|
||||
process.exit(1);
|
||||
});
|
||||
|
||||
const contextOptions: BrowserContextOptions = {
|
||||
|
|
@ -556,10 +415,6 @@ export class BrowserManager {
|
|||
|
||||
this.context = await chromium.launchPersistentContext(userDataDir, {
|
||||
headless: false,
|
||||
// Match the sandbox policy used by launch() above. Without this,
|
||||
// Playwright auto-adds --no-sandbox on every headed launch and the user
|
||||
// sees Chromium's "unsupported command-line flag" yellow infobar.
|
||||
chromiumSandbox: shouldEnableChromiumSandbox(),
|
||||
args: launchArgs,
|
||||
viewport: null, // Use browser's default viewport (real window size)
|
||||
userAgent: this.customUserAgent || customUA,
|
||||
|
|
@ -668,7 +523,6 @@ export class BrowserManager {
|
|||
// Inject indicator on the new tab
|
||||
page.evaluate(indicatorScript).catch(() => {});
|
||||
console.log(`[browse] New tab detected (id=${id}, total=${this.pages.size})`);
|
||||
this.checkTabGuardrails();
|
||||
});
|
||||
|
||||
// Persistent context opens a default page — adopt it instead of creating a new one
|
||||
|
|
@ -688,45 +542,32 @@ export class BrowserManager {
|
|||
await this.newTab();
|
||||
}
|
||||
|
||||
// Browser disconnect handler — distinguish user Cmd+Q from real crash.
|
||||
// Clean exit (Chromium exit code 0) → process.exit(0) so process
|
||||
// supervisors (gbrowser's gbd) treat it as user intent and skip the
|
||||
// restart loop. Crash → process.exit(2) preserves the legacy headed
|
||||
// semantics that's distinct from launch()'s code 1.
|
||||
// Always calls onDisconnect() first to trigger full shutdown (kill
|
||||
// sidebar-agent, save session, clean profile locks + state file) so
|
||||
// crashes don't strand resources either.
|
||||
// Browser disconnect handler — exit code 2 distinguishes from crashes (1).
|
||||
// Calls onDisconnect() to trigger full shutdown (kill sidebar-agent, save
|
||||
// session, clean profile locks + state file) before exit. Falls back to
|
||||
// direct process.exit(2) if no callback is wired up, or if the callback
|
||||
// throws/rejects — never leave the process running with a dead browser.
|
||||
if (this.browser) {
|
||||
this.browser.on('disconnected', () => {
|
||||
if (this.intentionalDisconnect) return;
|
||||
const browserRef = this.browser;
|
||||
void (async () => {
|
||||
const cause = await resolveDisconnectCause(browserRef);
|
||||
const exitCode = cause === 'clean' ? 0 : 2;
|
||||
if (cause === 'clean') {
|
||||
console.error('[browse] Real browser closed cleanly (user-initiated quit). Server exiting (0).');
|
||||
} else {
|
||||
console.error('[browse] Real browser disconnected (crash or kill). Server exiting (2).');
|
||||
console.error('[browse] Run `$B connect` to reconnect.');
|
||||
console.error('[browse] Real browser disconnected (user closed or crashed).');
|
||||
console.error('[browse] Run `$B connect` to reconnect.');
|
||||
if (!this.onDisconnect) {
|
||||
process.exit(2);
|
||||
return;
|
||||
}
|
||||
try {
|
||||
const result = this.onDisconnect();
|
||||
if (result && typeof (result as Promise<void>).catch === 'function') {
|
||||
(result as Promise<void>).catch((err) => {
|
||||
console.error('[browse] onDisconnect rejected:', err);
|
||||
process.exit(2);
|
||||
});
|
||||
}
|
||||
if (!this.onDisconnect) {
|
||||
process.exit(exitCode);
|
||||
return;
|
||||
}
|
||||
try {
|
||||
const result = this.onDisconnect(exitCode);
|
||||
if (result && typeof (result as Promise<void>).catch === 'function') {
|
||||
(result as Promise<void>).catch((err) => {
|
||||
console.error('[browse] onDisconnect rejected:', err);
|
||||
process.exit(exitCode);
|
||||
});
|
||||
}
|
||||
// onDisconnect is responsible for exit on the success path.
|
||||
} catch (err) {
|
||||
console.error('[browse] onDisconnect threw:', err);
|
||||
process.exit(exitCode);
|
||||
}
|
||||
})();
|
||||
} catch (err) {
|
||||
console.error('[browse] onDisconnect threw:', err);
|
||||
process.exit(2);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -1053,116 +894,6 @@ export class BrowserManager {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Build the diagnostic payload for `$B memory` and the /memory endpoint.
 *
 * The snapshot contains:
 *   - Bun process memory from `process.memoryUsage()`.
 *   - Per-tab JS heap and DOM counters from CDP `Performance.getMetrics`,
 *     for the pages this manager already tracks. Native/GPU/Skia/cache
 *     memory is not covered (follow-up TODO "native/GPU memory breakdown").
 *   - The Chromium process list from CDP `SystemInfo.getProcessInfo`
 *     (PID + type + CPU time). Per-process RSS is not exposed by that call;
 *     `notes[]` records why the column is absent.
 *
 * Failures are reported, not thrown: a tab whose metrics cannot be read is
 * omitted, and a failure to collect the process list leaves `processes` as
 * null with an explanatory note.
 *
 * @param structures Buffer/structure stats supplied by the caller and
 *   passed through unchanged, so this module does not depend on every
 *   buffer-owning module.
 * @returns The assembled snapshot, stamped with `Date.now()`.
 */
async getMemorySnapshot(structures: MemoryStructureStats): Promise<MemorySnapshot> {
  const { rss, heapUsed, heapTotal, external } = process.memoryUsage();
  const notes: string[] = [];
  const tabs: MemoryTabSnapshot[] = [];

  // Per-tab metrics, one tab at a time. Any failure for a tab skips that tab.
  for (const [id, page] of this.pages) {
    try {
      let url = '';
      try {
        url = page.url();
      } catch {
        // Leave the URL empty when it cannot be read.
      }
      const title = await page.title().catch(() => '');

      const metrics = await withCdpSession(page, async (session) => {
        await session.send('Performance.enable').catch(() => undefined);
        const reply = (await session.send('Performance.getMetrics')) as {
          metrics?: Array<{ name: string; value: number }>;
        };
        return reply.metrics ?? [];
      });

      const byName: Record<string, number> = {};
      for (const { name, value } of metrics) byName[name] = value;

      tabs.push({
        id,
        url,
        title,
        jsHeapUsed: byName.JSHeapUsedSize ?? 0,
        jsHeapTotal: byName.JSHeapTotalSize ?? 0,
        documents: byName.Documents ?? 0,
        nodes: byName.Nodes ?? 0,
        listeners: byName.JSEventListeners ?? 0,
      });
    } catch {
      // Target died or CDP unavailable mid-snapshot — skip this tab.
    }
  }

  // Chromium process list. The browser handle is either the `browser` field
  // or reachable through `context.browser()`; try both.
  let processes: MemoryProcess[] | null = null;
  const browser: Browser | null = this.browser ?? (this.context ? this.context.browser() : null);

  if (!browser) {
    notes.push('Browser handle unavailable (server connection mode); per-process info skipped.');
  } else {
    try {
      // `newBrowserCDPSession` may be missing from the static type surface,
      // so probe for it at runtime through a typed cast.
      type BrowserWithCDP = Browser & {
        newBrowserCDPSession?: () => Promise<{
          send: (method: string, params?: unknown) => Promise<unknown>;
          detach: () => Promise<void>;
        }>;
      };
      const openBrowserSession = (browser as BrowserWithCDP).newBrowserCDPSession;

      if (typeof openBrowserSession !== 'function') {
        notes.push('Playwright build does not expose newBrowserCDPSession; per-process info skipped.');
      } else {
        const browserSession = await openBrowserSession.call(browser);
        try {
          const info = (await browserSession.send('SystemInfo.getProcessInfo')) as {
            processInfo?: Array<{ id: number; type: string; cpuTime: number }>;
          };
          processes = (info.processInfo ?? []).map(({ id, type, cpuTime }) => ({ id, type, cpuTime }));
          notes.push(
            'Per-Chromium-process RSS not collected — SystemInfo.getProcessInfo exposes PID+type+CPU only. ' +
              'See follow-up TODO "native/GPU memory breakdown" for the deferred fix.',
          );
        } finally {
          // Always release the browser-wide session, even if the query failed.
          await browserSession.detach().catch(() => undefined);
        }
      }
    } catch (err: any) {
      notes.push(`CDP browser session unavailable: ${err?.message ?? String(err)}`);
    }
  }

  return {
    bunServer: { rss, heapUsed, heapTotal, external },
    tabs,
    processes,
    structures,
    capturedAt: Date.now(),
    notes,
  };
}
|
||||
|
||||
// ─── Ref Map (delegates to active session) ──────────────────
|
||||
setRefMap(refs: Map<string, RefEntry>) {
|
||||
this.getActiveSession().setRefMap(refs);
|
||||
|
|
@ -1572,10 +1303,6 @@ export class BrowserManager {
|
|||
|
||||
newContext = await chromium.launchPersistentContext(userDataDir, {
|
||||
headless: false,
|
||||
// Match the sandbox policy used by launchHeaded() / launch(). The
|
||||
// handoff path is the headless→headed re-launch and shares the same
|
||||
// anti-detection posture, including no spurious --no-sandbox infobar.
|
||||
chromiumSandbox: shouldEnableChromiumSandbox(),
|
||||
args: launchArgs,
|
||||
viewport: null,
|
||||
...(this.proxyConfig ? { proxy: this.proxyConfig } : {}),
|
||||
|
|
@ -1605,14 +1332,12 @@ export class BrowserManager {
|
|||
await newContext.setExtraHTTPHeaders(this.extraHeaders);
|
||||
}
|
||||
|
||||
// Register disconnect handler on new browser. Same clean-vs-crash
|
||||
// discrimination as launch() / launchHeaded() above so a user-initiated
|
||||
// Cmd+Q after a handoff doesn't trigger gbd's restart loop.
|
||||
// Register crash handler on new browser
|
||||
if (this.browser) {
|
||||
const browserRef = this.browser;
|
||||
this.browser.on('disconnected', () => {
|
||||
if (this.intentionalDisconnect) return;
|
||||
void handleChromiumDisconnect(browserRef);
|
||||
console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting.');
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -1689,7 +1414,6 @@ export class BrowserManager {
|
|||
break;
|
||||
}
|
||||
}
|
||||
this.recheckTabGuardrailsOnClose();
|
||||
});
|
||||
|
||||
// Clear ref map on navigation — refs point to stale elements after page change
|
||||
|
|
@ -1758,38 +1482,23 @@ export class BrowserManager {
|
|||
}
|
||||
});
|
||||
|
||||
// Capture response sizes via requestfinished — but DO NOT call
|
||||
// response.body() here. Pre-fix, this listener materialized every
|
||||
// response body across CDP just to read .length: multi-GB/hour of
|
||||
// Buffer churn on long-lived headed Chromium with media-heavy
|
||||
// pages, the primary Bun-side accelerant on the gbrowser-OOM
|
||||
// investigation. req.sizes() pulls from the Network.loadingFinished
|
||||
// event Chromium already emits — accurate for chunked transfer,
|
||||
// gzip-compressed responses, and streaming media, all the cases
|
||||
// where the previous Content-Length-header approach would have
|
||||
// missed the size.
|
||||
//
|
||||
// The "single context-level CDP listener" architecture (D10's
|
||||
// stretch goal — would reduce per-page listener count from N to 1
|
||||
// via Target.setAutoAttach) is deferred. TODOS.md tracks it.
|
||||
// Capture response sizes via response finished
|
||||
page.on('requestfinished', async (req) => {
|
||||
try {
|
||||
const sizes = await req.sizes().catch(() => null);
|
||||
if (!sizes) return;
|
||||
const url = req.url();
|
||||
const size = sizes.responseBodySize ?? 0;
|
||||
for (let i = networkBuffer.length - 1; i >= 0; i--) {
|
||||
const entry = networkBuffer.get(i);
|
||||
if (entry && entry.url === url && !entry.size) {
|
||||
networkBuffer.set(i, { ...entry, size });
|
||||
break;
|
||||
const res = await req.response();
|
||||
if (res) {
|
||||
const url = req.url();
|
||||
const body = await res.body().catch(() => null);
|
||||
const size = body ? body.length : 0;
|
||||
for (let i = networkBuffer.length - 1; i >= 0; i--) {
|
||||
const entry = networkBuffer.get(i);
|
||||
if (entry && entry.url === url && !entry.size) {
|
||||
networkBuffer.set(i, { ...entry, size });
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Best-effort: requestfinished fires for aborted/cached requests too,
|
||||
// where sizes() is unavailable. Missing size is acceptable; an
|
||||
// unbounded throw would noise the console for every cache hit.
|
||||
}
|
||||
} catch {}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,84 +25,18 @@ import { logTelemetry } from './telemetry';
|
|||
const CDP_TIMEOUT_MS = 5000;
|
||||
const CDP_ACQUIRE_TIMEOUT_MS = 5000;
|
||||
|
||||
// ─── CDP session lifecycle helpers ─────────────────────────────
|
||||
//
|
||||
// Every direct `newCDPSession(page)` call needs a matching `session.detach()`
|
||||
// to release the Chromium-side CDP target. Forgetting the detach leaves the
|
||||
// target attached until the underlying transport drops (often process exit),
|
||||
// which on a long-lived headed browser shows up as steadily-climbing
|
||||
// browser-process RSS. To make the leak class unforgettable, callers should
|
||||
// go through one of these two helpers and a static-grep test
|
||||
// (browse/test/cdp-session-cleanup.test.ts) fails CI if any source file
|
||||
// calls `newCDPSession(` outside this module.
|
||||
|
||||
/**
 * Run `fn` against a short-lived CDP session for `page`, then detach it.
 *
 * Intended for one-shot CDP work (archive snapshots, `$B memory`, a single
 * `Page.captureScreenshot`). The session is detached whether `fn` resolves
 * or throws. Detach failures are swallowed, so they never replace the
 * result or error produced by `fn`.
 *
 * For repeated work against the same page (the `$B cdp` bridge, the
 * inspector) use `getOrCreateCdpSession`, which caches the session.
 *
 * @param page Page to attach the session to.
 * @param fn   Callback that receives the live session.
 * @returns Whatever `fn` resolves to; rejects with whatever `fn` rejects with.
 */
export async function withCdpSession<T>(
  page: Page,
  fn: (session: any) => Promise<T>,
): Promise<T> {
  const cdp = await page.context().newCDPSession(page);

  // Best-effort release: the session may already be gone (target closed,
  // context recreated, browser disconnected), so every error is ignored.
  const releaseQuietly = async (): Promise<void> => {
    try {
      await cdp.detach();
    } catch {
      // Intentionally ignored — cleanup path.
    }
  };

  try {
    return await fn(cdp);
  } finally {
    await releaseQuietly();
  }
}
|
||||
|
||||
/**
 * Return the cached long-lived CDP session for `page`, creating it on first
 * use. On creation a `page.once('close', ...)` hook is registered that
 * removes the cache entry AND calls `session.detach()`.
 *
 * The cache is caller-owned so each consumer (the `$B cdp` bridge, the
 * inspector) keeps its own session pool.
 *
 * Concurrency: creating a session awaits, so two overlapping calls for the
 * same page can both miss the cache. The cache is therefore re-checked
 * after the await; the caller that lost the race detaches its redundant
 * session and returns the cached one, so every caller shares one session.
 *
 * @param page  Page the session is attached to.
 * @param cache Caller-owned Page → session map.
 * @returns The cached (or newly created) CDP session.
 */
export async function getOrCreateCdpSession(
  page: Page,
  cache: WeakMap<Page, any>,
): Promise<any> {
  const cached = cache.get(page);
  if (cached) return cached;

  const session = await page.context().newCDPSession(page);

  // Another caller may have populated the cache while we were awaiting.
  const winner = cache.get(page);
  if (winner) {
    session.detach().catch(() => {
      // Best-effort cleanup of the redundant session.
    });
    return winner;
  }

  cache.set(page, session);
  page.once('close', () => {
    cache.delete(page);
    session.detach().catch(() => {
      // Best-effort cleanup — the session may already be detached.
    });
  });
  return session;
}
|
||||
|
||||
// ─── $B cdp bridge ─────────────────────────────────────────────
|
||||
|
||||
// Per-page CDPSession cache. Lifecycle delegated to getOrCreateCdpSession
|
||||
// which registers a close hook that BOTH removes the cache entry AND calls
|
||||
// session.detach() — pre-helper code only did the former, leaving the
|
||||
// Chromium-side target attached.
|
||||
// Per-page CDPSession cache. Created lazily on first allow-listed call,
|
||||
// cleaned up when the page closes.
|
||||
const sessionCache: WeakMap<Page, any> = new WeakMap();
|
||||
|
||||
async function getCdpSession(page: Page): Promise<any> {
|
||||
return getOrCreateCdpSession(page, sessionCache);
|
||||
let s = sessionCache.get(page);
|
||||
if (s) return s;
|
||||
s = await page.context().newCDPSession(page);
|
||||
sessionCache.set(page, s);
|
||||
// Clear cache on detach so we don't hold a stale handle.
|
||||
page.once('close', () => sessionCache.delete(page));
|
||||
return s;
|
||||
}
|
||||
|
||||
export interface CdpDispatchInput {
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@
|
|||
*/
|
||||
|
||||
import type { Page } from 'playwright';
|
||||
import { getOrCreateCdpSession } from './cdp-bridge';
|
||||
|
||||
// ─── Types ──────────────────────────────────────────────────────
|
||||
|
||||
|
|
@ -107,23 +106,15 @@ async function getOrCreateSession(page: Page): Promise<any> {
|
|||
}
|
||||
}
|
||||
|
||||
session = await getOrCreateCdpSession(page, cdpSessions);
|
||||
session = await page.context().newCDPSession(page);
|
||||
cdpSessions.set(page, session);
|
||||
|
||||
// Enable DOM and CSS domains on first init for this page. The session
|
||||
// itself is cached + close-detached by getOrCreateCdpSession; the
|
||||
// initializedPages WeakSet is inspector-layer state that needs its
|
||||
// own close hook to stay in sync.
|
||||
if (!initializedPages.has(page)) {
|
||||
await session.send('DOM.enable');
|
||||
await session.send('CSS.enable');
|
||||
initializedPages.add(page);
|
||||
page.once('close', () => initializedPages.delete(page));
|
||||
}
|
||||
// Enable DOM and CSS domains
|
||||
await session.send('DOM.enable');
|
||||
await session.send('CSS.enable');
|
||||
initializedPages.add(page);
|
||||
|
||||
// Auto-detach on navigation — DOM/CSS domain state is tied to the
|
||||
// document. Close-detach (from getOrCreateCdpSession) handles the
|
||||
// tab-close case; framenavigated catches in-tab navigation that
|
||||
// invalidates inspector state without closing the tab.
|
||||
// Auto-detach on navigation
|
||||
page.once('framenavigated', () => {
|
||||
try {
|
||||
session.detach().catch(() => {});
|
||||
|
|
@ -139,41 +130,7 @@ async function getOrCreateSession(page: Page): Promise<any> {
|
|||
|
||||
// ─── Modification History ───────────────────────────────────────
|
||||
|
||||
// Bounded FIFO of style modifications. Pre-cap, this was an unbounded
|
||||
// module-scoped array that grew for every CSS edit made through $B css
|
||||
// across the whole browser session — small per-entry footprint but no
|
||||
// upper bound, the kind of slow leak that compounds over multi-day
|
||||
// inspector use. The cap is 200 because per-session undo workflows
|
||||
// rarely walk back more than a handful of edits, and a user who really
|
||||
// wants to roll a long change back can `$B css reset` to revert all of
|
||||
// them. totalPushed is monotonic across the session so undoModification
|
||||
// can tell the user when their target index has been evicted, instead
|
||||
// of just "no modification at index N".
|
||||
/** Maximum number of style modifications kept in the history. */
const MOD_HISTORY_CAP = 200;
/** Retained modifications, oldest first. */
const modificationHistory: StyleModification[] = [];
/** Count of every modification pushed, including ones since evicted. */
let modHistoryTotalPushed = 0;

/**
 * Append a modification to the history and evict the oldest entries so the
 * history never holds more than MOD_HISTORY_CAP items.
 */
function pushModification(mod: StyleModification): void {
  modHistoryTotalPushed += 1;
  modificationHistory.push(mod);

  const overflow = modificationHistory.length - MOD_HISTORY_CAP;
  if (overflow > 0) {
    // Drop from the front: the oldest entries go first (FIFO).
    modificationHistory.splice(0, overflow);
  }
}
|
||||
|
||||
// Test-only hooks for the history cap (push, counters, reset, cap value) so
// tests can exercise it without a CDP-driven Page. Production code must go
// through modifyStyle / undoModification / resetModifications.
export const __testInternals = {
  pushModification,
  MOD_HISTORY_CAP,
  /** Shallow copy of the retained history. */
  getRawHistory(): StyleModification[] {
    return [...modificationHistory];
  },
  /** Total number of pushes recorded since the last reset. */
  getTotalPushed(): number {
    return modHistoryTotalPushed;
  },
  /** Empty the history and zero the push counter. */
  resetForTest(): void {
    modificationHistory.length = 0;
    modHistoryTotalPushed = 0;
  },
};
|
||||
|
||||
// ─── Specificity Calculation ────────────────────────────────────
|
||||
|
||||
|
|
@ -602,7 +559,7 @@ export async function modifyStyle(
|
|||
method,
|
||||
};
|
||||
|
||||
pushModification(modification);
|
||||
modificationHistory.push(modification);
|
||||
return modification;
|
||||
}
|
||||
|
||||
|
|
@ -612,12 +569,7 @@ export async function modifyStyle(
|
|||
export async function undoModification(page: Page, index?: number): Promise<void> {
|
||||
const idx = index ?? modificationHistory.length - 1;
|
||||
if (idx < 0 || idx >= modificationHistory.length) {
|
||||
const evictedNote = modHistoryTotalPushed > MOD_HISTORY_CAP
|
||||
? ` (most recent ${MOD_HISTORY_CAP} only — ${modHistoryTotalPushed - MOD_HISTORY_CAP} earlier entries evicted at the cap)`
|
||||
: '';
|
||||
throw new Error(
|
||||
`No modification at index ${idx}. History has ${modificationHistory.length} entries${evictedNote}.`,
|
||||
);
|
||||
throw new Error(`No modification at index ${idx}. History has ${modificationHistory.length} entries.`);
|
||||
}
|
||||
|
||||
const mod = modificationHistory[idx];
|
||||
|
|
@ -670,23 +622,6 @@ export function getModificationHistory(): StyleModification[] {
|
|||
return [...modificationHistory];
|
||||
}
|
||||
|
||||
/**
 * Diagnostic accessor for the $B memory snapshot.
 *
 * @returns `current` — entries currently retained; `cap` — the history cap;
 *   `evicted` — total pushes beyond the cap (never negative).
 */
export function getModificationHistoryStats(): {
  current: number;
  cap: number;
  evicted: number;
} {
  const beyondCap = modHistoryTotalPushed - MOD_HISTORY_CAP;
  return {
    current: modificationHistory.length,
    cap: MOD_HISTORY_CAP,
    evicted: beyondCap > 0 ? beyondCap : 0,
  };
}
|
||||
|
||||
/**
|
||||
* Reset all modifications, restoring original values.
|
||||
*/
|
||||
|
|
@ -713,7 +648,6 @@ export async function resetModifications(page: Page): Promise<void> {
|
|||
}
|
||||
}
|
||||
modificationHistory.length = 0;
|
||||
modHistoryTotalPushed = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -11,13 +11,11 @@
|
|||
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { spawn as nodeSpawn } from 'child_process';
|
||||
import { safeUnlink, safeUnlinkQuiet, safeKill, isProcessAlive } from './error-handling';
|
||||
import { writeSecureFile, mkdirSecure } from './file-permissions';
|
||||
import { resolveConfig, ensureStateDir, readVersionHash } from './config';
|
||||
import { parseProxyConfig, computeConfigHash, ProxyConfigError } from './proxy-config';
|
||||
import { redactProxyUrl } from './proxy-redact';
|
||||
import { spawnTerminalAgent } from './terminal-agent-control';
|
||||
|
||||
const config = resolveConfig();
|
||||
const IS_WINDOWS = process.platform === 'win32';
|
||||
|
|
@ -211,86 +209,6 @@ function cleanupLegacyState(): void {
|
|||
}
|
||||
}
|
||||
|
||||
// ─── Chromium profile lock helpers (#1781) ─────────────────────
|
||||
/**
 * Profile dir used by headed/connect Chromium sessions:
 * `<HOME>/.gstack/chromium-profile`, with `/tmp` standing in for HOME when
 * the variable is unset or empty.
 */
function chromiumProfileDir(): string {
  const home = process.env.HOME || '/tmp';
  return path.join(home, '.gstack', 'chromium-profile');
}
|
||||
|
||||
/**
 * Delete Chromium's SingletonLock / SingletonSocket / SingletonCookie files
 * from the profile dir so a relaunch can acquire the profile. Uses the quiet
 * unlink helper, so it is safe to call when the files are absent.
 *
 * @param profileDir Profile directory; defaults to `chromiumProfileDir()`.
 */
function cleanChromiumProfileLocks(profileDir: string = chromiumProfileDir()): void {
  const lockNames = ['SingletonLock', 'SingletonSocket', 'SingletonCookie'];
  lockNames
    .map((name) => path.join(profileDir, name))
    .forEach((lockPath) => {
      safeUnlinkQuiet(lockPath);
    });
}
|
||||
|
||||
/**
 * Kill an orphaned Chromium that still holds the profile's SingletonLock.
 *
 * The lock is a symlink whose target looks like "hostname-PID". The PID is
 * taken from the last '-'-separated segment. If that process is alive it
 * gets SIGTERM, then — if still alive after 1s — SIGKILL followed by a
 * further 500ms wait. A missing lock (ENOENT) or a lock that is not a
 * symlink (EINVAL) is a no-op; any other error is rethrown.
 *
 * @param profileDir Profile directory; defaults to `chromiumProfileDir()`.
 */
async function killOrphanChromium(profileDir: string = chromiumProfileDir()): Promise<void> {
  const pause = (ms: number): Promise<void> => new Promise((done) => setTimeout(done, ms));

  try {
    const lockTarget = fs.readlinkSync(path.join(profileDir, 'SingletonLock')); // "hostname-12345"
    const pidText = lockTarget.split('-').pop() || '';
    const orphanPid = parseInt(pidText, 10);

    // NaN / 0 (unparseable target) or an already-dead process: nothing to do.
    if (!orphanPid || !isProcessAlive(orphanPid)) return;

    safeKill(orphanPid, 'SIGTERM');
    await pause(1000);
    if (!isProcessAlive(orphanPid)) return;

    // Still alive after the grace period — escalate.
    safeKill(orphanPid, 'SIGKILL');
    await pause(500);
  } catch (err: any) {
    const code = err?.code;
    if (code === 'ENOENT' || code === 'EINVAL') return;
    throw err;
  }
}
|
||||
|
||||
/**
 * Bounded /health probe. Checks the server up to `attempts` times, sleeping
 * `backoffMs` between consecutive tries (no sleep after the last one), and
 * resolves true on the first healthy answer. Lets callers tell a
 * busy-but-alive daemon from a dead one (#1781) so a slow server isn't
 * killed and restarted into a crash-loop.
 *
 * @param port      Port of the server to probe.
 * @param attempts  Maximum number of probes (default 3).
 * @param backoffMs Delay between probes in milliseconds (default 250).
 * @returns true if any probe succeeded, false otherwise.
 */
async function probeHealthWithBackoff(port: number, attempts = 3, backoffMs = 250): Promise<boolean> {
  let remaining = attempts;
  while (remaining > 0) {
    if (await isServerHealthy(port)) return true;
    remaining -= 1;
    if (remaining > 0) await Bun.sleep(backoffMs);
  }
  return false;
}
|
||||
|
||||
/**
 * Compute the extra environment for an auto-restart after a crash. Pure and
 * exported for tests.
 *
 * - BROWSE_PROXY_URL: set from this invocation's `proxyUrl` flag only.
 * - BROWSE_HEADED: set to '1' when this invocation passed `headed` OR the
 *   persisted server state recorded mode 'headed', so a restart triggered by
 *   a plain command never silently downgrades a headed session (#1781).
 * - BROWSE_CONFIG_HASH: this invocation's `configHash`, else the persisted one.
 *
 * Keys whose value would be empty are omitted entirely.
 *
 * @param globalFlags Flags parsed from the current CLI invocation (may be absent).
 * @param oldState    Previously persisted server state, or null when none exists.
 * @returns The environment variables to overlay on the restarted server.
 */
export function buildRestartEnv(
  globalFlags: GlobalFlags | null | undefined,
  oldState: ServerState | null,
): Record<string, string> {
  const headed = Boolean(globalFlags?.headed) || oldState?.mode === 'headed';

  // Ordered table of candidate variables; falsy values are dropped below.
  const candidates: Array<[string, string | undefined]> = [
    ['BROWSE_PROXY_URL', globalFlags?.proxyUrl || undefined],
    ['BROWSE_HEADED', headed ? '1' : undefined],
    ['BROWSE_CONFIG_HASH', globalFlags?.configHash || oldState?.configHash || undefined],
  ];

  const env: Record<string, string> = {};
  for (const [key, value] of candidates) {
    if (value) env[key] = value;
  }
  return env;
}
|
||||
|
||||
/**
 * macOS only: pull the headed Chromium window to the user's current Space.
 * "Google Chrome for Testing" frequently opens behind the active window or on
 * another Space — the first thing users read as "I can't see the browser"
 * (#1781).
 *
 * Best-effort and fire-and-forget: the osascript child is detached and
 * unref'd, and this function never throws. The app name is a fixed literal
 * (no interpolation). No-op on every platform other than darwin.
 */
function raiseHeadedWindowMacOS(): void {
  if (process.platform !== 'darwin') return;
  try {
    const child = nodeSpawn('osascript', ['-e', 'tell application "Google Chrome for Testing" to activate'], {
      stdio: 'ignore',
      detached: true,
    });
    // A failed spawn (e.g. osascript missing) is reported asynchronously via
    // the child's 'error' event, which try/catch cannot intercept. Without a
    // listener that event becomes an uncaught exception, so swallow it here
    // to keep this helper non-fatal.
    child.on('error', () => {});
    child.unref();
  } catch {
    // Synchronous spawn failure — non-fatal
  }
}
|
||||
|
||||
// ─── Server Lifecycle ──────────────────────────────────────────
|
||||
async function startServer(extraEnv?: Record<string, string>): Promise<ServerState> {
|
||||
ensureStateDir(config);
|
||||
|
|
@ -299,12 +217,7 @@ async function startServer(extraEnv?: Record<string, string>): Promise<ServerSta
|
|||
safeUnlink(config.stateFile);
|
||||
safeUnlink(path.join(config.stateDir, 'browse-startup-error.log'));
|
||||
|
||||
// #1781: clear a stale Chromium profile lock (and kill the orphan still
|
||||
// holding it) before launch, so an auto-restart after an abrupt kill isn't
|
||||
// blocked by the previous Chromium's SingletonLock — the self-inflicted
|
||||
// crash-loop. Previously only the manual connect preamble did this.
|
||||
await killOrphanChromium();
|
||||
cleanChromiumProfileLocks();
|
||||
let proc: any = null;
|
||||
|
||||
// Allow the caller to opt out of the parent-process watchdog by setting
|
||||
// BROWSE_PARENT_PID=0 in the environment. Useful for CI, non-interactive
|
||||
|
|
@ -327,22 +240,12 @@ async function startServer(extraEnv?: Record<string, string>): Promise<ServerSta
|
|||
`${extraEnvStr})}).unref()`;
|
||||
Bun.spawnSync(['node', '-e', launcherCode], { stdio: ['ignore', 'ignore', 'ignore'] });
|
||||
} else {
|
||||
// macOS/Linux: Bun.spawn().unref() only removes the child from Bun's event
|
||||
// loop — it does NOT call setsid(), so the spawned server stays in the
|
||||
// parent's process session. When the CLI runs inside a session-managed
|
||||
// shell (e.g. Claude Code's per-command Bash sandbox, Conductor, CI
|
||||
// step runners), the session leader's exit sends SIGHUP to every PID in
|
||||
// the session, killing the bun server (and its Chromium grandchildren).
|
||||
// Even with BROWSE_PARENT_PID=0 disabling the watchdog, SIGHUP still
|
||||
// reaps the server. Use Node's child_process.spawn with detached:true,
|
||||
// which calls setsid() so the server becomes its own session leader
|
||||
// (PPID=1, STAT=Ss) and survives the spawning shell's exit. Mirrors
|
||||
// the Windows path's rationale — same root cause, different OS API.
|
||||
nodeSpawn('bun', ['run', SERVER_SCRIPT], {
|
||||
detached: true,
|
||||
stdio: ['ignore', 'ignore', 'ignore'],
|
||||
// macOS/Linux: Bun.spawn + unref works correctly
|
||||
proc = Bun.spawn(['bun', 'run', SERVER_SCRIPT], {
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
env: { ...process.env, BROWSE_STATE_FILE: config.stateFile, BROWSE_PARENT_PID: parentPid, ...extraEnv },
|
||||
}).unref();
|
||||
});
|
||||
proc.unref();
|
||||
}
|
||||
|
||||
// Wait for server to become healthy.
|
||||
|
|
@ -357,17 +260,27 @@ async function startServer(extraEnv?: Record<string, string>): Promise<ServerSta
|
|||
await Bun.sleep(100);
|
||||
}
|
||||
|
||||
// Server didn't start in time — check the on-disk startup error log.
|
||||
// Both platforms now spawn with stdio: 'ignore', so the server writes
|
||||
// errors to disk for the CLI to read (see server.ts start().catch).
|
||||
const errorLogPath = path.join(config.stateDir, 'browse-startup-error.log');
|
||||
try {
|
||||
const errorLog = fs.readFileSync(errorLogPath, 'utf-8').trim();
|
||||
if (errorLog) {
|
||||
throw new Error(`Server failed to start:\n${errorLog}`);
|
||||
// Server didn't start in time — try to get error details
|
||||
if (proc?.stderr) {
|
||||
// macOS/Linux: read stderr from the spawned process
|
||||
const reader = proc.stderr.getReader();
|
||||
const { value } = await reader.read();
|
||||
if (value) {
|
||||
const errText = new TextDecoder().decode(value);
|
||||
throw new Error(`Server failed to start:\n${errText}`);
|
||||
}
|
||||
} else {
|
||||
// Windows: check startup error log (server writes errors to disk since
|
||||
// stderr is unavailable due to stdio: 'ignore' for detachment)
|
||||
const errorLogPath = path.join(config.stateDir, 'browse-startup-error.log');
|
||||
try {
|
||||
const errorLog = fs.readFileSync(errorLogPath, 'utf-8').trim();
|
||||
if (errorLog) {
|
||||
throw new Error(`Server failed to start:\n${errorLog}`);
|
||||
}
|
||||
} catch (e: any) {
|
||||
if (e.code !== 'ENOENT') throw e;
|
||||
}
|
||||
} catch (e: any) {
|
||||
if (e.code !== 'ENOENT') throw e;
|
||||
}
|
||||
throw new Error(`Server failed to start within ${MAX_START_WAIT / 1000}s`);
|
||||
}
|
||||
|
|
@ -573,42 +486,26 @@ async function sendCommand(state: ServerState, command: string, args: string[],
|
|||
}
|
||||
} catch (err: any) {
|
||||
if (err.name === 'AbortError') {
|
||||
// #1781: a 30s timeout on a heavy page usually means busy, not dead.
|
||||
// Don't kill a live server (that's what triggered the crash-loop) — report
|
||||
// and exit so the user can retry rather than losing their (headed) window.
|
||||
const ts = readState();
|
||||
const alive = ts?.pid ? isProcessAlive(ts.pid) : false;
|
||||
console.error(alive
|
||||
? '[browse] Command timed out after 30s (server still alive — busy, not restarting). Retry, or raise load.'
|
||||
: '[browse] Command timed out after 30s');
|
||||
console.error('[browse] Command timed out after 30s');
|
||||
process.exit(1);
|
||||
}
|
||||
// Connection error — server may have crashed, OR may just be busy.
|
||||
// Connection error — server may have crashed
|
||||
if (err.code === 'ECONNREFUSED' || err.code === 'ECONNRESET' || err.message?.includes('fetch failed')) {
|
||||
const oldState = readState();
|
||||
// #1781 busy-vs-dead: a single-threaded daemon under beacon/extension load
|
||||
// can briefly stop answering HTTP while still alive. Before declaring a
|
||||
// crash, if the process is alive give /health a bounded chance to recover
|
||||
// and just retry the command — never kill+restart a live-but-busy server.
|
||||
if (oldState?.pid && isProcessAlive(oldState.pid) && await probeHealthWithBackoff(oldState.port)) {
|
||||
if (retries >= 1) throw new Error('[browse] Server unresponsive after retry — aborting');
|
||||
console.error('[browse] Server was briefly unresponsive (busy); retrying command...');
|
||||
return sendCommand(oldState, command, args, retries + 1);
|
||||
}
|
||||
// Truly dead (or health never recovered) → restart.
|
||||
if (retries >= 1) throw new Error('[browse] Server crashed twice in a row — aborting');
|
||||
console.error('[browse] Server connection lost. Restarting...');
|
||||
// Kill the old server to avoid orphaned chromium processes
|
||||
const oldState = readState();
|
||||
if (oldState && oldState.pid) {
|
||||
await killServer(oldState.pid);
|
||||
}
|
||||
// startServer() now clears the Chromium SingletonLock + reaps the orphan,
|
||||
// so the relaunch isn't blocked by the dead Chromium's profile lock (#1781).
|
||||
//
|
||||
// Reapply --proxy / --headed when restarting. headed comes from THIS
|
||||
// invocation OR the persisted server mode, so a restart triggered by a
|
||||
// plain command (goto/status, no --headed) never silently downgrades a
|
||||
// headed session to headless (#1781). Same for proxy/configHash.
|
||||
const restartEnv = buildRestartEnv(_globalFlags, oldState);
|
||||
// Reapply --proxy / --headed flags from this invocation when restarting
|
||||
// after a crash. Without this, a proxied daemon that dies mid-command
|
||||
// would silently restart in default direct/headless mode and bypass
|
||||
// the SOCKS bridge.
|
||||
const restartEnv: Record<string, string> = {};
|
||||
if (_globalFlags?.proxyUrl) restartEnv.BROWSE_PROXY_URL = _globalFlags.proxyUrl;
|
||||
if (_globalFlags?.headed) restartEnv.BROWSE_HEADED = '1';
|
||||
if (_globalFlags?.configHash) restartEnv.BROWSE_CONFIG_HASH = _globalFlags.configHash;
|
||||
const newState = await startServer(Object.keys(restartEnv).length ? restartEnv : undefined);
|
||||
return sendCommand(newState, command, args, retries + 1);
|
||||
}
|
||||
|
|
@ -1069,11 +966,30 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
|
|||
}
|
||||
}
|
||||
|
||||
// Kill an orphaned Chromium still holding the profile lock (the Bun server
|
||||
// PID's Chromium child can outlive an abrupt kill/crash), then clear the
|
||||
// lock files so the launch is clean. Shared with the auto-restart path (#1781).
|
||||
await killOrphanChromium();
|
||||
cleanChromiumProfileLocks();
|
||||
// Kill orphaned Chromium processes that may still hold the profile lock.
|
||||
// The server PID is the Bun process; Chromium is a child that can outlive it
|
||||
// if the server is killed abruptly (SIGKILL, crash, manual rm of state file).
|
||||
const profileDir = path.join(process.env.HOME || '/tmp', '.gstack', 'chromium-profile');
|
||||
try {
|
||||
const singletonLock = path.join(profileDir, 'SingletonLock');
|
||||
const lockTarget = fs.readlinkSync(singletonLock); // e.g. "hostname-12345"
|
||||
const orphanPid = parseInt(lockTarget.split('-').pop() || '', 10);
|
||||
if (orphanPid && isProcessAlive(orphanPid)) {
|
||||
safeKill(orphanPid, 'SIGTERM');
|
||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||
if (isProcessAlive(orphanPid)) {
|
||||
safeKill(orphanPid, 'SIGKILL');
|
||||
await new Promise(resolve => setTimeout(resolve, 500));
|
||||
}
|
||||
}
|
||||
} catch (err: any) {
|
||||
if (err?.code !== 'ENOENT' && err?.code !== 'EINVAL') throw err;
|
||||
}
|
||||
|
||||
// Clean up Chromium profile locks (can persist after crashes)
|
||||
for (const lockFile of ['SingletonLock', 'SingletonSocket', 'SingletonCookie']) {
|
||||
safeUnlinkQuiet(path.join(profileDir, lockFile));
|
||||
}
|
||||
|
||||
// Delete stale state file
|
||||
safeUnlinkQuiet(config.stateFile);
|
||||
|
|
@ -1111,29 +1027,38 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
|
|||
});
|
||||
const status = await resp.text();
|
||||
console.log(`Connected to real Chrome\n${status}`);
|
||||
// #1781: surface the window — it often opens behind/on another Space.
|
||||
raiseHeadedWindowMacOS();
|
||||
if (process.platform === 'darwin') {
|
||||
console.log('(If you still don\'t see it, check Mission Control / other Spaces.)');
|
||||
}
|
||||
|
||||
// sidebar-agent.ts spawn was here. Ripped alongside the chat queue —
|
||||
// the Terminal pane runs an interactive PTY now, no more one-shot
|
||||
// claude -p subprocesses to multiplex.
|
||||
|
||||
// Auto-start terminal agent (non-compiled bun process). Owns the PTY
|
||||
// WebSocket for the sidebar Terminal pane. Routes through the shared
|
||||
// spawnTerminalAgent helper so the CLI cold-start path and the
|
||||
// server.ts watchdog respawn path share one implementation. The
|
||||
// helper handles prior-PID cleanup, script lookup, and env wiring.
|
||||
// WebSocket for the sidebar Terminal pane.
|
||||
let termAgentScript = path.resolve(__dirname, 'terminal-agent.ts');
|
||||
if (!fs.existsSync(termAgentScript)) {
|
||||
termAgentScript = path.resolve(path.dirname(process.execPath), '..', 'src', 'terminal-agent.ts');
|
||||
}
|
||||
try {
|
||||
const newPid = spawnTerminalAgent({
|
||||
stateFile: config.stateFile,
|
||||
serverPort: newState.port,
|
||||
cwd: config.projectDir,
|
||||
});
|
||||
if (newPid) {
|
||||
console.log(`[browse] Terminal agent started (PID: ${newPid})`);
|
||||
if (fs.existsSync(termAgentScript)) {
|
||||
// Kill old terminal-agents so a stale port file can't trick the
|
||||
// server into routing /pty-session at a dead listener.
|
||||
try {
|
||||
const { spawnSync } = require('child_process');
|
||||
spawnSync('pkill', ['-f', 'terminal-agent\\.ts'], { stdio: 'ignore', timeout: 3000 });
|
||||
} catch (err: any) {
|
||||
if (err?.code !== 'ENOENT') throw err;
|
||||
}
|
||||
const termProc = Bun.spawn(['bun', 'run', termAgentScript], {
|
||||
cwd: config.projectDir,
|
||||
env: {
|
||||
...process.env,
|
||||
BROWSE_STATE_FILE: config.stateFile,
|
||||
BROWSE_SERVER_PORT: String(newState.port),
|
||||
},
|
||||
stdio: ['ignore', 'ignore', 'ignore'],
|
||||
});
|
||||
termProc.unref();
|
||||
console.log(`[browse] Terminal agent started (PID: ${termProc.pid})`);
|
||||
}
|
||||
} catch (err: any) {
|
||||
// Non-fatal: chat still works without the terminal agent.
|
||||
|
|
@ -1143,96 +1068,6 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
|
|||
console.error(`[browse] Connect failed: ${err.message}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// ─── Outer Supervisor (v1.44+, opt-in) ──────────────────────────
|
||||
//
|
||||
// Default: fire-and-forget (CLI exits, server runs detached). This is
|
||||
// the contract every existing call site relies on, including Claude
|
||||
// Code's Bash tool which expects `$B connect` to return promptly.
|
||||
//
|
||||
// Opt-in via `--supervise` flag or BROWSE_SUPERVISE=1 env: the CLI
|
||||
// stays attached, polls the spawned server's PID every 30s, and
|
||||
// respawns it through the same headed-mode startServer path on
|
||||
// unexpected exit. Crash-loop guard: 5 respawns inside 5 min →
|
||||
// give up and exit 1 with a clear error. SIGINT / SIGTERM cleanly
|
||||
// tear down the supervised server before exit.
|
||||
//
|
||||
// Out of scope for v1.44 minimum: routing the Chromium-disconnect
|
||||
// exit-code-1 path back through this supervisor. The terminal-agent
|
||||
// watchdog (T5) already covers the highest-frequency restart case;
|
||||
// Chromium-crash-respawn is documented as a follow-up so the
|
||||
// supervisor stays a tight, testable primitive.
|
||||
const superviseRequested = commandArgs.includes('--supervise')
|
||||
|| process.env.BROWSE_SUPERVISE === '1';
|
||||
if (!superviseRequested) {
|
||||
process.exit(0);
|
||||
}
|
||||
console.log('[browse] Supervisor mode: monitoring server. Ctrl-C to stop.');
|
||||
let supervisorExiting = false;
|
||||
const teardownAndExit = (signal: string) => {
|
||||
if (supervisorExiting) return;
|
||||
supervisorExiting = true;
|
||||
console.log(`\n[browse] ${signal} received — stopping server.`);
|
||||
const state = readState();
|
||||
if (state?.pid && isProcessAlive(state.pid)) {
|
||||
safeKill(state.pid, 'SIGTERM');
|
||||
}
|
||||
process.exit(0);
|
||||
};
|
||||
process.on('SIGINT', () => teardownAndExit('SIGINT'));
|
||||
process.on('SIGTERM', () => teardownAndExit('SIGTERM'));
|
||||
|
||||
const SUPERVISOR_TICK_MS = parseInt(
|
||||
process.env.GSTACK_SUPERVISOR_TICK_MS || '30000',
|
||||
10,
|
||||
);
|
||||
const SUPERVISOR_GUARD_WINDOW_MS = 5 * 60_000;
|
||||
const SUPERVISOR_GUARD_MAX = 5;
|
||||
const SUPERVISOR_BACKOFF_MS = (process.env.GSTACK_SUPERVISOR_BACKOFF || '1000,2000,4000,8000,30000')
|
||||
.split(',').map(s => parseInt(s.trim(), 10)).filter(n => Number.isFinite(n));
|
||||
const respawns: number[] = [];
|
||||
|
||||
while (!supervisorExiting) {
|
||||
await new Promise(resolve => setTimeout(resolve, SUPERVISOR_TICK_MS));
|
||||
if (supervisorExiting) break;
|
||||
const state = readState();
|
||||
if (state?.pid && isProcessAlive(state.pid)) continue;
|
||||
// Server died. Prune rolling window and check guard.
|
||||
const now = Date.now();
|
||||
while (respawns.length && now - respawns[0] > SUPERVISOR_GUARD_WINDOW_MS) {
|
||||
respawns.shift();
|
||||
}
|
||||
if (respawns.length >= SUPERVISOR_GUARD_MAX) {
|
||||
console.error(
|
||||
`[browse] Supervisor: ${SUPERVISOR_GUARD_MAX} crashes in ${SUPERVISOR_GUARD_WINDOW_MS / 1000}s — giving up.`,
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
const attempt = respawns.length;
|
||||
respawns.push(now);
|
||||
const backoff = SUPERVISOR_BACKOFF_MS[Math.min(attempt, SUPERVISOR_BACKOFF_MS.length - 1)] ?? 30_000;
|
||||
console.warn(`[browse] Supervisor: server PID gone — respawning in ${backoff}ms (attempt ${attempt + 1}/${SUPERVISOR_GUARD_MAX})...`);
|
||||
await new Promise(resolve => setTimeout(resolve, backoff));
|
||||
if (supervisorExiting) break;
|
||||
try {
|
||||
const respawned = await startServer(serverEnv);
|
||||
console.log(`[browse] Supervisor: server respawned (PID ${respawned.pid}, port ${respawned.port}).`);
|
||||
// Re-spawn the terminal-agent too; same env wiring as the initial connect.
|
||||
try {
|
||||
spawnTerminalAgent({
|
||||
stateFile: config.stateFile,
|
||||
serverPort: respawned.port,
|
||||
cwd: config.projectDir,
|
||||
});
|
||||
} catch (err: any) {
|
||||
console.warn(`[browse] Supervisor: terminal-agent respawn failed: ${err?.message || err}`);
|
||||
}
|
||||
} catch (err: any) {
|
||||
console.error(`[browse] Supervisor: server respawn failed: ${err?.message || err}`);
|
||||
// Let the next tick try again — the crash-loop guard already
|
||||
// bounded the retries via the rolling window.
|
||||
}
|
||||
}
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
|
|
@ -1283,11 +1118,11 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
|
|||
safeKill(existingState.pid, 'SIGKILL');
|
||||
}
|
||||
}
|
||||
// #1781: killing the daemon can orphan its Chromium child tree, which keeps
|
||||
// holding the SingletonLock and makes the next `connect` fail to launch.
|
||||
// Reap the orphan via the lock, then clear the lock files + state.
|
||||
await killOrphanChromium();
|
||||
cleanChromiumProfileLocks();
|
||||
// Clean profile locks and state file
|
||||
const profileDir = path.join(process.env.HOME || '/tmp', '.gstack', 'chromium-profile');
|
||||
for (const lockFile of ['SingletonLock', 'SingletonSocket', 'SingletonCookie']) {
|
||||
safeUnlinkQuiet(path.join(profileDir, lockFile));
|
||||
}
|
||||
// Xvfb orphan cleanup: if the recorded PID still matches our Xvfb (by
|
||||
// cmdline AND start-time), kill it. PID-only would risk killing a
|
||||
// recycled PID belonging to an unrelated process.
|
||||
|
|
@ -1347,11 +1182,6 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
|
|||
}
|
||||
|
||||
await sendCommand(state, command, commandArgs);
|
||||
|
||||
// #1781: `focus` means "show me the window". The server-side focus activates
|
||||
// the page via CDP, but on macOS the app can still sit on another Space — pull
|
||||
// it to the user's current Space too.
|
||||
if (command === 'focus') raiseHeadedWindowMacOS();
|
||||
}
|
||||
|
||||
if (import.meta.main) {
|
||||
|
|
|
|||
|
|
@ -45,7 +45,6 @@ export const META_COMMANDS = new Set([
|
|||
'domain-skill',
|
||||
'skill',
|
||||
'cdp',
|
||||
'memory',
|
||||
]);
|
||||
|
||||
export const ALL_COMMANDS = new Set([...READ_COMMANDS, ...WRITE_COMMANDS, ...META_COMMANDS]);
|
||||
|
|
@ -90,7 +89,6 @@ export function wrapUntrustedContent(result: string, url: string): string {
|
|||
|
||||
export const COMMAND_DESCRIPTIONS: Record<string, { category: string; description: string; usage?: string }> = {
|
||||
// Navigation
|
||||
'memory': { category: 'Server', description: 'Snapshot Bun heap + per-tab JS heap + Chromium process tree + bounded buffer sizes. JSON output with --json.', usage: 'memory [--json]' },
|
||||
'goto': { category: 'Navigation', description: 'Navigate to URL (http://, https://, or file:// scoped to cwd/TEMP_DIR)', usage: 'goto <url>' },
|
||||
'load-html': { category: 'Navigation', description: 'Load HTML via setContent. Accepts a file path under safe-dirs (validated), OR --from-file <payload.json> with {"html":"...","waitUntil":"..."} for large inline HTML (Windows argv safe).', usage: 'load-html <file> [--wait-until load|domcontentloaded|networkidle] [--tab-id <N>] | load-html --from-file <payload.json> [--tab-id <N>]' },
|
||||
'back': { category: 'Navigation', description: 'History back' },
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
* Outputs the absolute path to the browse binary on stdout, or exits 1 if not found.
|
||||
*/
|
||||
|
||||
import { accessSync, constants } from 'fs';
|
||||
import { existsSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
import { homedir } from 'os';
|
||||
|
||||
|
|
@ -24,35 +24,6 @@ function getGitRoot(): string | null {
|
|||
}
|
||||
}
|
||||
|
||||
// Report whether `p` can be executed by the current process, using
// accessSync(X_OK). That checks the executable bit on Linux/macOS and
// degrades to an existence check on Windows (no true execute bit). Any
// failure — missing file, no permission — yields false rather than throwing.
// Mirrors make-pdf/src/browseClient.ts:159 / make-pdf/src/pdftotext.ts:117.
function isExecutable(p: string): boolean {
  let executable = true;
  try {
    accessSync(p, constants.X_OK);
  } catch {
    executable = false;
  }
  return executable;
}
|
||||
|
||||
// Map a bare binary path to the file that actually exists on disk. The bare
// path wins when it is executable. On Windows only, `.exe`, `.cmd` and `.bat`
// are then tried in that order, because `bun build --compile` appends `.exe`
// to the output name (so `browse` on disk is really `browse.exe`).
// Linux/macOS never probe extensions. Returns null when nothing matches.
// Mirrors the helper in make-pdf/src/browseClient.ts:89 and
// make-pdf/src/pdftotext.ts:52.
function findExecutable(base: string): string | null {
  if (isExecutable(base)) return base;
  if (process.platform !== 'win32') return null;

  const windowsCandidates = ['.exe', '.cmd', '.bat'].map((ext) => base + ext);
  const match = windowsCandidates.find((candidate) => isExecutable(candidate));
  return match !== undefined ? match : null;
}
|
||||
|
||||
export function locateBinary(): string | null {
|
||||
const root = getGitRoot();
|
||||
const home = homedir();
|
||||
|
|
@ -62,26 +33,14 @@ export function locateBinary(): string | null {
|
|||
if (root) {
|
||||
for (const m of markers) {
|
||||
const local = join(root, m, 'skills', 'gstack', 'browse', 'dist', 'browse');
|
||||
const found = findExecutable(local);
|
||||
if (found) return found;
|
||||
if (existsSync(local)) return local;
|
||||
}
|
||||
|
||||
// Source-checkout fallback (no installed skill layout — the binary
|
||||
// lives directly at <repo>/browse/dist/browse[.exe]). Hit by:
|
||||
// - gstack repo dev workflow before `./setup` runs
|
||||
// - the windows-setup-e2e.yml CI workflow which builds binaries
|
||||
// in place but never installs them under a marker dir
|
||||
// - make-pdf consumers running from a sibling source checkout
|
||||
const sourceCheckout = join(root, 'browse', 'dist', 'browse');
|
||||
const sourceFound = findExecutable(sourceCheckout);
|
||||
if (sourceFound) return sourceFound;
|
||||
}
|
||||
|
||||
// Global fallback
|
||||
for (const m of markers) {
|
||||
const global = join(home, m, 'skills', 'gstack', 'browse', 'dist', 'browse');
|
||||
const found = findExecutable(global);
|
||||
if (found) return found;
|
||||
if (existsSync(global)) return global;
|
||||
}
|
||||
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -1,78 +0,0 @@
|
|||
/**
|
||||
* find-security-sidecar — resolve the Node entry that runs the L4 ML
|
||||
* classifier sidecar.
|
||||
*
|
||||
* The sidecar can't be bundled into the compiled browse binary because
|
||||
* onnxruntime-node fails to dlopen from Bun's compile extract dir. It runs
|
||||
* as a separate Node subprocess instead. This module resolves the right
|
||||
* path + interpreter on each platform:
|
||||
*
|
||||
* 1. Prefer node on PATH + a bundled JS entry at
|
||||
* browse/dist/security-sidecar.js (built by package.json's
|
||||
* build:security-sidecar script).
|
||||
* 2. Dev fallback: node + browse/src/security-sidecar-entry.ts via tsx
|
||||
* (only available in the source checkout, not the compiled install).
|
||||
* 3. If Node is missing or no entry resolves, return null. The /pty-inject-scan
|
||||
* endpoint then responds with l4 { available: false } and the extension
|
||||
* degrades to WARN+confirm (D7).
|
||||
*/
|
||||
|
||||
import { existsSync } from "fs";
|
||||
import { join, dirname } from "path";
|
||||
import { execFileSync } from "child_process";
|
||||
|
||||
/** How to launch the security sidecar: which interpreter, which entry file. */
export interface SidecarLocation {
  /** Command used to run the sidecar (the resolver in this file yields "node"). */
  node: string;
  /** Path of the sidecar entry script that was found on disk. */
  entry: string;
  /** "compiled" if running from browse/dist/, "dev" if running from src */
  mode: "compiled" | "dev";
}
|
||||
|
||||
/**
 * Check that a `node` executable can be launched from PATH by running
 * `node --version` (output discarded, 2s timeout).
 *
 * @returns The command name "node" when the probe succeeds, otherwise null.
 */
function nodeOnPath(): string | null {
  const probeOptions = { stdio: "ignore" as const, timeout: 2000 };
  let resolved: string | null = "node";
  try {
    execFileSync("node", ["--version"], probeOptions);
  } catch {
    resolved = null;
  }
  return resolved;
}
|
||||
|
||||
/**
 * Find the directory that sidecar entry paths should be resolved against.
 *
 * Starts at the directory of this module (import.meta.path — when running
 * compiled this points into Bun's extract temp dir) and walks upward, for at
 * most six levels, until a directory contains either
 * `browse/dist/security-sidecar.js` or `src/security-sidecar-entry.ts`.
 * Falls back to the current working directory when the walk reaches the
 * filesystem root or runs out of levels.
 */
function browseRoot(): string {
  const MAX_LEVELS = 6;
  const holdsSidecar = (dir: string): boolean =>
    existsSync(join(dir, "browse", "dist", "security-sidecar.js")) ||
    existsSync(join(dir, "src", "security-sidecar-entry.ts"));

  let current = dirname(import.meta.path || "");
  for (let level = 0; level < MAX_LEVELS; level += 1) {
    if (holdsSidecar(current)) return current;
    const parent = dirname(current);
    if (parent === current) break; // reached the top; dirname is a fixed point
    current = parent;
  }
  return process.cwd();
}
|
||||
|
||||
/**
 * Resolve the interpreter and entry script for the security sidecar.
 *
 * Resolution order, relative to browseRoot():
 *   1. `browse/dist/security-sidecar.js`  → mode "compiled"
 *   2. `src/security-sidecar-entry.ts`    → mode "dev" (source checkout only;
 *      compiled installs won't have src/ on disk)
 *
 * @returns The first location whose entry exists, or null when Node is not on
 *          PATH or neither entry file is present.
 */
export function findSecuritySidecar(): SidecarLocation | null {
  const node = nodeOnPath();
  if (!node) return null;

  const root = browseRoot();
  const candidates: Array<{ entry: string; mode: SidecarLocation["mode"] }> = [
    { entry: join(root, "browse", "dist", "security-sidecar.js"), mode: "compiled" },
    { entry: join(root, "src", "security-sidecar-entry.ts"), mode: "dev" },
  ];

  for (const { entry, mode } of candidates) {
    if (existsSync(entry)) return { node, entry, mode };
  }
  return null;
}
|
||||
|
|
@ -1,115 +0,0 @@
|
|||
// `$B memory` — diagnostic snapshot of Bun heap + per-tab JS heap +
|
||||
// Chromium process tree + bounded buffer sizes. Lives in its own file
|
||||
// because the meta-commands dispatcher imports it lazily — projects
|
||||
// that never run the diagnostic don't pay the import-graph cost (CDP
|
||||
// bridge, memory-snapshot types, buffer accessors).
|
||||
|
||||
import type { BrowserManager } from './browser-manager';
|
||||
import { formatBytes, type MemorySnapshot, type MemoryStructureStats } from './memory-snapshot';
|
||||
import { getModificationHistoryStats } from './cdp-inspector';
|
||||
import { getSubscriberCount as getActivitySubscriberCount } from './activity';
|
||||
import { getInspectorSubscriberCount } from './server';
|
||||
import { consoleBuffer, networkBuffer, dialogBuffer } from './buffers';
|
||||
import { getCaptureBuffer } from './network-capture';
|
||||
|
||||
/**
 * Gather the sizes of the in-memory structures into one MemoryStructureStats
 * record by querying the module that owns each of them. Keeping this here
 * means browser-manager does not need a hard dependency on every
 * buffer-owning module — the snapshot caller hands the stats in.
 */
function collectStructureStats(): MemoryStructureStats {
  const modificationHistory = getModificationHistoryStats();
  const activitySubscribers = getActivitySubscriberCount();
  const inspectorSubscribers = getInspectorSubscriberCount();

  const stats: MemoryStructureStats = {
    modificationHistory,
    activitySubscribers,
    inspectorSubscribers,
    consoleBufferLen: consoleBuffer.length,
    networkBufferLen: networkBuffer.length,
    dialogBufferLen: dialogBuffer.length,
    captureBufferBytes: getCaptureBuffer().byteSize,
  };
  return stats;
}
|
||||
|
||||
/**
 * Render a memory snapshot as human-readable terminal text. (JSON mode
 * bypasses this function and goes straight through JSON.stringify.)
 *
 * Sections, in order:
 *   1. Bun server RSS / heap / external sizes.
 *   2. Chromium process count, summarized per process type; a null process
 *      list is reported as unavailable.
 *   3. Up to ten tabs ranked by JS heap (largest first), URLs longer than 80
 *      characters truncated, plus an "...and N more" tail.
 *   4. Sizes of the Bun-side in-memory structures.
 *   5. Free-form notes, when present.
 *
 * @param s Snapshot to render.
 * @returns The report as a single newline-joined string.
 */
function formatSnapshotText(s: MemorySnapshot): string {
  const out: string[] = [];

  // 1. Bun server
  const server = s.bunServer;
  out.push(
    `Bun server: RSS: ${formatBytes(server.rss)} heap: ${formatBytes(server.heapUsed)} / ${formatBytes(server.heapTotal)} external: ${formatBytes(server.external)}`,
  );

  // 2. Chromium processes — grouped by type so the user sees "renderer=12"
  //    instead of twelve separate rows.
  const processes = s.processes;
  if (processes && processes.length > 0) {
    const countByType: Record<string, number> = {};
    for (const proc of processes) {
      countByType[proc.type] = (countByType[proc.type] ?? 0) + 1;
    }
    const summaryParts: string[] = [];
    for (const [type, count] of Object.entries(countByType)) {
      summaryParts.push(`${type}=${count}`);
    }
    out.push(`Chromium processes: ${processes.length} total (${summaryParts.join(' ')})`);
  } else {
    out.push(
      processes === null
        ? 'Chromium processes: (unavailable — see notes)'
        : 'Chromium processes: 0',
    );
  }

  // 3. Tabs — copy before sorting so the snapshot itself is not reordered.
  if (s.tabs.length > 0) {
    const ranked = [...s.tabs].sort((a, b) => b.jsHeapUsed - a.jsHeapUsed);
    const top = ranked.slice(0, 10);
    out.push(`Renderers: ${s.tabs.length} tabs (top by JS heap):`);
    for (const tab of top) {
      const url = tab.url.length > 80 ? tab.url.slice(0, 77) + '...' : tab.url;
      const heap = formatBytes(tab.jsHeapUsed).padStart(8);
      const nodes = String(tab.nodes).padStart(6);
      const listeners = String(tab.listeners).padStart(5);
      out.push(` [${heap} JS, ${nodes} nodes, ${listeners} listeners] tab #${tab.id} — ${url}`);
    }
    const hidden = ranked.length - top.length;
    if (hidden > 0) {
      out.push(` ...and ${hidden} more`);
    }
  } else {
    out.push('Renderers: (no tabs tracked)');
  }

  // 4. Bun-side structures
  out.push('─────────────────────────────────────────────────');
  out.push('In-memory structures (Bun side):');
  const structures = s.structures;
  const history = structures.modificationHistory;
  const evictedNote = history.evicted > 0 ? ` (${history.evicted} evicted since reset)` : '';
  out.push(` modificationHistory: ${history.current} / ${history.cap} entries${evictedNote}`);
  out.push(` inspectorSubscribers: ${structures.inspectorSubscribers}`);
  out.push(` activitySubscribers: ${structures.activitySubscribers}`);
  out.push(` consoleBuffer: ${structures.consoleBufferLen} entries`);
  out.push(` networkBuffer: ${structures.networkBufferLen} entries`);
  out.push(` dialogBuffer: ${structures.dialogBufferLen} entries`);
  out.push(` captureBuffer: ${formatBytes(structures.captureBufferBytes)}`);

  // 5. Notes
  if (s.notes.length > 0) {
    out.push('', 'Notes:');
    for (const note of s.notes) {
      out.push(` - ${note}`);
    }
  }

  return out.join('\n');
}
|
||||
|
||||
/**
 * Run the memory diagnostic command.
 *
 * Collects the Bun-side structure stats, asks the browser manager for a
 * full snapshot, and renders it.
 *
 * @param args Command arguments; `--json` anywhere selects JSON output.
 * @param bm Browser manager that produces the snapshot.
 * @returns The snapshot as a JSON string (`--json`) or as the text report.
 */
export async function handleMemoryCommand(args: string[], bm: BrowserManager): Promise<string> {
  const wantsJson = args.includes('--json');
  const snapshot = await bm.getMemorySnapshot(collectStructureStats());
  return wantsJson ? JSON.stringify(snapshot) : formatSnapshotText(snapshot);
}
|
||||
|
||||
/**
 * Entry point used by the /memory HTTP endpoint. Produces the same data as
 * the command path, but returns the snapshot object itself and leaves
 * serialization to the caller.
 */
export async function buildMemorySnapshotJson(bm: BrowserManager): Promise<MemorySnapshot> {
  return bm.getMemorySnapshot(collectStructureStats());
}
|
||||
|
|
@ -1,73 +0,0 @@
|
|||
// Shared types for the $B memory diagnostic command and the /memory
|
||||
// endpoint. Lives in its own module so server.ts, read-commands.ts, and
|
||||
// the extension footer poll can import without taking a circular dep on
|
||||
// browser-manager.ts.
|
||||
//
|
||||
// Background: the gbrowser-OOM investigation (160 GB Activity Monitor
|
||||
// reading on a friend's machine) needed a diagnostic that could land
|
||||
// before the next incident — measurement comes first, fixes come after.
|
||||
// $B memory is that diagnostic.
|
||||
|
||||
/** Counts/bytes for the bounded in-memory structures on the Bun side. */
export interface MemoryStructureStats {
  /** Current entry count, configured cap, and number of evicted entries. */
  modificationHistory: { current: number; cap: number; evicted: number };
  /** Number of activity subscribers. */
  activitySubscribers: number;
  /** Number of inspector subscribers. */
  inspectorSubscribers: number;
  /** Entry count of the console buffer. */
  consoleBufferLen: number;
  /** Entry count of the network buffer. */
  networkBufferLen: number;
  /** Entry count of the dialog buffer. */
  dialogBufferLen: number;
  /** Size of the capture buffer, in bytes. */
  captureBufferBytes: number;
}
|
||||
|
||||
/** Per-tab JS heap snapshot (CDP Performance.getMetrics). */
export interface MemoryTabSnapshot {
  /** Tab identifier. */
  id: number;
  /** URL of the tab. */
  url: string;
  /** Title of the tab. */
  title: string;
  /** Used JS heap size, in bytes. */
  jsHeapUsed: number;
  /** Total JS heap size (presumably bytes, like jsHeapUsed — confirm). */
  jsHeapTotal: number;
  /** Document count reported for the tab. */
  documents: number;
  /** DOM node count reported for the tab. */
  nodes: number;
  /** Event listener count reported for the tab. */
  listeners: number;
}
|
||||
|
||||
/** Chromium process metadata via CDP SystemInfo.getProcessInfo. */
export interface MemoryProcess {
  /** Chromium-internal process id (not OS PID). */
  id: number;
  /** 'browser' | 'renderer' | 'gpu' | 'utility' | 'extension' | ... */
  type: string;
  /** CPU time accumulated since process start (seconds). */
  cpuTime: number;
}
|
||||
|
||||
/** Full result of one memory diagnostic capture. */
export interface MemorySnapshot {
  /** Memory figures for the Bun server process. */
  bunServer: {
    rss: number;
    heapUsed: number;
    heapTotal: number;
    external: number;
  };
  /** One entry per tracked tab. */
  tabs: MemoryTabSnapshot[];
  /**
   * Chromium process tree. `null` when no browser handle is available
   * (server in connection mode, or browser not yet launched).
   *
   * Per-process RSS is NOT included: SystemInfo.getProcessInfo returns
   * id+type+cpuTime but Chromium does not expose RSS via CDP. The
   * `notes[]` field tells the caller why — see the follow-up TODO
   * "native/GPU memory breakdown" for the deferred fix.
   */
  processes: MemoryProcess[] | null;
  /** Bun-side bounded structure stats. */
  structures: MemoryStructureStats;
  /** Capture timestamp (presumably epoch milliseconds — confirm against the producer). */
  capturedAt: number;
  /** Human-readable caveats about the capture. */
  notes: string[];
}
|
||||
|
||||
/**
 * Format a byte count as a short human string using 1024-based units.
 *
 * Values below 1024 are printed verbatim with a ` B` suffix; KB and MB use
 * one decimal place, GB uses two. Examples: "512 B", "84.0 KB", "312.0 MB",
 * "1.40 GB".
 *
 * The unit is chosen from the ROUNDED value, so an input just under a unit
 * boundary rolls over to the next unit ("1.0 MB") instead of printing an
 * out-of-range figure such as "1024.0 KB".
 *
 * @param n Byte count.
 * @returns The formatted string.
 */
export function formatBytes(n: number): string {
  if (n < 1024) return `${n} B`;

  const kb = n / 1024;
  const kbText = kb.toFixed(1);
  // Compare the rounded text, not the raw value: 1023.96 rounds to "1024.0".
  if (Number(kbText) < 1024) return `${kbText} KB`;

  const mb = kb / 1024;
  const mbText = mb.toFixed(1);
  if (Number(mbText) < 1024) return `${mbText} MB`;

  return `${(mb / 1024).toFixed(2)} GB`;
}
|
||||
|
|
@ -11,7 +11,6 @@ import { handleSkillCommand } from './browser-skill-commands';
|
|||
import { validateNavigationUrl } from './url-validation';
|
||||
import { checkScope, type TokenInfo } from './token-registry';
|
||||
import { validateOutputPath, validateReadPath, SAFE_DIRECTORIES, escapeRegExp } from './path-security';
|
||||
import { guardScreenshotBuffer, guardScreenshotPath } from './screenshot-size-guard';
|
||||
// Re-export for backward compatibility (tests import from meta-commands)
|
||||
export { validateOutputPath, escapeRegExp } from './path-security';
|
||||
import * as Diff from 'diff';
|
||||
|
|
@ -137,7 +136,7 @@ function parsePdfArgs(args: string[]): ParsedPdfArgs {
|
|||
return result;
|
||||
}
|
||||
|
||||
export function parsePdfFromFile(payloadPath: string): ParsedPdfArgs {
|
||||
function parsePdfFromFile(payloadPath: string): ParsedPdfArgs {
|
||||
// Parity with load-html --from-file (browse/src/write-commands.ts) and
|
||||
// the direct load-html <file> path: every caller-supplied file path
|
||||
// must pass validateReadPath so the safe-dirs policy can't be skirted
|
||||
|
|
@ -150,16 +149,7 @@ export function parsePdfFromFile(payloadPath: string): ParsedPdfArgs {
|
|||
);
|
||||
}
|
||||
const raw = fs.readFileSync(payloadPath, 'utf8');
|
||||
let json: any;
|
||||
try {
|
||||
json = JSON.parse(raw);
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
throw new Error(`pdf: --from-file ${payloadPath} is not valid JSON (${msg}).`);
|
||||
}
|
||||
if (json === null || typeof json !== 'object' || Array.isArray(json)) {
|
||||
throw new Error(`pdf: --from-file ${payloadPath} must be a JSON object, got ${Array.isArray(json) ? 'array' : typeof json}.`);
|
||||
}
|
||||
const json = JSON.parse(raw);
|
||||
const out: ParsedPdfArgs = {
|
||||
output: json.output || `${TEMP_DIR}/browse-page.pdf`,
|
||||
format: json.format,
|
||||
|
|
@ -507,10 +497,6 @@ export async function handleMetaCommand(
|
|||
buffer = await page.screenshot({ clip: clipRect });
|
||||
} else {
|
||||
buffer = await page.screenshot({ fullPage: !viewportOnly });
|
||||
// Guard the most common API-bricking case (fullPage). Element /
|
||||
// clip captures usually stay within the cap; we still guard the
|
||||
// path-mode below for fullPage writes.
|
||||
({ buffer } = await guardScreenshotBuffer(buffer));
|
||||
}
|
||||
if (buffer.length > 10 * 1024 * 1024) {
|
||||
throw new Error('Screenshot too large for --base64 (>10MB). Use disk path instead.');
|
||||
|
|
@ -531,7 +517,6 @@ export async function handleMetaCommand(
|
|||
}
|
||||
|
||||
await page.screenshot({ path: outputPath, fullPage: !viewportOnly });
|
||||
if (!viewportOnly) await guardScreenshotPath(outputPath);
|
||||
return `Screenshot saved${viewportOnly ? ' (viewport)' : ''}: ${outputPath}`;
|
||||
}
|
||||
|
||||
|
|
@ -582,7 +567,6 @@ export async function handleMetaCommand(
|
|||
const screenshotPath = `${prefix}-${vp.name}.png`;
|
||||
validateOutputPath(screenshotPath);
|
||||
await page.screenshot({ path: screenshotPath, fullPage: true });
|
||||
await guardScreenshotPath(screenshotPath);
|
||||
results.push(`${vp.name} (${vp.width}x${vp.height}): ${screenshotPath}`);
|
||||
}
|
||||
|
||||
|
|
@ -1161,13 +1145,6 @@ export async function handleMetaCommand(
|
|||
return await handleCdpCommand(args, bm);
|
||||
}
|
||||
|
||||
case 'memory': {
|
||||
// Lazy import — pulls in cdp-bridge + memory-snapshot + buffer accessors
|
||||
// that aren't useful for projects that never run the diagnostic.
|
||||
const { handleMemoryCommand } = await import('./memory-command');
|
||||
return await handleMemoryCommand(args, bm);
|
||||
}
|
||||
|
||||
default:
|
||||
throw new Error(`Unknown meta command: ${command}`);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,137 +0,0 @@
|
|||
/**
|
||||
* PTY session lease registry (v1.44+).
|
||||
*
|
||||
* Separates two concerns that pre-v1.44 were conflated under one token:
|
||||
*
|
||||
* - **sessionId** — stable, non-secret identifier for a single PTY session.
|
||||
* Safe to log, safe to include in URLs and server access logs, safe to
|
||||
* keep in DevTools. Identifies "this terminal," not "you're allowed to
|
||||
* use this terminal."
|
||||
*
|
||||
* - **attachToken** — secret, short-lived (30 s) bearer credential that
|
||||
* grants the WS upgrade for ONE attach attempt against a session. Minted
|
||||
* on every /pty-session and /pty-session/reattach call; revoked when
|
||||
* the WS upgrade consumes it. Kept out of logs.
|
||||
*
|
||||
* - **lease** — server-side bookkeeping that maps sessionId → expiresAt.
|
||||
* Re-attach within the lease window resumes the same PTY (and replays
|
||||
* the ring buffer from terminal-agent). Lease expiry tears down the
|
||||
* session.
|
||||
*
|
||||
* Codex outside-voice (T1 of the eng review) pushed for this separation:
|
||||
* "the auth token IS the session id" collapsed identity into a secret,
|
||||
* meaning re-attach URLs and logs carry the bearer credential. The lease
|
||||
* model fixes that without changing the user experience.
|
||||
*
|
||||
* Mint cadence:
|
||||
* - Initial /pty-session: mint sessionId + lease + attachToken (one round trip).
|
||||
* - /pty-session/reattach: validate sessionId/lease, mint fresh attachToken.
|
||||
* - /pty-restart: revoke old lease, mint fresh sessionId + lease + attachToken.
|
||||
* - /pty-dispose: revoke lease (and the terminal-agent disposes the PTY).
|
||||
*
|
||||
* Lease TTL is env-overridable so v1.44 e2e tests can compress detach
|
||||
* windows to 1 s instead of waiting 30 minutes per assertion.
|
||||
*/
|
||||
import * as crypto from 'crypto';
|
||||
|
||||
/** Server-side bookkeeping for one PTY session lease (Date.now() timestamps). */
interface Lease {
  createdAt: number;
  expiresAt: number;
}

/** 30 minutes default; covers idle-but-engaged user sessions. */
const DEFAULT_LEASE_TTL_MS = 30 * 60 * 1000;

/**
 * Parse the GSTACK_PTY_LEASE_TTL_MS override.
 *
 * Unset or empty input yields the default. An unparsable value also yields
 * the default: a NaN TTL would produce NaN expiry timestamps, and every
 * `now > expiresAt` / `expiresAt <= now` comparison against NaN is false,
 * so such leases would never expire.
 */
function parseLeaseTtlMs(raw: string | undefined): number {
  const parsed = parseInt(raw || `${DEFAULT_LEASE_TTL_MS}`, 10);
  return Number.isNaN(parsed) ? DEFAULT_LEASE_TTL_MS : parsed;
}

/** Lease lifetime in milliseconds; env-overridable. */
const LEASE_TTL_MS = parseLeaseTtlMs(process.env.GSTACK_PTY_LEASE_TTL_MS);
/** Upper bound on tracked leases. */
const MAX_LEASES = 10_000;
/** sessionId → lease. */
const leases = new Map<string, Lease>();
|
||||
|
||||
/**
 * Create a new lease under a freshly generated random sessionId (32 random
 * bytes, base64url-encoded), then run an opportunistic prune pass.
 *
 * @returns The non-secret sessionId and the lease's expiry timestamp; the
 *   caller surfaces both to the client.
 */
export function mintLease(): { sessionId: string; expiresAt: number } {
  const mintedAt = Date.now();
  const minted = {
    sessionId: crypto.randomBytes(32).toString('base64url'),
    expiresAt: mintedAt + LEASE_TTL_MS,
  };
  leases.set(minted.sessionId, { createdAt: mintedAt, expiresAt: minted.expiresAt });
  pruneExpired(mintedAt);
  return minted;
}
|
||||
|
||||
/**
 * Report whether a lease exists and has not expired.
 *
 * A missing or expired lease triggers a lazy prune pass; an expired lease
 * is also removed on the spot.
 *
 * @param sessionId Session to look up; null/undefined/empty is invalid.
 * @returns `{ ok: true, expiresAt }` for a live lease, `{ ok: false }` otherwise.
 */
export function validateLease(sessionId: string | null | undefined): { ok: true; expiresAt: number } | { ok: false } {
  if (!sessionId) return { ok: false };

  const entry = leases.get(sessionId);
  if (entry !== undefined && !(Date.now() > entry.expiresAt)) {
    return { ok: true, expiresAt: entry.expiresAt };
  }

  // Unknown or expired: drop the stale entry (if any) and prune lazily.
  if (entry !== undefined) leases.delete(sessionId);
  pruneExpired(Date.now());
  return { ok: false };
}
|
||||
|
||||
/**
 * Push a live lease's expiry out to `now + LEASE_TTL_MS`.
 *
 * Callers should only refresh when the lease is close to expiry (D10 lazy
 * refresh) rather than on every keepalive.
 *
 * Security invariant: an expired lease is never resurrected. It is deleted
 * and the call fails, so the agent must close the WS and surface
 * auth-invalid. The TTL is what bounds the blast radius of a leaked attach
 * token.
 *
 * @param sessionId Session whose lease should be extended.
 * @returns `{ ok: true, expiresAt }` with the new expiry, or `{ ok: false }`
 *   when the lease is unknown or already expired.
 */
export function refreshLease(sessionId: string | null | undefined): { ok: true; expiresAt: number } | { ok: false } {
  if (!sessionId) return { ok: false };

  const entry = leases.get(sessionId);
  if (entry === undefined) return { ok: false };

  const refreshedAt = Date.now();
  if (!(refreshedAt > entry.expiresAt)) {
    entry.expiresAt = refreshedAt + LEASE_TTL_MS;
    return { ok: true, expiresAt: entry.expiresAt };
  }

  leases.delete(sessionId);
  return { ok: false };
}
|
||||
|
||||
/**
 * Remove a lease if one exists. Used on explicit dispose (/pty-dispose,
 * /pty-restart, WS close with code 4001) and on session timeout in
 * terminal-agent. A null/undefined/empty sessionId is a no-op.
 */
export function revokeLease(sessionId: string | null | undefined): void {
  if (sessionId) {
    leases.delete(sessionId);
  }
}
|
||||
|
||||
/** Number of leases currently tracked — test + observability helper. */
export function leaseCount(): number {
  const { size } = leases;
  return size;
}
|
||||
|
||||
/** Test-only: drop every tracked lease. */
export function __resetLeases(): void {
  leases.clear();
}
|
||||
|
||||
/**
 * Bounded cleanup pass over the lease map.
 *
 * First inspects at most 20 entries in the map's iteration order and
 * deletes those whose expiry is at or before `now`. Then, while the map
 * holds more than MAX_LEASES entries, evicts from the front of the
 * iteration order.
 *
 * @param now Timestamp to compare expiries against.
 */
function pruneExpired(now: number): void {
  let remaining = 20;
  for (const [sessionId, lease] of leases) {
    if (remaining-- <= 0) break;
    if (lease.expiresAt <= now) leases.delete(sessionId);
  }

  // Deleting the key currently being visited is safe for Map iteration.
  for (const oldest of leases.keys()) {
    if (leases.size <= MAX_LEASES) break;
    if (!oldest) break;
    leases.delete(oldest);
  }
}
|
||||
|
|
@ -1,106 +0,0 @@
|
|||
/**
|
||||
* Screenshot size guard — keep full-page screenshots ≤ 2000px max-dim.
|
||||
*
|
||||
* The Anthropic vision API rejects images whose longest dimension exceeds
|
||||
* 2000 image-pixels (post deviceScaleFactor). Full-page screenshots of long
|
||||
* pages routinely exceed that, silently bricking the session: the agent
|
||||
* burns turns on a base64 blob that errors model-side with no useful
|
||||
* stderr surfacing on the browse side.
|
||||
*
|
||||
* This module centralizes the "after page.screenshot, check dimensions and
|
||||
* downscale if too big" path so every full-page caller in browse/src can
|
||||
* share the same enforcement. The cap is image-pixels, not CSS pixels,
|
||||
* matching the Anthropic API's own threshold.
|
||||
*
|
||||
* Used by: snapshot.ts (annotated, heatmap), meta-commands.ts (screenshot),
|
||||
* write-commands.ts (prettyscreenshot). See test/snapshot-meta-write-guard.test.ts.
|
||||
*
|
||||
* Closes #1214.
|
||||
*/
|
||||
|
||||
import { writeFileSync, readFileSync } from "fs";
|
||||
|
||||
// Longest allowed image side, in image pixels (not CSS pixels).
const MAX_DIMENSION_PX = 2000;
|
||||
|
||||
/** Diagnostic shape describing what the size guard did to an image. */
export interface SizeGuardResult {
  /** True if the input image exceeded MAX_DIMENSION_PX and was downscaled. */
  resized: boolean;
  /** Final width (pixels) of the image as written/returned. */
  width: number;
  /** Final height (pixels) of the image as written/returned. */
  height: number;
  /** Width before any downscale. */
  originalWidth: number;
  /** Height before any downscale. */
  originalHeight: number;
}
|
||||
|
||||
/**
 * Inspect an image buffer and downscale it if its longest side exceeds the
 * 2000px Anthropic vision API cap. Aspect ratio is preserved and the
 * downscaled image is re-encoded as PNG.
 *
 * sharp is imported lazily so its native binding is only initialized on
 * screenshot paths.
 *
 * @param input Encoded image bytes.
 * @returns The buffer to use (the untouched input when no resize was
 *   needed) plus a diagnostic result. Missing metadata dimensions are
 *   treated as 0, which means "no resize".
 */
export async function guardScreenshotBuffer(input: Buffer): Promise<{ buffer: Buffer; result: SizeGuardResult }> {
  const sharpModule = await import("sharp");
  const sharp = sharpModule.default ?? sharpModule;
  const image = sharp(input);
  const metadata = await image.metadata();
  const width = metadata.width ?? 0;
  const height = metadata.height ?? 0;

  const longest = Math.max(width, height);
  if (longest <= MAX_DIMENSION_PX) {
    return {
      buffer: input,
      result: {
        resized: false,
        width,
        height,
        originalWidth: width,
        originalHeight: height,
      },
    };
  }

  const scale = MAX_DIMENSION_PX / longest;
  // Clamp to 1px: for extreme aspect ratios the short side can round to 0,
  // and sharp requires positive integer dimensions.
  const newWidth = Math.max(1, Math.round(width * scale));
  const newHeight = Math.max(1, Math.round(height * scale));

  const resized = await image
    .resize(newWidth, newHeight, { fit: "inside" })
    .png()
    .toBuffer();

  process.stderr.write(
    `[screenshot-size-guard] image ${width}x${height} exceeded ${MAX_DIMENSION_PX}px max-dim; ` +
      `downscaled to ${newWidth}x${newHeight} to fit Anthropic vision API\n`,
  );

  return {
    buffer: resized,
    result: {
      resized: true,
      width: newWidth,
      height: newHeight,
      originalWidth: width,
      originalHeight: height,
    },
  };
}
|
||||
|
||||
/**
 * File-mode variant of the size guard: load the image at `filePath`, run it
 * through guardScreenshotBuffer, and overwrite the file only when a
 * downscale actually happened. Intended to be called right after
 * `await page.screenshot({ path, ... })`.
 *
 * @param filePath Path of the image to check (and possibly rewrite).
 * @returns The diagnostic result from the buffer guard.
 */
export async function guardScreenshotPath(filePath: string): Promise<SizeGuardResult> {
  const guarded = await guardScreenshotBuffer(readFileSync(filePath));
  if (guarded.result.resized) {
    writeFileSync(filePath, guarded.buffer);
  }
  return guarded.result;
}
|
||||
|
||||
// Public alias of the module-private cap so other modules can read the limit.
export const SCREENSHOT_MAX_DIMENSION_PX = MAX_DIMENSION_PX;
|
||||
|
|
@ -135,7 +135,7 @@ export function getClassifierStatus(): ClassifierStatus {
|
|||
|
||||
// ─── Model download + staging ────────────────────────────────
|
||||
|
||||
export async function downloadFile(url: string, dest: string): Promise<void> {
|
||||
async function downloadFile(url: string, dest: string): Promise<void> {
|
||||
const res = await fetch(url);
|
||||
if (!res.ok || !res.body) {
|
||||
throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`);
|
||||
|
|
@ -144,30 +144,16 @@ export async function downloadFile(url: string, dest: string): Promise<void> {
|
|||
const writer = fs.createWriteStream(tmp);
|
||||
// @ts-ignore — Node stream compat
|
||||
const reader = res.body.getReader();
|
||||
try {
|
||||
let done = false;
|
||||
while (!done) {
|
||||
const chunk = await reader.read();
|
||||
if (chunk.done) { done = true; break; }
|
||||
writer.write(chunk.value);
|
||||
}
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
writer.end((err?: Error | null) => (err ? reject(err) : resolve()));
|
||||
});
|
||||
fs.renameSync(tmp, dest);
|
||||
} catch (err) {
|
||||
// Drop the half-written tmp so we don't ship a truncated model file to
|
||||
// a retry's renameSync. Wait for the writer to close fully before
|
||||
// unlinking: Node's createWriteStream lazily opens the FD and flushes
|
||||
// buffered writes during destroy(), so a naive unlinkSync hits ENOENT
|
||||
// first and the writer re-creates the file on the next tick.
|
||||
await new Promise<void>((resolve) => {
|
||||
writer.once('close', () => resolve());
|
||||
writer.destroy();
|
||||
});
|
||||
try { fs.unlinkSync(tmp); } catch { /* nothing to clean */ }
|
||||
throw err;
|
||||
let done = false;
|
||||
while (!done) {
|
||||
const chunk = await reader.read();
|
||||
if (chunk.done) { done = true; break; }
|
||||
writer.write(chunk.value);
|
||||
}
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
writer.end((err?: Error | null) => (err ? reject(err) : resolve()));
|
||||
});
|
||||
fs.renameSync(tmp, dest);
|
||||
}
|
||||
|
||||
async function ensureTestsavantStaged(onProgress?: (msg: string) => void): Promise<void> {
|
||||
|
|
|
|||
|
|
@ -1,231 +0,0 @@
|
|||
/**
|
||||
* Security sidecar client — IPC layer for the Node L4 classifier subprocess.
|
||||
*
|
||||
* Spawn model: lazy. First call to scan() spawns the sidecar, warms it (the
|
||||
* sidecar's loadTestsavant call on first scan-page-content), and reuses
|
||||
* the same process for every subsequent scan. The process dies when the
|
||||
* browse server exits (Node's stdin-close behavior).
|
||||
*
|
||||
* Reliability:
|
||||
* - 5s default timeout per scan. Caller can override per-call.
|
||||
* - 64KB request cap. Larger payloads short-circuit with `payload-too-large`.
|
||||
* - Respawn capped at 3 failures within 10 minutes; further failures
|
||||
* trip a circuit breaker that returns `available: false` until reset.
|
||||
* - Parent-exit cleanup: process.on('exit') sends SIGTERM to the child.
|
||||
*
|
||||
* Failure semantics:
|
||||
* - Node not on PATH → available() returns false; caller (the
|
||||
* /pty-inject-scan endpoint) returns l4: { available: false } and the
|
||||
* extension degrades to WARN + user confirm.
|
||||
* - Scan throws or times out → caller treats as L4-unavailable for that
|
||||
* request and falls through to L1-L3-only verdict.
|
||||
*
|
||||
* Single-process singleton. Multiple callers within the same browse
|
||||
* process share one sidecar.
|
||||
*/
|
||||
|
||||
import { ChildProcessByStdio, spawn } from "child_process";
|
||||
import { Readable, Writable } from "stream";
|
||||
import { findSecuritySidecar } from "./find-security-sidecar";
|
||||
|
||||
/** Maximum UTF-8 size of a scan payload (64KB). */
const REQUEST_CAP_BYTES = 64 * 1024;
/** Per-scan timeout used when the caller does not supply one (5s). */
const DEFAULT_TIMEOUT_MS = 5000;
/** Sliding window over which failures are counted (10 minutes). */
const RESPAWN_WINDOW_MS = 10 * 60 * 1000;
/** Failures within the window that trip the circuit breaker. */
const RESPAWN_LIMIT = 3;
|
||||
|
||||
/** Bookkeeping for one in-flight sidecar request, keyed by request id. */
interface PendingRequest {
  /** Settles the caller's promise with the parsed response. */
  resolve: (response: unknown) => void;
  /** Fails the caller's promise. */
  reject: (err: Error) => void;
  /** Timeout timer; cleared when the request settles. */
  timer: ReturnType<typeof setTimeout>;
}
|
||||
|
||||
/** Mutable singleton state for the sidecar client. */
interface SidecarState {
  /** Live child process, or null when none is running. */
  child: ChildProcessByStdio<Writable, Readable, Readable> | null;
  /** In-flight requests keyed by request id. */
  pending: Map<string, PendingRequest>;
  /** Unconsumed stdout text (at most one partial NDJSON line once processed). */
  buffer: string;
  /** Timestamps of recent failures. */
  failures: number[];
  /** Last known availability of the sidecar. */
  available: boolean;
  /** True after circuit-breaker tripped; stays true until reset() */
  brokenCircuit: boolean;
  /** Next numeric request id to hand out. */
  nextId: number;
}
|
||||
|
||||
/** Lazily created singleton; null until first use or after a test reset. */
let state: SidecarState | null = null;

/** Return the singleton state, creating a fresh default one on first use. */
function getState(): SidecarState {
  return (state ??= {
    child: null,
    pending: new Map(),
    buffer: "",
    failures: [],
    available: true,
    brokenCircuit: false,
    nextId: 1,
  });
}
|
||||
|
||||
/**
 * Record a failure at the current time. Failures older than
 * RESPAWN_WINDOW_MS are dropped first; once RESPAWN_LIMIT or more remain
 * in the window the circuit breaker trips and the sidecar is marked
 * unavailable.
 */
function recordFailure(): void {
  const s = getState();
  const now = Date.now();
  const recent = s.failures.filter((t) => now - t < RESPAWN_WINDOW_MS);
  recent.push(now);
  s.failures = recent;
  if (recent.length < RESPAWN_LIMIT) return;
  s.brokenCircuit = true;
  s.available = false;
}
|
||||
|
||||
/**
 * Consume every complete NDJSON line in the stdout buffer and settle the
 * matching pending request.
 *
 * - Blank lines are skipped.
 * - Unparsable lines count as a failure but reject nothing, because the
 *   target request cannot be identified.
 * - Lines without a string id, or with an id that is not pending, are ignored.
 * - `ok` responses resolve the request; anything else records a failure and
 *   rejects with the response's error (or "sidecar-error").
 *
 * Any trailing partial line stays in the buffer for the next chunk.
 */
function processBuffer(): void {
  const s = getState();
  for (let nl = s.buffer.indexOf("\n"); nl !== -1; nl = s.buffer.indexOf("\n")) {
    const line = s.buffer.slice(0, nl).trim();
    s.buffer = s.buffer.slice(nl + 1);
    if (!line) continue;

    let parsed: { id?: string; ok?: boolean; verdict?: unknown; status?: unknown; error?: string };
    try {
      parsed = JSON.parse(line);
    } catch {
      recordFailure();
      continue;
    }

    if (typeof parsed.id !== "string" || !parsed.id) continue;
    const requestId = parsed.id;
    const waiter = s.pending.get(requestId);
    if (!waiter) continue;

    s.pending.delete(requestId);
    clearTimeout(waiter.timer);
    if (!parsed.ok) {
      recordFailure();
      waiter.reject(new Error(parsed.error ?? "sidecar-error"));
      continue;
    }
    waiter.resolve(parsed);
  }
}
|
||||
|
||||
/**
 * Tear down the current sidecar child, if any.
 *
 * Sends SIGTERM (ignoring errors from an already-dead process), clears the
 * child handle, discards buffered stdout, and rejects every in-flight
 * request with "sidecar-died". No-op when no child is tracked.
 */
function shutdownChild(): void {
  const s = getState();
  if (!s.child) return;
  try {
    s.child.kill("SIGTERM");
  } catch {
    // Already dead.
  }
  s.child = null;
  // Drop any partial line left by the dead child so it cannot be glued onto
  // the first response of a respawned child and fail to parse.
  s.buffer = "";
  for (const [, p] of s.pending) {
    clearTimeout(p.timer);
    p.reject(new Error("sidecar-died"));
  }
  s.pending.clear();
}
|
||||
|
||||
/**
 * Spawn the sidecar process and wire up its streams.
 *
 * @returns true when a child was started and registered in the shared
 *   state; false when the circuit breaker is tripped, the sidecar cannot be
 *   located, or spawning throws (the last case is recorded as a failure).
 */
function spawnSidecar(): boolean {
  const s = getState();
  if (s.brokenCircuit) return false;
  const location = findSecuritySidecar();
  if (!location) {
    s.available = false;
    return false;
  }
  try {
    const child = spawn(location.node, [location.entry], {
      stdio: ["pipe", "pipe", "pipe"],
      detached: false,
    });

    // Late events from a child that has already been replaced must not tear
    // down its successor, so only act while this child is the tracked one.
    const teardownIfCurrent = (): void => {
      if (s.child === child) shutdownChild();
    };

    child.stdout.on("data", (chunk: Buffer) => {
      s.buffer += chunk.toString("utf-8");
      processBuffer();
    });
    // stderr is piped but unused; drain it so a chatty child cannot fill
    // the pipe and block.
    child.stderr.resume();
    // Writes to a dead child fail asynchronously (e.g. EPIPE) as a stream
    // 'error' event, which would be an uncaught exception without a
    // listener. Treat it as the child being gone.
    child.stdin.on("error", teardownIfCurrent);
    child.on("exit", teardownIfCurrent);
    child.on("error", () => {
      recordFailure();
      teardownIfCurrent();
    });

    s.child = child;
    s.available = true;
    return true;
  } catch {
    recordFailure();
    return false;
  }
}
|
||||
|
||||
// Best-effort cleanup when the parent exits. Async work cannot run inside
// Node's "exit" event, so SIGTERM is sent synchronously and the OS reaps
// the child.
process.on("exit", () => {
  shutdownChild();
});
|
||||
|
||||
/** Result of an availability probe for the sidecar. */
export interface SidecarAvailability {
  /** Whether a scan can currently be attempted. */
  available: boolean;
  /** Short machine-readable cause, set when unavailable. */
  reason?: string;
}
|
||||
|
||||
/**
 * Report whether the sidecar can be used, without spawning it.
 *
 * Checks, in order: tripped circuit breaker ("circuit-broken"), an already
 * running child (available), then whether the sidecar can be located on
 * disk ("no-node-or-entry" when it cannot).
 */
export function isSidecarAvailable(): SidecarAvailability {
  const { brokenCircuit, child } = getState();
  if (brokenCircuit) return { available: false, reason: "circuit-broken" };
  if (child) return { available: true };
  // Probe only — a null location means no node on PATH or no entry on disk.
  return findSecuritySidecar()
    ? { available: true }
    : { available: false, reason: "no-node-or-entry" };
}
|
||||
|
||||
/**
 * Send `text` to the sidecar for a "scan-page-content" scan and resolve
 * with its verdict. Spawns the sidecar on first use.
 *
 * @param text Content to scan; must not exceed REQUEST_CAP_BYTES in UTF-8.
 * @param opts Optional per-call `timeoutMs` (defaults to DEFAULT_TIMEOUT_MS).
 * @returns The `verdict` field of the sidecar's response.
 * @throws Error "sidecar-circuit-broken" when the breaker is tripped,
 *   "payload-too-large" for oversized text, "sidecar-spawn-failed" when no
 *   child could be started, "sidecar-timeout" when no reply arrives in
 *   time, or the error raised by a failed stdin write. Timeouts and write
 *   failures are recorded as failures.
 */
export async function scanWithSidecar(text: string, opts?: { timeoutMs?: number }): Promise<{ verdict: unknown }> {
  const s = getState();
  if (s.brokenCircuit) throw new Error("sidecar-circuit-broken");
  if (Buffer.byteLength(text, "utf-8") > REQUEST_CAP_BYTES) throw new Error("payload-too-large");
  if (!s.child && !spawnSidecar()) throw new Error("sidecar-spawn-failed");

  const id = String(s.nextId++);
  const timeoutMs = opts?.timeoutMs ?? DEFAULT_TIMEOUT_MS;

  return new Promise((resolve, reject) => {
    const forget = (): void => {
      s.pending.delete(id);
      recordFailure();
    };

    const timer = setTimeout(() => {
      forget();
      reject(new Error("sidecar-timeout"));
    }, timeoutMs);

    s.pending.set(id, {
      resolve: (response: unknown) => {
        const { verdict } = response as { verdict?: unknown };
        resolve({ verdict });
      },
      reject,
      timer,
    });

    const payload = JSON.stringify({ id, op: "scan-page-content", text }) + "\n";
    try {
      s.child!.stdin.write(payload);
    } catch (err) {
      clearTimeout(timer);
      forget();
      reject(err instanceof Error ? err : new Error(String(err)));
    }
  });
}
|
||||
|
||||
/**
 * Test-only escape hatch: shut down any running child and discard the
 * singleton state, which also clears the circuit breaker.
 */
export function resetSidecarForTests(): void {
  shutdownChild();
  state = null;
}
|
||||
|
|
@ -1,120 +0,0 @@
|
|||
/**
|
||||
* Security sidecar entry — Node script that hosts the L4 ML classifier on
|
||||
* behalf of the compiled browse server.
|
||||
*
|
||||
* Why a sidecar:
|
||||
* - browse/src/security-classifier.ts depends on @huggingface/transformers
|
||||
* which loads onnxruntime-node, a native module that fails to `dlopen`
|
||||
* from Bun's compile-binary temp extraction dir (CLAUDE.md "Sidebar
|
||||
* security stack" section). Importing the classifier into server.ts
|
||||
* would brick the compiled binary at startup.
|
||||
* - sidebar-agent.ts (the previous host of the classifier) was removed
|
||||
* when the PTY proved out. The classifier file still ships but had no
|
||||
* caller — exactly the gap codex flagged in #1370.
|
||||
*
|
||||
* This entry runs under plain Node (resolved by find-security-sidecar.ts).
|
||||
* It reads NDJSON requests from stdin and writes NDJSON responses to stdout.
|
||||
*
|
||||
* Protocol (one JSON object per line, both directions):
|
||||
* request: { id: string, op: "scan-page-content" | "ping" | "status", text?: string }
|
||||
* response: { id: string, ok: true, verdict?: LayerSignal, status?: unknown } |
|
||||
* { id: string, ok: false, error: string }
|
||||
*
|
||||
* Lifecycle:
|
||||
* - Spawned lazily by security-sidecar-client.ts on first /pty-inject-scan
|
||||
* - Exits when stdin closes (parent gone) — standard Node behavior
|
||||
* - Exits on SIGTERM cleanly
|
||||
*
|
||||
* Failure modes:
|
||||
* - Model download fails → reply { ok: false, error: "model-load" } and
|
||||
* keep the loop alive for the next request (caller decides whether to
|
||||
* retry or fail-safe to L1-L3-only)
|
||||
*/
|
||||
|
||||
import * as readline from "readline";
|
||||
import { scanPageContent, getClassifierStatus, loadTestsavant } from "./security-classifier";
|
||||
|
||||
/** One NDJSON request line received on stdin. */
interface Request {
  /** Correlation id echoed back in the response. */
  id: string;
  /** Operation to perform. */
  op: "scan-page-content" | "ping" | "status";
  /** Content to scan; required for "scan-page-content". */
  text?: string;
}
|
||||
|
||||
/** Successful response line written to stdout. */
interface OkResponse {
  /** Id of the request being answered. */
  id: string;
  ok: true;
  /** Scan (or ping) verdict, when the op produces one. */
  verdict?: unknown;
  /** Classifier status, for the "status" op. */
  status?: unknown;
}
|
||||
|
||||
/** Failure response line written to stdout. */
interface ErrResponse {
  /** Id of the request being answered. */
  id: string;
  ok: false;
  /** Short error description. */
  error: string;
}
|
||||
|
||||
/** Emit one response as a single NDJSON line on stdout. */
function write(obj: OkResponse | ErrResponse): void {
  const line = JSON.stringify(obj) + "\n";
  process.stdout.write(line);
}
|
||||
|
||||
/**
 * Dispatch one parsed request and write exactly one response for it.
 *
 * Requests without a string id are dropped silently (protocol invariant).
 * Any error thrown while handling is reported as an `ok: false` response
 * carrying the error message, so this function does not reject.
 */
async function handle(req: Request): Promise<void> {
  if (!req || typeof req.id !== "string") return;

  const { id } = req;
  try {
    switch (req.op) {
      case "ping":
        write({ id, ok: true, verdict: { layer: "ping", verdict: "alive", score: 0 } });
        return;

      case "status":
        write({ id, ok: true, status: getClassifierStatus() });
        return;

      case "scan-page-content": {
        if (typeof req.text !== "string") {
          write({ id, ok: false, error: "missing-text" });
          return;
        }
        // Warm the classifier; a load failure is ignored here and the scan
        // call below is left to produce its own verdict.
        await loadTestsavant().catch(() => {});
        const verdict = await scanPageContent(req.text);
        write({ id, ok: true, verdict });
        return;
      }

      default:
        write({ id, ok: false, error: `unknown-op:${(req as { op?: unknown }).op}` });
    }
  } catch (err) {
    write({ id, ok: false, error: err instanceof Error ? err.message : String(err) });
  }
}
|
||||
|
||||
/**
 * Start the NDJSON request loop on stdin.
 *
 * Each non-blank line is parsed and handed to `handle` without awaiting, so
 * requests may complete out of order; responses are correlated by id. The
 * process exits with code 0 when stdin closes or on SIGTERM/SIGINT.
 */
function main(): void {
  const exitCleanly = (): never => process.exit(0);

  const rl = readline.createInterface({ input: process.stdin });
  rl.on("line", (line) => {
    if (line.trim() === "") return;

    let req: Request;
    try {
      req = JSON.parse(line) as Request;
    } catch {
      // The request id is unknowable here, so reply under the placeholder
      // id "<malformed>".
      write({ id: "<malformed>", ok: false, error: "malformed-json" });
      return;
    }
    // Fire-and-forget: handle reports its own errors as responses.
    void handle(req);
  });
  rl.on("close", exitCleanly);

  process.on("SIGTERM", exitCleanly);
  process.on("SIGINT", exitCleanly);
}

main();
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -23,7 +23,6 @@ import * as Diff from 'diff';
|
|||
import { TEMP_DIR, isPathWithin } from './platform';
|
||||
import { escapeEnvelopeSentinels } from './content-security';
|
||||
import { stripLoneSurrogates } from './sanitize';
|
||||
import { guardScreenshotPath } from './screenshot-size-guard';
|
||||
|
||||
// Roles considered "interactive" for the -i flag
|
||||
const INTERACTIVE_ROLES = new Set([
|
||||
|
|
@ -419,7 +418,6 @@ export async function handleSnapshot(
|
|||
}, boxes);
|
||||
|
||||
await page.screenshot({ path: screenshotPath, fullPage: true });
|
||||
await guardScreenshotPath(screenshotPath);
|
||||
|
||||
// Always remove overlays
|
||||
await page.evaluate(() => {
|
||||
|
|
@ -540,7 +538,6 @@ export async function handleSnapshot(
|
|||
}, boxes);
|
||||
|
||||
await page.screenshot({ path: heatmapPath, fullPage: true });
|
||||
await guardScreenshotPath(heatmapPath);
|
||||
|
||||
// Remove heatmap overlays
|
||||
await page.evaluate(() => {
|
||||
|
|
|
|||
|
|
@ -1,154 +0,0 @@
|
|||
// SSE endpoint helper — shared cleanup contract for stream endpoints.
|
||||
//
|
||||
// Pre-helper, /activity/stream and /inspector/events implemented the same
|
||||
// pattern in parallel and both leaked subscribers when enqueue failed
|
||||
// without a corresponding abort signal (e.g. Chromium MV3 service-worker
|
||||
// suspend dropped the TCP without an abort edge). The subscriber closure
|
||||
// stayed in the Set, capturing the ReadableStreamDefaultController plus
|
||||
// any payloads queued behind it. Over a multi-day sidebar session this
|
||||
// compounded into multi-MB of retained controllers per dead connection.
|
||||
//
|
||||
// Centralizing the cleanup contract here means any future SSE endpoint
|
||||
// inherits the invariant — cleanup runs on abort, enqueue failure, AND
|
||||
// heartbeat failure, exactly once, regardless of which edge fires first.
|
||||
|
||||
import { stripLoneSurrogates } from './sanitize';
|
||||
|
||||
/**
 * Replacer for `JSON.stringify`: string values are passed through
 * `stripLoneSurrogates` before they are escape-encoded; every other value
 * is returned untouched. Use it when the consumer will `JSON.parse` the
 * payload back into JS strings (SSE clients do). Required at every SSE
 * egress that ships page-content-derived fields — see CLAUDE.md
 * "Unicode sanitization at server egress".
 */
function sanitizeReplacer(_key: string, value: unknown): unknown {
  if (typeof value !== 'string') {
    return value;
  }
  return stripLoneSurrogates(value);
}
|
||||
|
||||
/** Send an SSE event. Handles JSON encoding + lone-surrogate sanitization. */
|
||||
export type SseSender = (event: string, data: unknown) => void;
|
||||
|
||||
export interface SseEndpointConfig<T> {
|
||||
/**
|
||||
* Optional. Runs once after the stream opens, before subscribing for live
|
||||
* events. Use for initial event replay (activity gap detection, history
|
||||
* burst) or a current-state snapshot (inspector). The `send` helper
|
||||
* handles JSON encoding with sanitizeReplacer and SSE framing; pass
|
||||
* any event name and any payload object.
|
||||
*/
|
||||
initialReplay?: (send: SseSender) => void;
|
||||
|
||||
/**
|
||||
* Subscribe to the live event source. Receives a `notify` callback;
|
||||
* returns an unsubscribe function. The callback routes through the
|
||||
* helper's safeEnqueue + cleanup-on-throw, so a dead consumer ends up
|
||||
* removed from the subscriber set on the very next event (instead of
|
||||
* waiting for an abort that may never fire).
|
||||
*/
|
||||
subscribe: (notify: (entry: T) => void) => () => void;
|
||||
|
||||
/**
|
||||
* SSE event name for live events. `data: <JSON.stringify(entry)>\n\n`
|
||||
* is wrapped automatically. /activity/stream uses 'activity';
|
||||
* /inspector/events uses 'inspector'.
|
||||
*/
|
||||
liveEventName: string;
|
||||
|
||||
/** Heartbeat interval in ms. Default: 15000. */
|
||||
heartbeatMs?: number;
|
||||
}
|
||||
|
||||
/**
 * Build a streaming `text/event-stream` Response that owns the cleanup
 * contract for one SSE connection:
 *   - a failed enqueue (replay, live event, or heartbeat) → cleanup
 *   - `req.signal` abort, including a signal that is ALREADY aborted when
 *     the stream starts → cleanup
 *   - the consumer cancelling the stream → cleanup
 *   - cleanup is idempotent (clearInterval + unsubscribe + try close)
 *
 * @param req     Incoming request; only `req.signal` is used.
 * @param config  Replay/subscribe hooks, live event name, and heartbeat
 *                interval (`heartbeatMs`, default 15000 ms).
 * @returns A Response whose body is the managed SSE stream.
 */
export function createSseEndpoint<T>(
  req: Request,
  config: SseEndpointConfig<T>,
): Response {
  const heartbeatMs = config.heartbeatMs ?? 15000;
  const encoder = new TextEncoder();

  // Connection state lives outside start() so cancel() can reach it too.
  let cleanedUp = false;
  let heartbeat: ReturnType<typeof setInterval> | null = null;
  let unsubscribe: (() => void) | null = null;
  let streamController: ReadableStreamDefaultController | null = null;

  const cleanup = (): void => {
    if (cleanedUp) return;
    cleanedUp = true;
    req.signal.removeEventListener('abort', cleanup);
    if (heartbeat !== null) {
      clearInterval(heartbeat);
      heartbeat = null;
    }
    if (unsubscribe !== null) {
      unsubscribe();
      unsubscribe = null;
    }
    try {
      streamController?.close();
    } catch {
      // Expected: stream already closed or cancelled by the consumer.
    }
  };

  const send: SseSender = (event, data) => {
    if (cleanedUp || streamController === null) return;
    try {
      streamController.enqueue(
        encoder.encode(
          `event: ${event}\ndata: ${JSON.stringify(data, sanitizeReplacer)}\n\n`,
        ),
      );
    } catch {
      // Consumer disconnected mid-write. Tear down so this subscriber
      // doesn't sit in the set forever.
      cleanup();
    }
  };

  const stream = new ReadableStream({
    start(controller) {
      streamController = controller;

      // An 'abort' listener added after the signal has fired is never
      // called, so check the flag first.
      if (req.signal.aborted) {
        cleanup();
        return;
      }
      req.signal.addEventListener('abort', cleanup, { once: true });

      // Initial replay (caller-provided).
      if (config.initialReplay) {
        try {
          config.initialReplay(send);
        } catch {
          cleanup();
          return;
        }
        if (cleanedUp) return;
      }

      // Subscribe for live events.
      const stopListening = config.subscribe((entry) => {
        send(config.liveEventName, entry);
      });
      // If the source notified synchronously and the enqueue failed,
      // cleanup() already ran before we held the unsubscribe handle.
      // Release the subscription now and do not start the heartbeat.
      if (cleanedUp) {
        stopListening();
        return;
      }
      unsubscribe = stopListening;

      // Heartbeat keeps NAT boxes and proxies from dropping idle SSE,
      // and serves as a liveness probe: an enqueue failure here is the
      // cheapest way to learn the consumer is gone without waiting for
      // an abort signal that may never arrive.
      heartbeat = setInterval(() => {
        if (cleanedUp) return;
        try {
          controller.enqueue(encoder.encode(`: heartbeat\n\n`));
        } catch {
          cleanup();
        }
      }, heartbeatMs);
    },

    // Consumer cancelled the body: release everything right away instead
    // of waiting for the next failed enqueue.
    cancel() {
      cleanup();
    },
  });

  return new Response(stream, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
    },
  });
}
|
||||
|
|
@ -1,200 +1,39 @@
|
|||
/**
|
||||
* Stealth init scripts — anti-bot detection countermeasures.
|
||||
* Stealth init script — webdriver-mask only (D7, codex narrowed).
|
||||
*
|
||||
* Two modes:
|
||||
* Modern anti-bot fingerprinters check consistency between navigator
|
||||
* properties (plugins.length, languages, userAgent, platform). Faking those
|
||||
* to fixed values (the wintermute approach) can flag MORE bot-like, not
|
||||
* less, and breaks legitimate sites that reflect on these properties.
|
||||
*
|
||||
* 1. DEFAULT (consistency-first, always on): masks navigator.webdriver
|
||||
* and adds --disable-blink-features=AutomationControlled. This is
|
||||
* the original "codex narrowed" minimum that preserves fingerprint
|
||||
* consistency — letting plugins/languages/chrome.runtime surface
|
||||
* native Chromium values keeps the fingerprint internally coherent.
|
||||
*
|
||||
* 2. EXTENDED (opt-in via GSTACK_STEALTH=extended): six additional
|
||||
* detection-vector patches on top of the default. Closes the
|
||||
* SannySoft test corpus to a 100% pass rate. Originally proposed in
|
||||
* PR #1112 (garrytan, Apr 2026).
|
||||
*
|
||||
* Vectors patched in extended mode:
|
||||
* - navigator.webdriver property fully deleted from prototype
|
||||
* (not just `false` — detectors check `"webdriver" in navigator`)
|
||||
* - WebGL renderer spoofed to a plausible Apple M1 Pro string
|
||||
* (SwiftShader was the #1 software-GPU giveaway in containers)
|
||||
* - navigator.plugins returns a real PluginArray with proper
|
||||
* MimeType objects and namedItem() — `instanceof PluginArray`
|
||||
* passes
|
||||
* - window.chrome populated with chrome.app, chrome.runtime,
|
||||
* chrome.loadTimes(), chrome.csi() with correct shapes
|
||||
* - navigator.mediaDevices present (some headless builds drop it)
|
||||
* - CDP cdc_* property names cleared from window
|
||||
*
|
||||
* Trade-off: extended mode actively LIES about the browser
|
||||
* environment. Sites that reflect on these properties can break or
|
||||
* misbehave. Use only when the default mode triggers detection AND
|
||||
* the target is anti-bot-protected. Not recommended as a global
|
||||
* default.
|
||||
* The honest minimum is masking navigator.webdriver, which Chromium exposes
|
||||
* as a known automation tell. Letting plugins/languages/chrome.runtime
|
||||
* surface their native Chromium values keeps the fingerprint internally
|
||||
* consistent.
|
||||
*/
|
||||
|
||||
import type { BrowserContext } from 'playwright';
|
||||
import type { Browser, BrowserContext } from 'playwright';
|
||||
|
||||
/**
|
||||
* Always-on default mask: navigator.webdriver returns false. Modern
|
||||
* fingerprinters check the property accessor, so a one-line getter is
|
||||
* sufficient when consistency with the rest of the navigator surface is
|
||||
* preserved.
|
||||
* Init script applied to every page in a context. Runs in the page's main
|
||||
* world before any other scripts. Idempotent — defining the same property
|
||||
* twice in different contexts is fine.
|
||||
*/
|
||||
export const WEBDRIVER_MASK_SCRIPT = `Object.defineProperty(navigator, 'webdriver', { get: () => false });`;
|
||||
|
||||
/**
|
||||
* Extended-mode init script — six detection-vector patches. Applied
|
||||
* AFTER the default mask, so the property-getter version remains in
|
||||
* place if any of the deletion paths fail.
|
||||
*
|
||||
* Self-contained string so it can be passed to addInitScript({ content })
|
||||
* without bundling concerns.
|
||||
*/
|
||||
export const EXTENDED_STEALTH_SCRIPT = `
|
||||
(() => {
|
||||
try {
|
||||
// 1. Fully delete navigator.webdriver from the prototype so
|
||||
// \`"webdriver" in navigator\` returns false (not just falsy).
|
||||
delete Object.getPrototypeOf(navigator).webdriver;
|
||||
} catch {}
|
||||
|
||||
try {
|
||||
// 2. WebGL renderer spoof — SwiftShader is the canonical software-GPU
|
||||
// tell. Spoof to a plausible Apple M1 Pro string.
|
||||
const getParameter = WebGLRenderingContext.prototype.getParameter;
|
||||
WebGLRenderingContext.prototype.getParameter = function (parameter) {
|
||||
// UNMASKED_VENDOR_WEBGL (37445) → 'Apple Inc.'
|
||||
if (parameter === 37445) return 'Apple Inc.';
|
||||
// UNMASKED_RENDERER_WEBGL (37446) → realistic Apple silicon string
|
||||
if (parameter === 37446) return 'Apple M1 Pro, OpenGL 4.1';
|
||||
return getParameter.call(this, parameter);
|
||||
};
|
||||
} catch {}
|
||||
|
||||
try {
|
||||
// 3. navigator.plugins: real PluginArray with MimeType objects.
|
||||
const makePlugin = (name, filename, desc, mimes) => {
|
||||
const p = Object.create(Plugin.prototype);
|
||||
Object.defineProperties(p, {
|
||||
name: { get: () => name },
|
||||
filename: { get: () => filename },
|
||||
description: { get: () => desc },
|
||||
length: { get: () => mimes.length },
|
||||
});
|
||||
mimes.forEach((m, i) => { p[i] = m; });
|
||||
p.item = (i) => mimes[i];
|
||||
p.namedItem = (n) => mimes.find((m) => m.type === n);
|
||||
return p;
|
||||
};
|
||||
const makeMime = (type, suffixes, desc) => {
|
||||
const m = Object.create(MimeType.prototype);
|
||||
Object.defineProperties(m, {
|
||||
type: { get: () => type },
|
||||
suffixes: { get: () => suffixes },
|
||||
description: { get: () => desc },
|
||||
});
|
||||
return m;
|
||||
};
|
||||
const pdfMime = makeMime('application/pdf', 'pdf', '');
|
||||
const cpdfMime = makeMime('application/x-google-chrome-pdf', 'pdf', 'Portable Document Format');
|
||||
const plugins = [
|
||||
makePlugin('PDF Viewer', 'internal-pdf-viewer', '', [pdfMime]),
|
||||
makePlugin('Chrome PDF Viewer', 'internal-pdf-viewer', '', [cpdfMime]),
|
||||
makePlugin('Chromium PDF Viewer', 'internal-pdf-viewer', '', [cpdfMime]),
|
||||
];
|
||||
Object.defineProperty(navigator, 'plugins', {
|
||||
get: () => {
|
||||
const arr = Object.create(PluginArray.prototype);
|
||||
Object.defineProperty(arr, 'length', { get: () => plugins.length });
|
||||
plugins.forEach((p, i) => { arr[i] = p; });
|
||||
arr.item = (i) => plugins[i];
|
||||
arr.namedItem = (n) => plugins.find((p) => p.name === n);
|
||||
arr.refresh = () => {};
|
||||
return arr;
|
||||
},
|
||||
});
|
||||
} catch {}
|
||||
|
||||
try {
|
||||
// 4. window.chrome shape — chrome.app + chrome.runtime + loadTimes/csi.
|
||||
if (!window.chrome) {
|
||||
window.chrome = {};
|
||||
}
|
||||
if (!window.chrome.runtime) {
|
||||
window.chrome.runtime = { OnInstalledReason: {}, OnRestartRequiredReason: {} };
|
||||
}
|
||||
if (!window.chrome.app) {
|
||||
window.chrome.app = {
|
||||
isInstalled: false,
|
||||
InstallState: { DISABLED: 'disabled', INSTALLED: 'installed', NOT_INSTALLED: 'not_installed' },
|
||||
RunningState: { CANNOT_RUN: 'cannot_run', READY_TO_RUN: 'ready_to_run', RUNNING: 'running' },
|
||||
};
|
||||
}
|
||||
if (!window.chrome.loadTimes) {
|
||||
window.chrome.loadTimes = function () {
|
||||
return { commitLoadTime: Date.now() / 1000, finishLoadTime: Date.now() / 1000 };
|
||||
};
|
||||
}
|
||||
if (!window.chrome.csi) {
|
||||
window.chrome.csi = function () {
|
||||
return { startE: Date.now(), onloadT: Date.now(), pageT: 0, tran: 15 };
|
||||
};
|
||||
}
|
||||
} catch {}
|
||||
|
||||
try {
|
||||
// 5. mediaDevices — some headless builds drop it entirely.
|
||||
if (!navigator.mediaDevices) {
|
||||
Object.defineProperty(navigator, 'mediaDevices', {
|
||||
get: () => ({ enumerateDevices: () => Promise.resolve([]) }),
|
||||
});
|
||||
}
|
||||
} catch {}
|
||||
|
||||
try {
|
||||
// 6. CDP cdc_* property cleanup. Chromium under CDP sets cdc_*-prefixed
|
||||
// globals (driver injection markers); a bot detector finds them by
|
||||
// iterating window keys. Strip all matching keys.
|
||||
for (const k of Object.keys(window)) {
|
||||
if (k.startsWith('cdc_')) {
|
||||
try { delete window[k]; } catch {}
|
||||
}
|
||||
}
|
||||
} catch {}
|
||||
})();
|
||||
`;
|
||||
|
||||
/**
 * True when the GSTACK_STEALTH environment variable is exactly
 * 'extended', '1', or 'true' (case-sensitive); false for any other value
 * or when it is unset.
 */
function extendedModeEnabled(): boolean {
  switch (process.env.GSTACK_STEALTH) {
    case 'extended':
    case '1':
    case 'true':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Register the stealth init scripts on a BrowserContext (fresh or
 * persistent). Called by browser-manager.launch() and launchHeaded().
 *
 * WEBDRIVER_MASK_SCRIPT is always added. EXTENDED_STEALTH_SCRIPT is added
 * after it, and only when extendedModeEnabled() reports true.
 */
export async function applyStealth(context: BrowserContext): Promise<void> {
  await context.addInitScript({ content: WEBDRIVER_MASK_SCRIPT });

  // Default mode stops here; the extended patches are opt-in.
  if (!extendedModeEnabled()) return;

  await context.addInitScript({ content: EXTENDED_STEALTH_SCRIPT });
}
|
||||
|
||||
/**
|
||||
* Args added to chromium.launch's `args` to suppress the
|
||||
* AutomationControlled blink feature. This is independent of the init
|
||||
* script — it changes how Chromium identifies itself in the protocol
|
||||
* layer.
|
||||
* script — it changes how Chromium identifies itself in the protocol layer.
|
||||
*/
|
||||
export const STEALTH_LAUNCH_ARGS = [
|
||||
'--disable-blink-features=AutomationControlled',
|
||||
];
|
||||
|
||||
/** Test-only helper: report whether extended mode is currently active. */
|
||||
export function isExtendedStealthEnabled(): boolean {
|
||||
return extendedModeEnabled();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,143 +0,0 @@
|
|||
/**
|
||||
* terminal-agent process-control primitives shared by cli.ts spawn site,
|
||||
* server.ts shutdown teardown, and the v1.44 watchdog/respawn loop.
|
||||
*
|
||||
* Why this exists: pre-v1.44 used `pkill -f terminal-agent\.ts`, which
|
||||
* matches any process whose argv contains the string and would kill
|
||||
* sibling gstack sessions on the same host. The agent now writes a
|
||||
* structured `terminal-agent-pid` record (`{pid, gen, startedAt}`) and
|
||||
* every kill site routes through `killAgentByRecord` here — identity-based,
|
||||
* no regex.
|
||||
*
|
||||
* The `gen` field is a per-boot generation counter. Loopback /internal/*
|
||||
* calls from the parent server include `X-Browse-Gen` so a slow agent that
|
||||
* the watchdog respawned around can't accidentally service a stale grant
|
||||
* from the old generation.
|
||||
*/
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { safeUnlink, safeKill, isProcessAlive } from './error-handling';
|
||||
import { writeSecureFile, mkdirSecure } from './file-permissions';
|
||||
|
||||
/**
 * Find `terminal-agent.ts` on disk. Two locations are probed, in order:
 *   1. `<metaDir>/terminal-agent.ts` (metaDir defaults to `__dirname`) —
 *      the dev layout, where the script sits next to this file.
 *   2. `<dirname(execPath)>/../src/terminal-agent.ts` (execPath defaults
 *      to `process.execPath`) — the layout relative to the running binary.
 *
 * @param searchHints Optional overrides for the two lookup roots.
 * @returns The first candidate that exists, or null when neither does
 *          (in which case the agent cannot be spawned).
 */
export function resolveTerminalAgentScript(searchHints: { metaDir?: string; execPath?: string } = {}): string | null {
  const sourceDir = searchHints.metaDir || __dirname;
  const binaryDir = path.dirname(searchHints.execPath || process.execPath);

  const found = [
    path.resolve(sourceDir, 'terminal-agent.ts'),
    path.resolve(binaryDir, '..', 'src', 'terminal-agent.ts'),
  ].find((candidate) => fs.existsSync(candidate));

  return found ?? null;
}
|
||||
|
||||
/**
 * Start a fresh terminal-agent process. Steps, in order:
 *   1. Read the record at `<dirname(stateFile)>/terminal-agent-pid`; if one
 *      exists, SIGTERM that agent and delete the record.
 *   2. Resolve the agent script (`opts.scriptPath` or
 *      resolveTerminalAgentScript()); give up with null if it is missing.
 *   3. `Bun.spawn(['bun', 'run', script])` with stdio ignored, the parent's
 *      env plus BROWSE_STATE_FILE / BROWSE_SERVER_PORT / extraEnv, then
 *      unref the child.
 *
 * Shared by the CLI cold-start path (cli.ts) and the v1.44 watchdog in
 * server.ts so spawn-env additions land in one place.
 *
 * @returns PID of the new agent, or null when the script can't be located
 *          (or the spawned handle exposes no pid).
 */
export function spawnTerminalAgent(opts: {
  stateFile: string;
  serverPort: number;
  cwd?: string;
  /** Optional extra env vars to add to the agent's process env. */
  extraEnv?: Record<string, string>;
  /** Override script lookup for tests. */
  scriptPath?: string;
}): number | null {
  const { stateFile, serverPort, cwd, extraEnv, scriptPath } = opts;
  const stateDir = path.dirname(stateFile);

  // Retire whatever agent the previous record points at.
  const previousAgent = readAgentRecord(stateDir);
  if (previousAgent) {
    killAgentByRecord(previousAgent, 'SIGTERM');
    clearAgentRecord(stateDir);
  }

  const agentScript = scriptPath || resolveTerminalAgentScript();
  if (!(agentScript && fs.existsSync(agentScript))) return null;

  const workingDir = cwd || process.cwd();
  // Later spreads win: extraEnv may override the two BROWSE_* entries.
  const childEnv = {
    ...process.env,
    BROWSE_STATE_FILE: stateFile,
    BROWSE_SERVER_PORT: String(serverPort),
    ...(extraEnv || {}),
  };

  const child = (Bun as any).spawn(['bun', 'run', agentScript], {
    cwd: workingDir,
    env: childEnv,
    stdio: ['ignore', 'ignore', 'ignore'],
  });
  child.unref?.();
  return child.pid ?? null;
}
|
||||
|
||||
export interface AgentRecord {
|
||||
pid: number;
|
||||
/** Random per-boot identifier. Loopback /internal/* sees X-Browse-Gen: <gen>. */
|
||||
gen: string;
|
||||
/** ms since epoch. Reserved for future PID-reuse guards. */
|
||||
startedAt: number;
|
||||
}
|
||||
|
||||
/** Location of the `terminal-agent-pid` record file inside `stateDir`. */
export function agentRecordPath(stateDir: string): string {
  const recordFileName = 'terminal-agent-pid';
  return path.join(stateDir, recordFileName);
}
|
||||
|
||||
/**
 * Read and validate the agent record stored under `stateDir`.
 *
 * @returns The parsed record, or null when the file is missing, is not
 *          valid JSON, or does not have the `{pid, gen, startedAt}` shape.
 *          `pid` must be a positive integer: 0 and negative values are not
 *          single-process ids (kill(2) treats them as process-group
 *          selectors), so a corrupt record must never reach a kill site.
 */
export function readAgentRecord(stateDir: string): AgentRecord | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(fs.readFileSync(agentRecordPath(stateDir), 'utf-8'));
  } catch {
    // Missing file, unreadable file, or malformed JSON.
    return null;
  }

  if (typeof parsed !== 'object' || parsed === null) return null;
  const { pid, gen, startedAt } = parsed as Record<string, unknown>;

  if (typeof pid !== 'number' || !Number.isInteger(pid) || pid <= 0) return null;
  if (typeof gen !== 'string' || typeof startedAt !== 'number') return null;

  return parsed as AgentRecord;
}
|
||||
|
||||
/**
 * Persist `record` as JSON at the agent record path using
 * write-to-temp-then-rename, so readers never observe a half-written file.
 *
 * `stateDir` is created on a best-effort basis; if that fails the error
 * surfaces from the write below. On any write/rename failure the temp file
 * is removed before the error is rethrown.
 *
 * @throws Whatever `writeSecureFile` or `fs.renameSync` throws.
 */
export function writeAgentRecord(stateDir: string, record: AgentRecord): void {
  try {
    mkdirSecure(stateDir);
  } catch {
    // Best effort: the directory may already exist or be created by the agent.
  }

  const target = agentRecordPath(stateDir);
  // PID-suffixed so concurrent writers from different processes don't collide.
  const tmp = `${target}.tmp-${process.pid}`;

  try {
    writeSecureFile(tmp, JSON.stringify(record));
    fs.renameSync(tmp, target);
  } catch (err) {
    // Don't leave a stray temp file behind in the state directory.
    // NOTE(review): assumes safeUnlink tolerates a missing file — confirm.
    safeUnlink(tmp);
    throw err;
  }
}
|
||||
|
||||
/** Delete the agent record file under `stateDir` via `safeUnlink`. */
export function clearAgentRecord(stateDir: string): void {
  const recordFile = agentRecordPath(stateDir);
  safeUnlink(recordFile);
}
|
||||
|
||||
/**
 * Signal the agent identified by `record`.
 *
 * @param record Record previously read from the agent record file.
 * @param signal Signal to send. Defaults to SIGTERM so the agent can run
 *               its own SIGTERM cleanup.
 * @returns true if a signal was sent to a PID that looked alive; false if
 *          the PID is not a positive integer or the process was already
 *          gone (no-op).
 *
 * The PID is validated first: 0 and negative values address process groups
 * rather than one process, so they are refused outright. Liveness is then
 * probed before signalling. That probe only skips PIDs that are already
 * dead; it cannot tell whether a live PID has been reused by an unrelated
 * process (`startedAt` is reserved for such a guard).
 * NOTE(review): presumably safeKill swallows ESRCH so this never throws —
 * confirm against error-handling.ts.
 */
export function killAgentByRecord(
  record: AgentRecord,
  signal: NodeJS.Signals = 'SIGTERM',
): boolean {
  if (!Number.isInteger(record.pid) || record.pid <= 0) return false;
  if (!isProcessAlive(record.pid)) return false;
  safeKill(record.pid, signal);
  return true;
}
|
||||
|
|
@ -25,47 +25,16 @@ import * as path from 'path';
|
|||
import * as crypto from 'crypto';
|
||||
import { writeSecureFile, mkdirSecure } from './file-permissions';
|
||||
import { safeUnlink } from './error-handling';
|
||||
import { writeAgentRecord, clearAgentRecord } from './terminal-agent-control';
|
||||
|
||||
const STATE_FILE = process.env.BROWSE_STATE_FILE || path.join(process.env.HOME || '/tmp', '.gstack', 'browse.json');
|
||||
const PORT_FILE = path.join(path.dirname(STATE_FILE), 'terminal-port');
|
||||
const BROWSE_SERVER_PORT = parseInt(process.env.BROWSE_SERVER_PORT || '0', 10);
|
||||
const EXTENSION_ID = process.env.BROWSE_EXTENSION_ID || ''; // optional: tighten Origin check
|
||||
const INTERNAL_TOKEN = crypto.randomBytes(32).toString('base64url'); // shared with parent server via env at spawn
|
||||
/**
|
||||
* Per-boot generation identifier. Loopback /internal/* callers include
|
||||
* `X-Browse-Gen: <CURRENT_GEN>` so a slow agent the watchdog respawned
|
||||
* around can't service a stale grant from the prior generation. Absent
|
||||
* header means "legacy caller" and is accepted (backward compat); a
|
||||
* present-but-mismatched header returns 409 stale generation.
|
||||
*/
|
||||
const CURRENT_GEN = crypto.randomBytes(16).toString('base64url');
|
||||
|
||||
// In-memory attach-token registry. Parent posts /internal/grant after
|
||||
// /pty-session; we validate WS upgrades against this map.
|
||||
//
|
||||
// v1.44+: each token is bound to a v1.44 sessionId (the stable, non-secret
|
||||
// identifier from browse/src/pty-session-lease.ts). The token grants ONE
|
||||
// attach for ONE session — re-attach within the lease window comes through
|
||||
// /pty-session/reattach, which mints a fresh token for the same sessionId.
|
||||
//
|
||||
// Legacy callers can still pass `{token}` without sessionId (the value
|
||||
// stays null and the WS upgrade still works); those callers don't get
|
||||
// re-attach because there's no stable identifier to match against.
|
||||
const validTokens = new Map<string, string | null>(); // token → sessionId
|
||||
|
||||
/**
|
||||
* Reverse index for re-attach lookups: sessionId → live PtySession.
|
||||
* Populated when a WS first attaches with a known sessionId; cleared when
|
||||
* the session is disposed or the lease expires. Used by:
|
||||
* - /ws upgrade: if the incoming attachToken maps to a sessionId that
|
||||
* already has a live session, REPLACE its ws ref instead of spawning.
|
||||
* - /internal/restart: enumerate by sessionId, dispose that one session.
|
||||
*
|
||||
* Kept separate from the WeakMap<ws,PtySession> so re-attach can find the
|
||||
* session by id even after the original ws has gone.
|
||||
*/
|
||||
const sessionsById = new Map<string, PtySession>();
|
||||
// In-memory cookie token registry. Parent posts /internal/grant after
|
||||
// /pty-session; we validate WS cookies against this set.
|
||||
const validTokens = new Set<string>();
|
||||
|
||||
// Active PTY session per WS. One terminal per connection. Codex finding #4:
|
||||
// uncaught handlers below catch bugs in framing/cleanup so they don't kill
|
||||
|
|
@ -77,154 +46,12 @@ process.on('unhandledRejection', (reason) => {
|
|||
console.error('[terminal-agent] unhandledRejection:', reason);
|
||||
});
|
||||
|
||||
export interface PtySession {
|
||||
interface PtySession {
|
||||
proc: any | null; // Bun.Subprocess once spawned
|
||||
cols: number;
|
||||
rows: number;
|
||||
cookie: string;
|
||||
/**
|
||||
* Current attached websocket. Swapped on re-attach (Commit 3): when a new
|
||||
* WS upgrade matches this session's sessionId, the old liveWs is gone
|
||||
* and the new ws takes its place. The PTY on-data callback closes over
|
||||
* `session`, not the original `ws`, so it always writes to the current
|
||||
* liveWs (or skips the write when detached and liveWs is null).
|
||||
*/
|
||||
liveWs: any | null;
|
||||
/**
|
||||
* v1.44+ stable session identifier (from pty-session-lease). Null for
|
||||
* legacy /internal/grant callers that didn't pass one. Used for
|
||||
* targeted /internal/restart and Commit 3 re-attach lookups.
|
||||
*/
|
||||
sessionId: string | null;
|
||||
spawned: boolean;
|
||||
/**
|
||||
* 25s server-side WS keepalive interval (v1.44+). Set in the WS `open`
|
||||
* handler, cleared in `close`. We send `{type:"ping",ts}` text frames so
|
||||
* NAT boxes, proxies, and Chrome's MV3 panel-suspend heuristics see the
|
||||
* connection as active; the client either replies with `{type:"pong"}`
|
||||
* or fires its own 25s `{type:"keepalive"}` cycle. Either path keeps
|
||||
* the underlying TCP from being silently dropped.
|
||||
*/
|
||||
pingInterval: ReturnType<typeof setInterval> | null;
|
||||
/**
|
||||
* Commit 3 scrollback ring buffer. Each PTY write appends a frame; the
|
||||
* total byte count is capped at RING_BUFFER_MAX_BYTES with oldest frames
|
||||
* evicted first. On re-attach, the surviving frames are replayed as a
|
||||
* single binary frame (prefixed with the v1.44 reset sequence) so the
|
||||
* user sees their last screen of output. Frame boundaries preserve UTF-8
|
||||
* + ANSI-CSI boundaries because each frame is the exact buffer that
|
||||
* spawnClaude's on-data callback emitted.
|
||||
*/
|
||||
ringBuffer: Buffer[];
|
||||
ringBufferBytes: number;
|
||||
/**
|
||||
* Tracks whether the PTY is currently in xterm alt-screen mode. claude's
|
||||
* TUI enters alt-screen (CSI ?1049h) during tool calls and exits (CSI
|
||||
* ?1049l) when returning to the main prompt. On re-attach, the replay
|
||||
* prelude must re-enter alt-screen if the original PTY left it active,
|
||||
* otherwise the replay renders against the main screen and the cursor
|
||||
* + colors end up in the wrong place.
|
||||
*/
|
||||
altScreenActive: boolean;
|
||||
/**
|
||||
* Detach state machine (Commit 3). When the WS closes for a reason OTHER
|
||||
* than the v1.44 intentional-restart code (4001), we keep the PtySession
|
||||
* alive for the detach window (default 60s) so a re-attach within the
|
||||
* window can resume the same PTY and replay the ring buffer. The timer
|
||||
* disposes the session if no re-attach arrives in time.
|
||||
*/
|
||||
detached: boolean;
|
||||
detachTimer: ReturnType<typeof setTimeout> | null;
|
||||
}
|
||||
|
||||
/**
 * WS keepalive interval in milliseconds. The 25s default is comfortably
 * under the lowest common NAT idle timeout (typically 30-60s).
 * Test-overridable via GSTACK_PTY_KEEPALIVE_INTERVAL_MS so e2e tests can
 * compress idle-window assertions to <1s.
 *
 * A non-numeric or negative override falls back to the default instead of
 * leaking NaN into timer code (Node coerces an invalid timer delay to 1ms).
 */
const KEEPALIVE_INTERVAL_MS = ((): number => {
  const fallback = 25000;
  const parsed = parseInt(
    process.env.GSTACK_PTY_KEEPALIVE_INTERVAL_MS || `${fallback}`,
    10,
  );
  return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
})();
|
||||
|
||||
/**
 * Scrollback ring buffer cap in bytes. The 1 MB default is enough for a
 * full screen of dense output, small enough that a worst-case 10 detached
 * sessions only cost ~10 MB of RSS. Overridable via
 * GSTACK_PTY_RING_BUFFER_BYTES so e2e tests can verify eviction without
 * writing 1 MB of fixture data.
 *
 * A non-numeric or negative override falls back to the default: with NaN
 * the `bytes > cap` eviction check would never be true and the buffer
 * would grow without bound.
 */
const RING_BUFFER_MAX_BYTES = ((): number => {
  const fallback = 1024 * 1024;
  const parsed = parseInt(
    process.env.GSTACK_PTY_RING_BUFFER_BYTES || `${fallback}`,
    10,
  );
  return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
})();
|
||||
|
||||
/**
 * Detach window in milliseconds — how long a session is kept alive after
 * WS close (with any code other than 4001 intentional-restart) so a
 * re-attach can resume the same PTY. The 60s default is long enough to
 * cover a Chrome MV3 service-worker suspend cycle, a wifi blip, or a brief
 * laptop sleep; short enough that genuinely-closed sessions don't stack up.
 * Overridable via GSTACK_PTY_DETACH_WINDOW_MS.
 *
 * A non-numeric or negative override falls back to the default rather than
 * handing NaN to timer code.
 */
const DETACH_WINDOW_MS = ((): number => {
  const fallback = 60000;
  const parsed = parseInt(
    process.env.GSTACK_PTY_DETACH_WINDOW_MS || `${fallback}`,
    10,
  );
  return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
})();
|
||||
|
||||
/**
 * Push `frame` onto the session's scrollback ring and drop the oldest
 * frames while the byte total exceeds RING_BUFFER_MAX_BYTES. Eviction is
 * whole-frame only, and the newest frame is never evicted (at least one
 * frame always remains), so a single oversized frame is kept intact.
 *
 * Side effect: inspects the appended frame for the xterm alt-screen
 * enter (CSI ?1049h) / exit (CSI ?1049l) sequences and updates
 * `session.altScreenActive` to match whichever occurs last in the frame.
 * A frame containing neither leaves the flag unchanged.
 */
export function appendToRingBuffer(session: PtySession, frame: Buffer): void {
  const frames = session.ringBuffer;
  frames.push(frame);

  let totalBytes = session.ringBufferBytes + frame.length;
  while (totalBytes > RING_BUFFER_MAX_BYTES && frames.length > 1) {
    totalBytes -= frames.shift()!.length;
  }
  session.ringBufferBytes = totalBytes;

  // The escape codes are 7-bit ASCII, so a single-byte (latin1) view of
  // the frame is sufficient for searching.
  const text = frame.toString('latin1');
  const lastEnter = text.lastIndexOf('\x1b[?1049h');
  const lastExit = text.lastIndexOf('\x1b[?1049l');

  // The two indices can only be equal when both are -1 (neither sequence
  // present). Otherwise the later one decides the state, so an open+close
  // inside one frame resolves to its trailing state.
  if (lastEnter !== lastExit) {
    session.altScreenActive = lastEnter > lastExit;
  }
}
|
||||
|
||||
/**
 * Assemble the bytes sent to a re-attaching client: a reset prelude followed
 * by everything currently held in the session's ring buffer.
 *
 * Layout of the returned buffer:
 *
 *   1. `\x1b[!p`     — DECSTR soft reset, always present
 *   2. `\x1b[?1049h` — only when session.altScreenActive is set
 *   3. every ring-buffer frame, in append order
 *
 * The client is expected to have written RIS (`\x1bc`) to its xterm before
 * feeding this payload in; it knows the next binary frame is the replay
 * because the server sends a `{type:"reattach-begin"}` text frame first.
 *
 * @param session Session whose altScreenActive flag and ringBuffer are read.
 * @returns A single concatenated Buffer; the session is not modified.
 */
export function buildReplayPayload(session: PtySession): Buffer {
  const prelude = session.altScreenActive ? '\x1b[!p\x1b[?1049h' : '\x1b[!p';
  return Buffer.concat([Buffer.from(prelude), ...session.ringBuffer]);
}
|
||||
|
||||
const sessions = new WeakMap<any, PtySession>(); // ws -> session
|
||||
|
|
@ -374,118 +201,6 @@ function disposeSession(session: PtySession): void {
|
|||
*
|
||||
* Everything else returns 404. The listener binds 127.0.0.1 only.
|
||||
*/
|
||||
/**
 * Gatekeeper for loopback /internal/* requests.
 *
 * Two checks, in order:
 *   1. The Authorization header must be exactly `Bearer <INTERNAL_TOKEN>`;
 *      anything else (including a missing header) is rejected with 403.
 *   2. If an X-Browse-Gen header is present and non-empty, it must equal
 *      CURRENT_GEN; a mismatch is rejected with 409. Requests that omit the
 *      header skip this check.
 *
 * @param req Incoming request; only its headers are read.
 * @returns `null` when the request may proceed, otherwise the Response the
 *          caller should send back as-is.
 */
function checkInternalAuth(req: Request): Response | null {
  const presented = req.headers.get('authorization');
  if (presented !== `Bearer ${INTERNAL_TOKEN}`) {
    return new Response('forbidden', { status: 403 });
  }

  const callerGen = req.headers.get('x-browse-gen');
  if (callerGen && callerGen !== CURRENT_GEN) {
    return new Response('stale generation', { status: 409 });
  }

  return null;
}
|
||||
|
||||
/**
 * Shared wrapper for JSON-bodied /internal/* routes.
 *
 * Steps, in order:
 *   1. Run checkInternalAuth; if it returns a Response, send that back.
 *   2. Parse the request body as JSON; a parse failure yields 400 "bad".
 *   3. Call `fn` with the parsed body and await its result:
 *        - a Response is returned untouched (custom status / headers),
 *        - `undefined` or `null` becomes a plain 200 "ok",
 *        - anything else is JSON-stringified into a 200 response with
 *          Content-Type: application/json.
 *   4. Anything thrown in step 3 (by `fn` or while serializing its result)
 *      collapses to 400 "bad".
 *
 * @param req Incoming request.
 * @param fn  Route logic; receives the parsed (unvalidated) JSON body.
 * @returns The Response to send to the caller; this function never rejects.
 */
async function internalHandler<T>(
  req: Request,
  fn: (body: any) => T | Promise<T> | Response | Promise<Response>,
): Promise<Response> {
  const denied = checkInternalAuth(req);
  if (denied) return denied;

  let body: any;
  try {
    body = await req.json();
  } catch {
    return new Response('bad', { status: 400 });
  }

  try {
    const outcome = await fn(body);
    if (outcome instanceof Response) return outcome;
    if (outcome === undefined || outcome === null) return new Response('ok');
    return new Response(JSON.stringify(outcome), {
      status: 200,
      headers: { 'Content-Type': 'application/json' },
    });
  } catch {
    return new Response('bad', { status: 400 });
  }
}
|
||||
|
||||
/**
 * Start the claude PTY for `session` unless a spawn was already attempted.
 *
 * `session.spawned` is set before the spawn is attempted and is the only
 * idempotency guard: any later call returns true immediately without
 * spawning again.
 *
 * PTY output handling: each chunk is prefixed with bytes held back from the
 * previous chunk, then split so that a trailing incomplete UTF-8 sequence is
 * deferred to the next chunk (issue #1272). The complete part is always
 * recorded via appendToRingBuffer and, when a socket is currently attached
 * (`session.liveWs`), also sent to it as a binary frame. The callback closes
 * over `session`, not the original `ws`, so output follows re-attaches.
 *
 * @param ws      Socket that triggered the spawn; used only to report a
 *                spawn failure (error frame + close code 4404).
 * @param session Session to attach the process to (`session.proc`).
 * @returns true when a process handle was obtained (or a spawn was already
 *          attempted earlier); false when spawnClaude returned nothing.
 */
function maybeSpawnPty(ws: any, session: PtySession): boolean {
  if (session.spawned) return true;
  session.spawned = true;

  // Length of the prefix of `bytes` that ends on a UTF-8 character boundary.
  // Only the last three bytes are inspected: that is the longest possible
  // incomplete tail of a (max 4-byte) sequence.
  const completeUtf8Length = (bytes: Buffer): number => {
    const total = bytes.length;
    const floor = Math.max(0, total - 3);
    for (let pos = total - 1; pos >= floor; pos--) {
      const byte = bytes[pos];
      if ((byte & 0x80) === 0) return pos + 1; // ASCII: boundary right after it
      if ((byte & 0xC0) === 0x80) continue; // continuation byte: keep looking back
      // Lead byte: decide whether its whole sequence is already present.
      let needed = 4;
      if ((byte & 0xE0) === 0xC0) needed = 2;
      else if ((byte & 0xF0) === 0xE0) needed = 3;
      return total - pos >= needed ? total : pos;
    }
    return total;
  };

  // Incomplete trailing bytes carried over to the next chunk.
  let carry = Buffer.alloc(0);

  const child = spawnClaude(session.cols, session.rows, (chunk) => {
    const merged = Buffer.concat([carry, Buffer.from(chunk)]);
    const cut = completeUtf8Length(merged);
    const ready = merged.slice(0, cut);
    carry = merged.slice(cut);
    if (ready.length === 0) return;

    // Record first so a later re-attach can replay, then forward to the
    // currently attached socket, if any (null while detached).
    appendToRingBuffer(session, ready);
    const attached = session.liveWs;
    if (!attached) return;
    try { attached.sendBinary(ready); } catch {}
  });

  if (!child) {
    // No process handle: tell the client why and close with 4404.
    try {
      ws.send(JSON.stringify({
        type: 'error',
        code: 'CLAUDE_NOT_FOUND',
        message: 'claude CLI not on PATH. Install: https://docs.anthropic.com/en/docs/claude-code',
      }));
      ws.close(4404, 'claude not found');
    } catch {}
    return false;
  }

  session.proc = child;
  // When the child exits, close whichever socket is attached at that time.
  child.exited?.then?.(() => {
    try { session.liveWs?.close(1000, 'pty exited'); } catch {}
  });
  return true;
}
|
||||
|
||||
function buildServer() {
|
||||
return Bun.serve({
|
||||
hostname: '127.0.0.1',
|
||||
|
|
@ -496,66 +211,29 @@ function buildServer() {
|
|||
const url = new URL(req.url);
|
||||
|
||||
// /internal/grant — loopback-only handshake from parent server.
|
||||
// v1.44+: accepts `{token, sessionId?}`. The sessionId binding lets
|
||||
// the agent route re-attach attempts (same sessionId, fresh token)
|
||||
// back to the same PtySession. Legacy callers passing just `{token}`
|
||||
// still work — sessionId becomes null and re-attach is unavailable
|
||||
// for that grant.
|
||||
if (url.pathname === '/internal/grant' && req.method === 'POST') {
|
||||
return internalHandler(req, (body) => {
|
||||
const auth = req.headers.get('authorization');
|
||||
if (auth !== `Bearer ${INTERNAL_TOKEN}`) {
|
||||
return new Response('forbidden', { status: 403 });
|
||||
}
|
||||
return req.json().then((body: any) => {
|
||||
if (typeof body?.token === 'string' && body.token.length > 16) {
|
||||
const sid = typeof body?.sessionId === 'string' && body.sessionId.length > 0
|
||||
? body.sessionId
|
||||
: null;
|
||||
validTokens.set(body.token, sid);
|
||||
validTokens.add(body.token);
|
||||
}
|
||||
});
|
||||
return new Response('ok');
|
||||
}).catch(() => new Response('bad', { status: 400 }));
|
||||
}
|
||||
|
||||
// /internal/revoke — drop a token (called on WS close or bootstrap reload)
|
||||
if (url.pathname === '/internal/revoke' && req.method === 'POST') {
|
||||
return internalHandler(req, (body) => {
|
||||
const auth = req.headers.get('authorization');
|
||||
if (auth !== `Bearer ${INTERNAL_TOKEN}`) {
|
||||
return new Response('forbidden', { status: 403 });
|
||||
}
|
||||
return req.json().then((body: any) => {
|
||||
if (typeof body?.token === 'string') validTokens.delete(body.token);
|
||||
});
|
||||
}
|
||||
|
||||
// /internal/restart — dispose the PtySession for a specific sessionId.
|
||||
// Scoped to one caller (not enumerate-all). Server.ts /pty-restart
|
||||
// posts here with the caller's sessionId; we kill ONLY that PTY,
|
||||
// leaving any other live sidebar tabs untouched. Codex T2 of the
|
||||
// eng review caught this gap — pre-spec the route would have
|
||||
// disposed all sessions.
|
||||
if (url.pathname === '/internal/restart' && req.method === 'POST') {
|
||||
return internalHandler(req, (body) => {
|
||||
const sid = typeof body?.sessionId === 'string' ? body.sessionId : null;
|
||||
if (!sid) return { killed: 0 };
|
||||
const session = sessionsById.get(sid);
|
||||
if (!session) return { killed: 0 };
|
||||
// Cancel any pending detach timer before disposal — otherwise it
|
||||
// would fire later against an already-disposed session.
|
||||
if (session.detachTimer) {
|
||||
clearTimeout(session.detachTimer);
|
||||
session.detachTimer = null;
|
||||
}
|
||||
disposeSession(session);
|
||||
sessionsById.delete(sid);
|
||||
return { killed: 1 };
|
||||
});
|
||||
}
|
||||
|
||||
// /internal/healthz — liveness probe used by the v1.44 watchdog.
|
||||
// Returns this agent's pid + gen + active session count without
|
||||
// touching claude binary lookup (which can fail for non-process
|
||||
// reasons and isn't a useful liveness signal). GET — no body to parse,
|
||||
// so it stays on the bare checkInternalAuth gate.
|
||||
if (url.pathname === '/internal/healthz' && req.method === 'GET') {
|
||||
const denied = checkInternalAuth(req);
|
||||
if (denied) return denied;
|
||||
return new Response(JSON.stringify({
|
||||
pid: process.pid,
|
||||
gen: CURRENT_GEN,
|
||||
sessions: validTokens.size,
|
||||
}), { status: 200, headers: { 'Content-Type': 'application/json' } });
|
||||
return new Response('ok');
|
||||
}).catch(() => new Response('bad', { status: 400 }));
|
||||
}
|
||||
|
||||
// /claude-available — bootstrap card hits this when user clicks "I installed it".
|
||||
|
|
@ -627,13 +305,8 @@ function buildServer() {
|
|||
return new Response('unauthorized', { status: 401 });
|
||||
}
|
||||
|
||||
// v1.44+: surface the token's sessionId binding to the upgraded ws.
|
||||
// open() reads it via ws.data and registers the session in
|
||||
// sessionsById so /internal/restart and (Commit 3) re-attach
|
||||
// lookups can find it.
|
||||
const sessionId = validTokens.get(token) ?? null;
|
||||
const upgraded = server.upgrade(req, {
|
||||
data: { cookie: token, sessionId },
|
||||
data: { cookie: token },
|
||||
// Echo the protocol back so the browser accepts the upgrade.
|
||||
// Required when the client sends Sec-WebSocket-Protocol — the
|
||||
// server MUST select one of the offered protocols, otherwise
|
||||
|
|
@ -647,105 +320,22 @@ function buildServer() {
|
|||
},
|
||||
|
||||
websocket: {
|
||||
/**
|
||||
* Spawn the claude PTY for `session` if it hasn't been spawned yet.
|
||||
* Called from both message paths: the legacy binary-frame trigger
|
||||
* (any keystroke) AND the v1.44 explicit `{type:"start"}` trigger
|
||||
* (forceRestart sends this on every fresh WS to get an eager prompt
|
||||
* without requiring the user to type). Idempotent — a second call
|
||||
* after `spawned: true` is a no-op.
|
||||
*/
|
||||
open(ws) {
|
||||
const sessionId = (ws.data as any)?.sessionId ?? null;
|
||||
const cookie = (ws.data as any)?.cookie || '';
|
||||
|
||||
// Commit 3 re-attach: if this sessionId already has a detached
|
||||
// PtySession in sessionsById, REPLACE its liveWs ref and replay
|
||||
// the ring buffer. The PTY process is unchanged — claude keeps
|
||||
// running through the wifi blip / panel-suspend cycle.
|
||||
if (sessionId) {
|
||||
const existing = sessionsById.get(sessionId);
|
||||
if (existing) {
|
||||
if (existing.detachTimer) {
|
||||
clearTimeout(existing.detachTimer);
|
||||
existing.detachTimer = null;
|
||||
}
|
||||
existing.detached = false;
|
||||
existing.liveWs = ws;
|
||||
existing.cookie = cookie;
|
||||
// Re-bind the WS-keyed map so resize/close/message handlers
|
||||
// can still find this session via the new ws.
|
||||
sessions.set(ws, existing);
|
||||
// Restart keepalive on the new ws.
|
||||
if (existing.pingInterval) clearInterval(existing.pingInterval);
|
||||
existing.pingInterval = setInterval(() => {
|
||||
try { ws.send(JSON.stringify({ type: 'ping', ts: Date.now() })); } catch {}
|
||||
}, KEEPALIVE_INTERVAL_MS);
|
||||
// Tell the client to prep its xterm (write RIS) before the
|
||||
// replay binary arrives. Order matters — the binary frame
|
||||
// immediately after this text frame IS the replay.
|
||||
try { ws.send(JSON.stringify({ type: 'reattach-begin', sessionId })); } catch {}
|
||||
try { ws.sendBinary(buildReplayPayload(existing)); } catch {}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const session: PtySession = {
|
||||
proc: null,
|
||||
cols: 80,
|
||||
rows: 24,
|
||||
cookie,
|
||||
liveWs: ws,
|
||||
sessionId,
|
||||
spawned: false,
|
||||
pingInterval: null,
|
||||
ringBuffer: [],
|
||||
ringBufferBytes: 0,
|
||||
altScreenActive: false,
|
||||
detached: false,
|
||||
detachTimer: null,
|
||||
};
|
||||
session.pingInterval = setInterval(() => {
|
||||
try {
|
||||
ws.send(JSON.stringify({ type: 'ping', ts: Date.now() }));
|
||||
} catch {
|
||||
// ws likely closed mid-tick; close handler clears the interval.
|
||||
}
|
||||
}, KEEPALIVE_INTERVAL_MS);
|
||||
sessions.set(ws, session);
|
||||
// Index by sessionId for /internal/restart + Commit 3 re-attach.
|
||||
if (sessionId) sessionsById.set(sessionId, session);
|
||||
},
|
||||
|
||||
message(ws, raw) {
|
||||
let session = sessions.get(ws);
|
||||
if (!session) {
|
||||
// Fallback for any path where open() didn't fire (shouldn't happen
|
||||
// in Bun.serve but keeps the spawn path safe). No keepalive on
|
||||
// this branch — open() is the supported entry point.
|
||||
session = {
|
||||
proc: null,
|
||||
cols: 80,
|
||||
rows: 24,
|
||||
cookie: (ws.data as any)?.cookie || '',
|
||||
liveWs: ws,
|
||||
sessionId: (ws.data as any)?.sessionId ?? null,
|
||||
spawned: false,
|
||||
pingInterval: null,
|
||||
ringBuffer: [],
|
||||
ringBufferBytes: 0,
|
||||
altScreenActive: false,
|
||||
detached: false,
|
||||
detachTimer: null,
|
||||
};
|
||||
sessions.set(ws, session);
|
||||
if (session.sessionId) sessionsById.set(session.sessionId, session);
|
||||
}
|
||||
|
||||
// Text frames are control messages: {type: "resize", cols, rows},
|
||||
// {type: "tabSwitch", tabId, url, title}, {type: "tabState", ...},
|
||||
// or v1.44 keepalive frames: {type: "pong", ts}, {type: "keepalive"}.
|
||||
// Binary frames are raw input bytes destined for the PTY stdin.
|
||||
// Text frames are control messages: {type: "resize", cols, rows} or
|
||||
// {type: "tabSwitch", tabId, url, title}. Binary frames are raw input
|
||||
// bytes destined for the PTY stdin.
|
||||
if (typeof raw === 'string') {
|
||||
let msg: any;
|
||||
try { msg = JSON.parse(raw); } catch { return; }
|
||||
|
|
@ -765,32 +355,50 @@ function buildServer() {
|
|||
handleTabState(msg);
|
||||
return;
|
||||
}
|
||||
if (msg?.type === 'pong' || msg?.type === 'keepalive' || msg?.type === 'ping') {
|
||||
// Keepalive frames — accepted and silently dropped. The mere
|
||||
// fact that the WS carried this frame is the liveness signal;
|
||||
// there's no application-level state to update at this layer.
|
||||
// `ping` is acknowledged here too in case the client (or a
|
||||
// future agent peer) mirrors our server-side ping shape.
|
||||
return;
|
||||
}
|
||||
if (msg?.type === 'start') {
|
||||
// v1.44 explicit spawn trigger. forceRestart sends this
|
||||
// immediately on every fresh WS so claude boots without the
|
||||
// user having to type a keystroke (pre-v1.44, the lazy-binary
|
||||
// spawn made restart look stuck until the user typed). No-op
|
||||
// if already spawned.
|
||||
maybeSpawnPty(ws, session);
|
||||
return;
|
||||
}
|
||||
// Unknown text frame — ignore.
|
||||
return;
|
||||
}
|
||||
|
||||
// Binary input. Lazy-spawn claude on the first byte if `start`
|
||||
// wasn't sent first. Both paths land in the same maybeSpawnPty
|
||||
// helper for behavior parity.
|
||||
// Binary input. Lazy-spawn claude on the first byte.
|
||||
if (!session.spawned) {
|
||||
if (!maybeSpawnPty(ws, session)) return;
|
||||
session.spawned = true;
|
||||
// UTF-8 boundary detection to prevent splitting multi-byte characters (issue #1272).
|
||||
// Buffer incomplete UTF-8 sequences until the next chunk completes them.
|
||||
let leftover = Buffer.alloc(0);
|
||||
const proc = spawnClaude(session.cols, session.rows, (chunk) => {
|
||||
const combined = Buffer.concat([leftover, Buffer.from(chunk)]);
|
||||
// Find the last index where a UTF-8 codepoint ends. Look back at most 3 bytes.
|
||||
let safeEnd = combined.length;
|
||||
for (let i = combined.length - 1; i >= Math.max(0, combined.length - 3); i--) {
|
||||
const b = combined[i];
|
||||
if ((b & 0x80) === 0) { safeEnd = i + 1; break; } // ASCII
|
||||
if ((b & 0xC0) === 0x80) continue; // continuation byte
|
||||
const expected = (b & 0xE0) === 0xC0 ? 2 : (b & 0xF0) === 0xE0 ? 3 : 4;
|
||||
safeEnd = (combined.length - i >= expected) ? combined.length : i;
|
||||
break;
|
||||
}
|
||||
const flush = combined.slice(0, safeEnd);
|
||||
leftover = combined.slice(safeEnd);
|
||||
if (flush.length) {
|
||||
try { ws.sendBinary(flush); } catch {}
|
||||
}
|
||||
});
|
||||
if (!proc) {
|
||||
try {
|
||||
ws.send(JSON.stringify({
|
||||
type: 'error',
|
||||
code: 'CLAUDE_NOT_FOUND',
|
||||
message: 'claude CLI not on PATH. Install: https://docs.anthropic.com/en/docs/claude-code',
|
||||
}));
|
||||
ws.close(4404, 'claude not found');
|
||||
} catch {}
|
||||
return;
|
||||
}
|
||||
session.proc = proc;
|
||||
// Watch for child exit so the WS closes cleanly when claude exits.
|
||||
proc.exited?.then?.(() => {
|
||||
try { ws.close(1000, 'pty exited'); } catch {}
|
||||
});
|
||||
}
|
||||
try {
|
||||
// raw is a Uint8Array; Bun.Terminal.write accepts string|Buffer.
|
||||
|
|
@ -801,49 +409,16 @@ function buildServer() {
|
|||
}
|
||||
},
|
||||
|
||||
close(ws, code, _reason) {
|
||||
close(ws) {
|
||||
const session = sessions.get(ws);
|
||||
if (!session) return;
|
||||
// Always drop the WS-keyed map entry and the per-attach
|
||||
// attachToken — the attach grant was single-use.
|
||||
sessions.delete(ws);
|
||||
if (session.cookie) validTokens.delete(session.cookie);
|
||||
// Keepalive lives with the WS — every attach starts a fresh one.
|
||||
if (session.pingInterval) {
|
||||
clearInterval(session.pingInterval);
|
||||
session.pingInterval = null;
|
||||
}
|
||||
|
||||
// Commit 3 detach state machine. If the close was intentional
|
||||
// (code 4001 = restart, 4404 = no-claude error), dispose
|
||||
// immediately — there's no value in keeping the PTY alive.
|
||||
// Otherwise enter the detach window: claude keeps running, the
|
||||
// ring buffer keeps accumulating, and a re-attach with the same
|
||||
// sessionId within DETACH_WINDOW_MS picks back up. If the timer
|
||||
// fires without a re-attach, the session is disposed normally.
|
||||
//
|
||||
// Sessions without a sessionId (legacy single-shot grants) can't
|
||||
// re-attach by definition — fall through to immediate dispose.
|
||||
const intentional = code === 4001 || code === 4404 || code === 1000;
|
||||
if (intentional || !session.sessionId) {
|
||||
if (session) {
|
||||
disposeSession(session);
|
||||
if (session.sessionId) sessionsById.delete(session.sessionId);
|
||||
return;
|
||||
if (session.cookie) {
|
||||
// Drop the cookie so it can't be replayed against a new PTY.
|
||||
validTokens.delete(session.cookie);
|
||||
}
|
||||
sessions.delete(ws);
|
||||
}
|
||||
|
||||
// Mark detached and start the disposal timer. The session stays
|
||||
// in sessionsById so the next /ws upgrade with the same
|
||||
// sessionId can find and reattach to it.
|
||||
session.detached = true;
|
||||
session.liveWs = null;
|
||||
session.detachTimer = setTimeout(() => {
|
||||
if (!session.detached) return; // re-attached in the meantime
|
||||
disposeSession(session);
|
||||
if (session.sessionId) sessionsById.delete(session.sessionId);
|
||||
}, DETACH_WINDOW_MS);
|
||||
// setTimeout returns a Bun Timer; unref so the detach window
|
||||
// doesn't keep the process alive past natural shutdown.
|
||||
(session.detachTimer as any)?.unref?.();
|
||||
},
|
||||
},
|
||||
});
|
||||
|
|
@ -973,25 +548,14 @@ function main() {
|
|||
writeSecureFile(tmp, String(port));
|
||||
fs.renameSync(tmp, PORT_FILE);
|
||||
|
||||
// Write identity-based agent record (pid + per-boot gen). Replaces the
|
||||
// v1.43- `pkill -f terminal-agent\.ts` regex teardown that could kill
|
||||
// sibling gstack sessions. Callers (cli.ts spawn site, server.ts
|
||||
// shutdown, the v1.44 watchdog) now route through killAgentByRecord in
|
||||
// terminal-agent-control.ts.
|
||||
writeAgentRecord(dir, { pid: process.pid, gen: CURRENT_GEN, startedAt: Date.now() });
|
||||
|
||||
// Hand the parent the internal token so it can call /internal/grant.
|
||||
// Parent learns INTERNAL_TOKEN via env (TERMINAL_AGENT_INTERNAL_TOKEN below).
|
||||
// We just print it on stdout for the supervising process to pick up if it's
|
||||
// not already in env. Defense against env races at spawn time.
|
||||
console.log(`[terminal-agent] listening on 127.0.0.1:${port} pid=${process.pid} gen=${CURRENT_GEN}`);
|
||||
console.log(`[terminal-agent] listening on 127.0.0.1:${port} pid=${process.pid}`);
|
||||
|
||||
// Cleanup port file + agent record on exit.
|
||||
const cleanup = () => {
|
||||
safeUnlink(PORT_FILE);
|
||||
clearAgentRecord(dir);
|
||||
process.exit(0);
|
||||
};
|
||||
// Cleanup port file on exit.
|
||||
const cleanup = () => { safeUnlink(PORT_FILE); process.exit(0); };
|
||||
process.on('SIGTERM', cleanup);
|
||||
process.on('SIGINT', cleanup);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,14 +11,12 @@ import { findInstalledBrowsers, importCookies, importCookiesViaCdp, hasV20Cookie
|
|||
import { generatePickerCode } from './cookie-picker-routes';
|
||||
import { validateNavigationUrl } from './url-validation';
|
||||
import { validateOutputPath, validateReadPath } from './path-security';
|
||||
import { guardScreenshotPath } from './screenshot-size-guard';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import type { SetContentWaitUntil } from './tab-session';
|
||||
import { TEMP_DIR, isPathWithin } from './platform';
|
||||
import { SAFE_DIRECTORIES } from './path-security';
|
||||
import { modifyStyle, undoModification, resetModifications, getModificationHistory } from './cdp-inspector';
|
||||
import { withCdpSession } from './cdp-bridge';
|
||||
|
||||
/**
|
||||
* Aggressive page cleanup selectors and heuristics.
|
||||
|
|
@ -1125,10 +1123,6 @@ export async function handleWriteCommand(
|
|||
|
||||
// Take screenshot
|
||||
await page.screenshot({ path: outputPath, fullPage: !scrollTo });
|
||||
// Guard against Anthropic vision API >2000px brick (#1214). Only
|
||||
// applies to fullPage captures; scrollTo viewport-bound shots are
|
||||
// already capped by the viewport size.
|
||||
if (!scrollTo) await guardScreenshotPath(outputPath);
|
||||
|
||||
// Restore viewport
|
||||
if (viewportWidth && originalViewport) {
|
||||
|
|
@ -1410,10 +1404,9 @@ export async function handleWriteCommand(
|
|||
validateOutputPath(outputPath);
|
||||
|
||||
try {
|
||||
const data = await withCdpSession(page, async (cdp) => {
|
||||
const result = await cdp.send('Page.captureSnapshot', { format: 'mhtml' });
|
||||
return (result as { data: string }).data;
|
||||
});
|
||||
const cdp = await page.context().newCDPSession(page);
|
||||
const { data } = await cdp.send('Page.captureSnapshot', { format: 'mhtml' });
|
||||
await cdp.detach();
|
||||
fs.writeFileSync(outputPath, data);
|
||||
return `Archive saved: ${outputPath} (${Math.round(data.length / 1024)}KB, MHTML)`;
|
||||
} catch (err: any) {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
import { EventEmitter } from 'node:events';
|
||||
import { afterEach, beforeEach, describe, it, expect } from 'bun:test';
|
||||
import { describe, it, expect } from 'bun:test';
|
||||
|
||||
// ─── BrowserManager basic unit tests ─────────────────────────────
|
||||
|
||||
|
|
@ -16,214 +15,3 @@ describe('BrowserManager defaults', () => {
|
|||
expect(bm.getRefMap()).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── shouldEnableChromiumSandbox ─────────────────────────────────
//
// Pinning this is what prevents the "--no-sandbox" yellow infobar from
// regressing on headed launches. Playwright auto-adds --no-sandbox when
// chromiumSandbox !== true (playwright-core chromium.js:291-292), so all
// three launch sites in browser-manager.ts must pass the policy this
// helper computes.

describe('shouldEnableChromiumSandbox', () => {
  // Snapshot everything the cases mutate so afterEach can restore it.
  const origPlatform = process.platform;
  const origCI = process.env.CI;
  const origContainer = process.env.CONTAINER;
  const origNoSandbox = process.env.GSTACK_CHROMIUM_NO_SANDBOX;
  const origGetuid = process.getuid;

  beforeEach(() => {
    // Start every case from a clean environment, whatever the host sets.
    delete process.env.CI;
    delete process.env.CONTAINER;
    delete process.env.GSTACK_CHROMIUM_NO_SANDBOX;
  });

  afterEach(() => {
    Object.defineProperty(process, 'platform', { value: origPlatform });
    if (origCI === undefined) delete process.env.CI; else process.env.CI = origCI;
    if (origContainer === undefined) delete process.env.CONTAINER; else process.env.CONTAINER = origContainer;
    if (origNoSandbox === undefined) delete process.env.GSTACK_CHROMIUM_NO_SANDBOX; else process.env.GSTACK_CHROMIUM_NO_SANDBOX = origNoSandbox;
    process.getuid = origGetuid;
  });

  function setPlatform(p: NodeJS.Platform) {
    Object.defineProperty(process, 'platform', { value: p });
  }

  // One row per scenario: platform + uid + extra env → expected policy.
  const cases: Array<{
    name: string;
    platform: NodeJS.Platform;
    uid: number;
    env?: Record<string, string>;
    expected: boolean;
  }> = [
    { name: 'darwin, no CI/CONTAINER/root → true', platform: 'darwin', uid: 501, expected: true },
    { name: 'linux, no CI/CONTAINER/root → true', platform: 'linux', uid: 1000, expected: true },
    { name: 'win32 → false (sandbox fails in Bun→Node→Chromium chain)', platform: 'win32', uid: 1000, expected: false },
    { name: 'linux + CI=1 → false', platform: 'linux', uid: 1000, env: { CI: '1' }, expected: false },
    { name: 'linux + CONTAINER=1 → false', platform: 'linux', uid: 1000, env: { CONTAINER: '1' }, expected: false },
    { name: 'linux + root (uid 0) → false', platform: 'linux', uid: 0, expected: false },
    // #1562 — Ubuntu/AppArmor opt-in override
    {
      name: 'linux + GSTACK_CHROMIUM_NO_SANDBOX=1 → false (Ubuntu/AppArmor opt-out)',
      platform: 'linux',
      uid: 1000,
      env: { GSTACK_CHROMIUM_NO_SANDBOX: '1' },
      expected: false,
    },
    {
      name: 'darwin + GSTACK_CHROMIUM_NO_SANDBOX=1 → false (env override wins on any platform)',
      platform: 'darwin',
      uid: 501,
      env: { GSTACK_CHROMIUM_NO_SANDBOX: '1' },
      expected: false,
    },
    {
      name: 'GSTACK_CHROMIUM_NO_SANDBOX=0 → does NOT trigger override (must be exactly "1")',
      platform: 'linux',
      uid: 1000,
      env: { GSTACK_CHROMIUM_NO_SANDBOX: '0' },
      expected: true,
    },
  ];

  for (const scenario of cases) {
    it(scenario.name, async () => {
      setPlatform(scenario.platform);
      Object.assign(process.env, scenario.env ?? {});
      process.getuid = (() => scenario.uid) as typeof process.getuid;
      const { shouldEnableChromiumSandbox } = await import('../src/browser-manager');
      expect(shouldEnableChromiumSandbox()).toBe(scenario.expected);
    });
  }
});
|
||||
|
||||
// ─── resolveDisconnectCause ──────────────────────────────────────
|
||||
//
|
||||
// Pinning the clean-vs-crash distinction matters because gbd's
|
||||
// HealthMonitor consumes our exit code (0 = don't restart, !=0 =
|
||||
// restart). A regression here brings back the "Cmd+Q makes the browser
|
||||
// keep coming back" UX bug.
|
||||
|
||||
/**
 * Build a minimal stand-in for a browser object whose `process()` exposes
 * `exitCode`, `signalCode` and `once`, for driving resolveDisconnectCause.
 *
 * Without `exitDelay` the fake process reports `opts.exitCode` /
 * `opts.signalCode` immediately ("already exited"). With `exitDelay` both
 * fields start as null; after that many milliseconds they are filled in and
 * an 'exit' event is emitted with (exitCode, signalCode).
 *
 * @param opts Final exit code / signal, plus the optional delay in ms.
 * @returns Object whose `process()` returns the shared mutable state.
 */
function makeFakeBrowser(opts: {
  exitCode: number | null;
  signalCode: NodeJS.Signals | null;
  /** ms before emitting 'exit'; default = already exited at construction */
  exitDelay?: number;
}): { process(): { exitCode: number | null; signalCode: NodeJS.Signals | null; once: EventEmitter['once'] } } {
  const emitter = new EventEmitter();
  const exitsLater = opts.exitDelay != null;
  const state = {
    exitCode: exitsLater ? null : opts.exitCode,
    signalCode: exitsLater ? null : opts.signalCode,
    once: emitter.once.bind(emitter),
  };
  if (opts.exitDelay != null) {
    setTimeout(() => {
      // Publish the final status before notifying listeners.
      state.exitCode = opts.exitCode;
      state.signalCode = opts.signalCode;
      emitter.emit('exit', opts.exitCode, opts.signalCode);
    }, opts.exitDelay);
  }
  return { process: () => state };
}
|
||||
|
||||
describe('resolveDisconnectCause', () => {
  // Each row describes the fake browser process handed to
  // resolveDisconnectCause and the classification expected back.
  const cases: Array<{
    name: string;
    exitCode: number | null;
    signalCode: NodeJS.Signals | null;
    exitDelay?: number;
    expected: 'clean' | 'crash';
  }> = [
    { name: 'clean: process already exited with code 0', exitCode: 0, signalCode: null, expected: 'clean' },
    { name: 'crash: non-zero exit code', exitCode: 1, signalCode: null, expected: 'crash' },
    { name: 'crash: SIGSEGV', exitCode: null, signalCode: 'SIGSEGV', expected: 'crash' },
    { name: 'crash: SIGKILL', exitCode: null, signalCode: 'SIGKILL', expected: 'crash' },
    {
      name: 'clean: process exits asynchronously with code 0 within timeout',
      exitCode: 0,
      signalCode: null,
      exitDelay: 50,
      expected: 'clean',
    },
    {
      name: 'crash: process exits asynchronously with non-zero code',
      exitCode: 137,
      signalCode: null,
      exitDelay: 50,
      expected: 'crash',
    },
  ];

  for (const scenario of cases) {
    it(scenario.name, async () => {
      const { resolveDisconnectCause } = await import('../src/browser-manager');
      const fake = makeFakeBrowser({
        exitCode: scenario.exitCode,
        signalCode: scenario.signalCode,
        // Leave the key out entirely when no delay is wanted, so the fake
        // reports an already-exited process.
        ...(scenario.exitDelay != null ? { exitDelay: scenario.exitDelay } : {}),
      });
      expect(await resolveDisconnectCause(fake as never)).toBe(scenario.expected);
    });
  }

  it('crash: null browser returns crash (defensive default)', async () => {
    const { resolveDisconnectCause } = await import('../src/browser-manager');
    expect(await resolveDisconnectCause(null)).toBe('crash');
  });
});
|
||||
|
||||
// ─── onDisconnect exit-code propagation (regression test) ──────────
|
||||
//
|
||||
// The contract: BrowserManager.onDisconnect is called with the resolved
|
||||
// exit code (0 for clean Cmd+Q, 2 for crash). server.ts then forwards
|
||||
// that code to activeShutdown(), which exits the process.
|
||||
//
|
||||
// Without this propagation, the headed-mode user-visible Cmd+Q respawn
|
||||
// bug returns: server.ts hardcoded `activeShutdown?.(2)` ignores the
|
||||
// resolved 0 and gbrowser's gbd HealthMonitor treats the clean quit as
|
||||
// a crash, restarting the window.
|
||||
describe('BrowserManager.onDisconnect exit-code propagation', () => {
  it('signature accepts an optional exitCode argument', async () => {
    const { BrowserManager } = await import('../src/browser-manager');
    const manager = new BrowserManager();
    const received: Array<number | undefined> = [];

    // Install a recording callback, then invoke it with each accepted form.
    manager.onDisconnect = (code?: number) => {
      received.push(code);
    };
    manager.onDisconnect(0);
    manager.onDisconnect(2);
    manager.onDisconnect(undefined);

    expect(received).toEqual([0, 2, undefined]);
  });

  it('server.ts callback forwards exitCode when provided, falls back to 2', async () => {
    // Local replica of the wiring this test expects in browse/src/server.ts:
    // the disconnect callback hands its code to the shutdown function and
    // substitutes 2 only when no code was supplied.
    const forwarded: number[] = [];
    const activeShutdown = (code: number) => {
      forwarded.push(code);
    };
    const onDisconnect = (code?: number) => activeShutdown(code ?? 2);

    [0, 2, undefined].forEach((code) => onDisconnect(code));

    expect(forwarded).toEqual([0, 2, 2]);
  });
});
|
||||
|
|
|
|||
|
|
@ -178,17 +178,7 @@ describe('buildSpawnEnv', () => {
|
|||
process.env.LANG = 'en_US.UTF-8';
|
||||
});
|
||||
afterEach(() => {
|
||||
// process.env = origEnv replaces only the reference; the underlying
|
||||
// env stays mutated and leaks to later test files in the same Bun
|
||||
// process (e.g., breaks Bun.which('bash') in security.test.ts and
|
||||
// bun-spawn in pair-agent-tunnel-eval.test.ts). Delete every current
|
||||
// key then re-assign from the snapshot — restores the actual env.
|
||||
for (const k of Object.keys(process.env)) {
|
||||
if (!(k in origEnv)) delete process.env[k];
|
||||
}
|
||||
for (const [k, v] of Object.entries(origEnv)) {
|
||||
if (v !== undefined) process.env[k] = v;
|
||||
}
|
||||
process.env = origEnv;
|
||||
});
|
||||
|
||||
it('untrusted: drops $HOME and secrets', () => {
|
||||
|
|
@ -303,15 +293,7 @@ describe.skipIf(SKIP_SPAWN)('spawnSkill: lifecycle', () => {
|
|||
expect(parsed.gh).toBeNull();
|
||||
expect(parsed.gstack).toBeNull();
|
||||
} finally {
|
||||
// See afterEach comment in `buildSpawnEnv` describe — direct
|
||||
// reassignment of process.env doesn't actually restore the
|
||||
// underlying env in Bun. Delete + re-assign instead.
|
||||
for (const k of Object.keys(process.env)) {
|
||||
if (!(k in origEnv)) delete process.env[k];
|
||||
}
|
||||
for (const [k, v] of Object.entries(origEnv)) {
|
||||
if (v !== undefined) process.env[k] = v;
|
||||
}
|
||||
process.env = origEnv;
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -330,12 +312,7 @@ describe.skipIf(SKIP_SPAWN)('spawnSkill: lifecycle', () => {
|
|||
const parsed = JSON.parse(result.stdout);
|
||||
expect(parsed.home).toBe('/Users/test-user');
|
||||
} finally {
|
||||
for (const k of Object.keys(process.env)) {
|
||||
if (!(k in origEnv)) delete process.env[k];
|
||||
}
|
||||
for (const [k, v] of Object.entries(origEnv)) {
|
||||
if (v !== undefined) process.env[k] = v;
|
||||
}
|
||||
process.env = origEnv;
|
||||
}
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -1,95 +0,0 @@
|
|||
import { describe, test, expect, beforeEach } from 'bun:test';
|
||||
import type { Page } from 'playwright';
|
||||
import {
|
||||
__testInternals,
|
||||
undoModification,
|
||||
} from '../src/cdp-inspector';
|
||||
|
||||
// Regression tests for the modificationHistory cap (D6 / smoking gun #2).
|
||||
// Pre-cap, the module-scoped array grew unbounded across the session. Cap is
|
||||
// 200 entries, oldest evicted on push past the cap. undoModification reports
|
||||
// "evicted at the cap" in the error message so a user who asks for a
|
||||
// no-longer-available index understands what happened (instead of seeing the
|
||||
// pre-cap "No modification at index 500" with no context).
|
||||
|
||||
const { pushModification, MOD_HISTORY_CAP, getRawHistory, getTotalPushed, resetForTest } = __testInternals;
|
||||
|
||||
/**
 * Creates a synthetic modification record for history tests. `id` is used
 * both in the selector (`#node-<id>`) and as the timestamp, so assertions can
 * identify an entry by its timestamp alone.
 */
function fakeMod(id: number) {
  const selector = `#node-${id}`;
  return {
    selector,
    property: 'color',
    oldValue: 'red',
    newValue: 'blue',
    source: 'inline' as const,
    timestamp: id,
    method: 'setProperty' as const,
  };
}
|
||||
|
||||
beforeEach(() => {
|
||||
resetForTest();
|
||||
});
|
||||
|
||||
describe('modificationHistory cap', () => {
  // Pushes `count` fake modifications with timestamps 0..count-1.
  const pushMany = (count: number): void => {
    let next = 0;
    while (next < count) {
      pushModification(fakeMod(next));
      next += 1;
    }
  };

  test('1. push under cap keeps every entry', () => {
    pushMany(50);
    const history = getRawHistory();
    expect(history.length).toBe(50);
    expect(getTotalPushed()).toBe(50);
    expect(getRawHistory()[0].timestamp).toBe(0);
    expect(getRawHistory()[49].timestamp).toBe(49);
  });

  test('2. push exactly cap keeps every entry', () => {
    pushMany(MOD_HISTORY_CAP);
    expect(getRawHistory().length).toBe(MOD_HISTORY_CAP);
    expect(getTotalPushed()).toBe(MOD_HISTORY_CAP);
    expect(getRawHistory()[0].timestamp).toBe(0);
  });

  test('3. push past cap evicts oldest, keeps length at cap', () => {
    const overflow = 50;
    const total = MOD_HISTORY_CAP + overflow;
    pushMany(total);
    expect(getRawHistory().length).toBe(MOD_HISTORY_CAP);
    expect(getTotalPushed()).toBe(total);
    // The first 50 pushes are gone, so the oldest survivor carries timestamp 50
    // and the newest carries the timestamp of the final push.
    expect(getRawHistory()[0].timestamp).toBe(50);
    expect(getRawHistory()[MOD_HISTORY_CAP - 1].timestamp).toBe(total - 1);
  });

  test('4. resetForTest clears both buffer and totalPushed', () => {
    pushMany(10);
    resetForTest();
    expect(getRawHistory().length).toBe(0);
    expect(getTotalPushed()).toBe(0);
  });
});
|
||||
|
||||
describe('undoModification eviction-aware error', () => {
  // An empty object cast to Page: per the original author's note, these cases
  // are expected to reject on the index check before the page is touched.
  const stubPage = {} as unknown as Page;

  // Pushes `count` fake modifications with timestamps 0..count-1.
  const seedHistory = (count: number): void => {
    Array.from({ length: count }, (_, i) => i).forEach((i) => pushModification(fakeMod(i)));
  };

  test('5. out-of-range BEFORE any eviction → no evicted note', async () => {
    seedHistory(5);
    const attempt = undoModification(stubPage, 99);
    await expect(attempt).rejects.toThrow('No modification at index 99. History has 5 entries.');
  });

  test('6. out-of-range AFTER eviction → message names the evicted count', async () => {
    // Push 73 entries more than the cap, then ask for index 400.
    seedHistory(MOD_HISTORY_CAP + 73);
    const expectedMessage =
      `No modification at index 400. History has ${MOD_HISTORY_CAP} entries ` +
      `(most recent ${MOD_HISTORY_CAP} only — 73 earlier entries evicted at the cap).`;
    await expect(undoModification(stubPage, 400)).rejects.toThrow(expectedMessage);
  });

  test('7. negative explicit index throws cleanly (no NaN propagation)', async () => {
    seedHistory(10);
    const attempt = undoModification(stubPage, -1);
    await expect(attempt).rejects.toThrow('No modification at index -1.');
  });
});
|
||||
|
|
@ -1,171 +0,0 @@
|
|||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import type { Page } from 'playwright';
|
||||
import { withCdpSession, getOrCreateCdpSession } from '../src/cdp-bridge';
|
||||
|
||||
// Static-grep tripwire + behavior tests for the CDP session lifecycle
|
||||
// helpers introduced as part of the D11 EXPAND_SCOPE memory-leak fix.
|
||||
//
|
||||
// Direct calls to `page.context().newCDPSession(page)` are the leak class
|
||||
// the helpers exist to close — every direct call needs a matching
|
||||
// `session.detach()` and forgetting it leaves the Chromium-side target
|
||||
// attached until the underlying transport drops. The tripwire fails CI
|
||||
// if any source file calls `newCDPSession(` outside `cdp-bridge.ts`
|
||||
// (the file that owns the helpers).
|
||||
//
|
||||
// Pattern mirrors browse/test/terminal-agent-pid-identity.test.ts and
|
||||
// browse/test/server-sanitize-surrogates.test.ts: read source files
|
||||
// directly, assert an invariant on their contents.
|
||||
|
||||
// Absolute path of the sibling `src` directory, resolved from this test
// file's own directory. `import.meta.dir` (Bun) is a plain filesystem path;
// `new URL(import.meta.url).pathname` is not — it stays percent-encoded
// (a space in the checkout path becomes `%20`) and keeps a leading slash in
// front of a Windows drive letter, so readdirSync would target a bogus path.
const SRC_DIR = path.resolve(import.meta.dir, '..', 'src');
|
||||
|
||||
/**
 * Loads every `.ts` entry directly inside SRC_DIR (no recursion into
 * subdirectories) and returns each entry name paired with its UTF-8 text.
 */
function readAllSourceFiles(): Array<{ file: string; content: string }> {
  return fs
    .readdirSync(SRC_DIR)
    .filter((name) => name.endsWith('.ts'))
    .map((name) => ({
      file: name,
      content: fs.readFileSync(path.join(SRC_DIR, name), 'utf-8'),
    }));
}
|
||||
|
||||
describe('CDP session cleanup invariant', () => {
  test('1. no source file calls `newCDPSession(` outside cdp-bridge.ts', () => {
    const directCall = /newCDPSession\s*\(/;
    // Lines that begin with `//` or `*` after trimming are treated as comment
    // text, so documentation that mentions the call is not flagged.
    const looksLikeComment = (text: string): boolean => text.startsWith('//') || text.startsWith('*');

    const offenders = readAllSourceFiles()
      // cdp-bridge.ts is the single file allowed to call newCDPSession directly.
      .filter(({ file }) => file !== 'cdp-bridge.ts')
      .flatMap(({ file, content }) =>
        content.split('\n').flatMap((rawLine, index) => {
          const text = rawLine.trim();
          const flagged = directCall.test(rawLine) && !looksLikeComment(text);
          return flagged ? [{ file, line: index + 1, text }] : [];
        }),
      );

    if (offenders.length > 0) {
      // Throw a readable file:line listing instead of relying on the matcher diff.
      const formatted = offenders.map((o) => `  ${o.file}:${o.line}  ${o.text}`).join('\n');
      throw new Error(
        `Direct newCDPSession(...) calls found outside cdp-bridge.ts. ` +
          `Route through withCdpSession() (one-shot, finally-detach) or ` +
          `getOrCreateCdpSession() (cached, close-detach) instead:\n${formatted}`,
      );
    }
    expect(offenders).toEqual([]);
  });

  test('2. helper file exports the two documented entry points', () => {
    // Guard: the scan above proves nothing if the helpers are no longer exported.
    for (const helper of [withCdpSession, getOrCreateCdpSession]) {
      expect(typeof helper).toBe('function');
    }
  });
});
|
||||
|
||||
describe('withCdpSession finally-detach', () => {
  // Builds a Page-shaped fake exposing only context().newCDPSession(), whose
  // session has a detach() that bumps `detachSpy.called` and, when
  // `detachSpy.rejected` is set, throws that error.
  const makeFakePage = (detachSpy: { called: number; rejected?: Error }): Page => {
    const session = {
      async detach() {
        detachSpy.called += 1;
        if (detachSpy.rejected) {
          throw detachSpy.rejected;
        }
      },
    };
    const context = () => ({
      newCDPSession: async (_p: unknown) => session,
    });
    return { context } as unknown as Page;
  };

  test('3. detaches on the success path', async () => {
    const detachSpy = { called: 0 };
    const value = await withCdpSession(makeFakePage(detachSpy), async (session) => {
      expect(session).toBeDefined();
      return 42;
    });
    expect(value).toBe(42);
    expect(detachSpy.called).toBe(1);
  });

  test('4. detaches even when fn throws (the actual leak fix)', async () => {
    const detachSpy = { called: 0 };
    const failing = withCdpSession(makeFakePage(detachSpy), async () => {
      throw new Error('boom');
    });
    await expect(failing).rejects.toThrow('boom');
    expect(detachSpy.called).toBe(1);
  });

  test('5. swallows detach errors so they do not mask fn errors', async () => {
    // Both fn and detach fail; the caller must see fn's error.
    const detachSpy = { called: 0, rejected: new Error('already detached') };
    const failing = withCdpSession(makeFakePage(detachSpy), async () => {
      throw new Error('original');
    });
    await expect(failing).rejects.toThrow('original');
    expect(detachSpy.called).toBe(1);
  });

  test('6. swallows detach errors on the success path too', async () => {
    const detachSpy = { called: 0, rejected: new Error('target closed') };
    const value = await withCdpSession(makeFakePage(detachSpy), async () => 'ok');
    expect(value).toBe('ok');
    expect(detachSpy.called).toBe(1);
  });
});
|
||||
|
||||
describe('getOrCreateCdpSession close-detach', () => {
  // Fake page that records listeners registered through once('close', …) and
  // lets the test trigger them via fireClose(). The fake session counts
  // detach() calls in `_detachCount`.
  function makeFakePage() {
    const onClose: Array<() => void> = [];
    const session = {
      _detachCount: 0,
      async detach() {
        session._detachCount += 1;
      },
    };
    const fireClose = (): void => {
      onClose.forEach((listener) => listener());
    };
    const page = {
      context: () => ({
        newCDPSession: async (_p: unknown) => session,
      }),
      once(event: string, fn: () => void) {
        if (event === 'close') {
          onClose.push(fn);
        }
      },
      _fireClose: fireClose,
    };
    return { page: page as unknown as Page, session, fireClose };
  }

  test('7. caches the session across calls', async () => {
    const { page } = makeFakePage();
    const cache = new WeakMap<Page, any>();
    const first = await getOrCreateCdpSession(page, cache);
    const second = await getOrCreateCdpSession(page, cache);
    expect(first).toBe(second);
  });

  test('8. close hook detaches the session AND clears the cache', async () => {
    const { page, session, fireClose } = makeFakePage();
    const cache = new WeakMap<Page, any>();
    await getOrCreateCdpSession(page, cache);
    expect(cache.get(page)).toBeDefined();

    fireClose();
    // Yield one macrotask so any async work started by the close listener can
    // finish before the assertions run.
    await new Promise((resolve) => setTimeout(resolve, 0));

    expect(cache.get(page)).toBeUndefined();
    expect(session._detachCount).toBe(1);
  });
});
|
||||
|
|
@ -1,75 +0,0 @@
|
|||
/**
|
||||
* Coverage for #1612 — macOS/Linux server must survive sandboxed-shell
|
||||
* harnesses by becoming its own session leader (setsid).
|
||||
*
|
||||
* Pre-#1612, Bun.spawn().unref() removed the child from Bun's event loop
|
||||
* but did NOT call setsid(). When the CLI ran inside Claude Code's
|
||||
* per-command sandbox, Conductor, or CI step runners, the session leader's
|
||||
* exit sent SIGHUP to every PID in the session, killing the bun server.
|
||||
*
|
||||
* The fix routes macOS/Linux spawn through Node's child_process.spawn with
|
||||
* detached:true, which calls setsid() so the server becomes its own session
|
||||
* leader (PPID=1 on Linux, similar reparenting on Darwin).
|
||||
*
|
||||
* The actual setsid syscall is hard to assert in a unit test without a
|
||||
* real spawn — testing here is static: the cli.ts source must use the
|
||||
* Node spawn path on macOS/Linux, with detached:true and .unref(). If a
|
||||
* future refactor reverts to Bun.spawn().unref() on the macOS/Linux branch
|
||||
* the regression returns and these tests fail.
|
||||
*/
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, "..", "..");
|
||||
const CLI = path.join(ROOT, "browse", "src", "cli.ts");
|
||||
|
||||
/** Returns the current text of the CLI source file (CLI) decoded as UTF-8. */
function read(): string {
  const cliSource = fs.readFileSync(CLI, { encoding: "utf-8" });
  return cliSource;
}
|
||||
|
||||
describe("#1612 macOS/Linux daemonize via Node setsid path", () => {
  test("cli.ts imports nodeSpawn from child_process (Node spawn alias)", () => {
    const source = read();
    // Accept either `spawn as nodeSpawn` or a bare `nodeSpawn` followed by
    // `,` / `}`, and require some import from 'child_process' in the file.
    const importChecks = [
      /(spawn as nodeSpawn|nodeSpawn\s*[,}])/,
      /from\s+['"]child_process['"]/,
    ];
    for (const pattern of importChecks) {
      expect(source).toMatch(pattern);
    }
  });

  test("non-Windows branch uses nodeSpawn(...).unref() with detached:true", () => {
    const source = read();
    // Both `detached: true` and `.unref()` must appear within 500 characters
    // after a `nodeSpawn(` occurrence.
    expect(source).toMatch(/nodeSpawn\([\s\S]{0,500}detached:\s*true/);
    expect(source).toMatch(/nodeSpawn\([\s\S]{0,500}\.unref\(\)/);
  });

  test("non-Windows branch comment documents setsid/SIGHUP root cause", () => {
    const source = read();
    // The file must mention both terms somewhere, so the rationale stays next
    // to the spawn call for whoever edits it next.
    expect(source).toMatch(/setsid/);
    expect(source).toMatch(/SIGHUP/);
  });

  test("the spawn call on macOS/Linux is nodeSpawn, not Bun.spawn", () => {
    // Drop whole-line `//` comments first so prose that mentions
    // "Bun.spawn().unref()" cannot produce a false positive.
    const isLineComment = (line: string): boolean => line.trimStart().startsWith("//");
    const codeOnly = read()
      .split("\n")
      .filter((line) => !isLineComment(line))
      .join("\n");

    // Anchor on the literal `nodeSpawn('bun'` call and inspect the 400
    // characters starting there: a nodeSpawn( call, and no Bun.spawn( call.
    const anchor = codeOnly.indexOf("nodeSpawn('bun'");
    expect(anchor).toBeGreaterThan(-1);
    const window = codeOnly.slice(anchor, anchor + 400);
    expect(window).toMatch(/nodeSpawn\(/);
    expect(window).not.toMatch(/Bun\.spawn\(/);
  });
});
|
||||
|
|
@ -1,81 +0,0 @@
|
|||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
// v1.44 outer supervisor — static-grep invariants.
|
||||
//
|
||||
// Pre-v1.44 `$B connect` was fire-and-forget: spawn server detached, CLI
|
||||
// exits, server runs unsupervised. If the server crashed, the user had to
|
||||
// re-run `$B connect`. The opt-in supervisor (--supervise or
|
||||
// BROWSE_SUPERVISE=1) keeps the CLI attached and respawns the server on
|
||||
// unexpected exit, with the same crash-loop guard shape as the v1.44
|
||||
// terminal-agent watchdog.
|
||||
//
|
||||
// Live respawn tests belong in the e2e tier (real Bun.spawn cycles take
|
||||
// 3-8s each). These tripwires defend the load-bearing invariants:
|
||||
// opt-in by default, signal handlers wired, crash-loop guard, env knobs.
|
||||
|
||||
// Absolute path of ../src/cli.ts, resolved from this test file's own
// directory. `import.meta.dir` (Bun) is a plain filesystem path, whereas
// `new URL(import.meta.url).pathname` stays percent-encoded (spaces → `%20`)
// and keeps a leading slash before a Windows drive letter, which makes
// readFileSync fail on such checkouts.
const CLI_TS = path.resolve(import.meta.dir, '..', 'src', 'cli.ts');
|
||||
|
||||
describe('CLI outer supervisor (v1.44+)', () => {
  // Re-read cli.ts inside every test so each case is independent of the others.
  const loadCli = (): string => fs.readFileSync(CLI_TS, 'utf-8');

  test('1. supervisor is opt-in via --supervise flag or BROWSE_SUPERVISE env', () => {
    const cli = loadCli();
    for (const trigger of ["commandArgs.includes('--supervise')", "process.env.BROWSE_SUPERVISE === '1'"]) {
      expect(cli).toContain(trigger);
    }
    // When supervision was not requested the CLI must still exit 0 right
    // away: existing callers of `$B connect` depend on a prompt return.
    expect(cli).toMatch(/if \(!superviseRequested\) \{\s*process\.exit\(0\);\s*\}/);
  });

  test('2. SIGINT and SIGTERM trigger clean teardown', () => {
    const cli = loadCli();
    // Each signal handler must reach teardownAndExit on the same source line;
    // otherwise Ctrl-C would leave the supervised server orphaned.
    expect(cli).toMatch(/process\.on\('SIGINT'.*teardownAndExit/);
    expect(cli).toMatch(/process\.on\('SIGTERM'.*teardownAndExit/);
    // The teardown path has to SIGTERM the supervised server itself.
    expect(cli).toContain("safeKill(state.pid, 'SIGTERM')");
  });

  test('3. crash-loop guard with 5-in-5min rolling window', () => {
    const cli = loadCli();
    expect(cli).toContain('SUPERVISOR_GUARD_WINDOW_MS = 5 * 60_000');
    expect(cli).toContain('SUPERVISOR_GUARD_MAX = 5');
    // Old respawn timestamps must be pruned (respawns.shift()) so occasional
    // crashes spread over a long uptime never trip the guard.
    expect(cli).toMatch(/respawns\.shift\(\)/);
  });

  test('4. exponential backoff schedule, env-overridable', () => {
    const cli = loadCli();
    expect(cli).toContain('GSTACK_SUPERVISOR_BACKOFF');
    // Default schedule: short waits first, capped at a 30s wait.
    expect(cli).toContain('1000,2000,4000,8000,30000');
  });

  test('5. tick interval is env-overridable for tests', () => {
    expect(loadCli()).toContain('GSTACK_SUPERVISOR_TICK_MS');
  });

  test('6. respawned server gets a fresh terminal-agent too', () => {
    const cli = loadCli();
    // Only the supervisor section is inspected: it has to call
    // spawnTerminalAgent again after a respawn, because the previously
    // recorded agent belonged to the server process that just died.
    const supervisorSection = sliceBetween(cli, 'Supervisor mode:', '// ─── Headed Disconnect');
    expect(supervisorSection).toContain('spawnTerminalAgent({');
  });
});
|
||||
|
||||
/**
 * Returns the part of `source` that begins at the first occurrence of `start`
 * (inclusive) and stops just before the first occurrence of `end` found after
 * that start marker (exclusive).
 *
 * @throws Error when either marker cannot be found.
 */
function sliceBetween(source: string, start: string, end: string): string {
  const from = source.indexOf(start);
  if (from < 0) {
    throw new Error(`marker not found: ${start}`);
  }

  // Search for the end marker only past the start marker's own text.
  const to = source.indexOf(end, from + start.length);
  if (to < 0) {
    throw new Error(`end marker not found: ${end}`);
  }

  return source.substring(from, to);
}
|
||||
|
|
@ -47,15 +47,4 @@ describe('locateBinary', () => {
|
|||
expect(typeof locateBinary).toBe('function');
|
||||
expect(locateBinary.length).toBe(0);
|
||||
});
|
||||
|
||||
test('source-checkout fallback resolves <git-root>/browse/dist/browse[.exe]', () => {
|
||||
// The windows-setup-e2e.yml workflow builds binaries directly under
|
||||
// browse/dist/ (no .claude/skills/gstack/ install layout). find-browse
|
||||
// must resolve those — otherwise every fresh build that hasn't run
|
||||
// ./setup yet looks broken. Static pin so a future refactor that
|
||||
// drops the source-checkout branch trips this test.
|
||||
const src = require('fs').readFileSync(require('path').join(__dirname, '../src/find-browse.ts'), 'utf-8');
|
||||
expect(src).toContain('Source-checkout fallback');
|
||||
expect(src).toContain("join(root, 'browse', 'dist', 'browse')");
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
import { describe, test, expect } from 'bun:test';
|
||||
import * as net from 'net';
|
||||
import * as path from 'path';
|
||||
import { __testInternals__ } from '../src/server';
|
||||
|
||||
const polyfillPath = path.resolve(import.meta.dir, '../src/bun-polyfill.cjs');
|
||||
|
||||
|
|
@ -29,47 +28,6 @@ function getFreePort(): Promise<number> {
|
|||
}
|
||||
|
||||
describe('findPort / isPortAvailable', () => {
|
||||
test('explicit BROWSE_PORT diagnostic distinguishes bind denial from occupied port', () => {
|
||||
const blocked = __testInternals__.formatExplicitPortUnavailableError(34567, {
|
||||
available: false,
|
||||
code: 'EPERM',
|
||||
message: 'operation not permitted',
|
||||
}).message;
|
||||
|
||||
expect(blocked).toContain('Cannot bind BROWSE_PORT=34567');
|
||||
expect(blocked).toContain('localhost port binding is blocked');
|
||||
expect(blocked).toContain('not that the port is occupied');
|
||||
|
||||
const occupied = __testInternals__.formatExplicitPortUnavailableError(34567, {
|
||||
available: false,
|
||||
code: 'EADDRINUSE',
|
||||
message: 'address already in use',
|
||||
}).message;
|
||||
|
||||
expect(occupied).toBe('[browse] Port 34567 (from BROWSE_PORT env) is in use');
|
||||
});
|
||||
|
||||
test('random port diagnostic calls out sandbox-style bind denial', () => {
|
||||
const message = __testInternals__.formatRandomPortUnavailableError([
|
||||
{ port: 11001, result: { available: false, code: 'EADDRINUSE', message: 'address already in use' } },
|
||||
{ port: 12002, result: { available: false, code: 'EPERM', message: 'operation not permitted' } },
|
||||
]).message;
|
||||
|
||||
expect(message).toContain('Cannot bind localhost ports after 2 attempts');
|
||||
expect(message).toContain('Last error: 12002 (EPERM: operation not permitted)');
|
||||
expect(message).toContain('not that every sampled port is occupied');
|
||||
expect(message).toContain('set BROWSE_PORT to an approved port');
|
||||
});
|
||||
|
||||
test('random port diagnostic preserves old busy-port meaning when all attempts are occupied', () => {
|
||||
const message = __testInternals__.formatRandomPortUnavailableError([
|
||||
{ port: 11001, result: { available: false, code: 'EADDRINUSE', message: 'address already in use' } },
|
||||
{ port: 12002, result: { available: false, code: 'EADDRINUSE', message: 'address already in use' } },
|
||||
]).message;
|
||||
|
||||
expect(message).toContain('No available port after 5 attempts');
|
||||
expect(message).toContain('every sampled port was already in use');
|
||||
});
|
||||
|
||||
test('isPortAvailable returns true for a free port', async () => {
|
||||
// Use the same isPortAvailable logic from server.ts
|
||||
|
|
|
|||
|
|
@ -1,247 +0,0 @@
|
|||
import { describe, test, expect } from 'bun:test';
|
||||
import { formatBytes, type MemorySnapshot, type MemoryStructureStats } from '../src/memory-snapshot';
|
||||
|
||||
// Unit coverage for the $B memory diagnostic surface — formatter, byte
|
||||
// renderer, and the structures-stats aggregator. The integration path
|
||||
// ($B memory through the BrowserManager → CDP) requires a real headless
|
||||
// Chromium and is covered indirectly by browse-basic in the eval suite.
|
||||
// These tests pin the renderer logic in isolation so format regressions
|
||||
// (rounded GB drift, missing "and N more" tail, snapshot.notes ordering)
|
||||
// surface immediately.
|
||||
|
||||
// ─── formatBytes() ─────────────────────────────────────────────
|
||||
|
||||
describe('formatBytes', () => {
  const KB = 1024;
  const MB = KB * 1024;
  const GB = MB * 1024;

  test('1. < 1 KB renders as bytes', () => {
    const cases: Array<[number, string]> = [
      [0, '0 B'],
      [1, '1 B'],
      [1023, '1023 B'],
    ];
    for (const [input, rendered] of cases) {
      expect(formatBytes(input)).toBe(rendered);
    }
  });

  test('2. KB tier (1024 ... 1024^2-1)', () => {
    expect(formatBytes(KB)).toBe('1.0 KB');
    expect(formatBytes(1536)).toBe('1.5 KB');
    // One byte below the MB boundary may round up to 1024.0 KB or stay 1023.x KB.
    expect(formatBytes(MB - 1)).toMatch(/^1024\.0 KB$|^1023\.\d KB$/);
  });

  test('3. MB tier', () => {
    expect(formatBytes(MB)).toBe('1.0 MB');
    expect(formatBytes(312 * MB)).toBe('312.0 MB');
  });

  test('4. GB tier renders with 2 decimals', () => {
    expect(formatBytes(GB)).toBe('1.00 GB');
    expect(formatBytes(1.4 * GB)).toMatch(/^1\.40 GB$/);
    // 160.61 GB is the value the original author cites from an OOM report;
    // the renderer has to cope with readings of that size.
    const leakScale = 160.61 * GB;
    expect(formatBytes(leakScale)).toMatch(/^160\.6\d GB$/);
  });

  test('5. negative input behavior — coerces to bytes path (best-effort, do not throw)', () => {
    // Only "does not throw" is pinned here; the rendered text is left unspecified.
    const renderNegative = () => formatBytes(-1);
    expect(renderNegative).not.toThrow();
  });
});
|
||||
|
||||
// ─── handleMemoryCommand text + json output ────────────────────
|
||||
|
||||
// Build a minimal MemorySnapshot fixture exercising every render branch.
|
||||
// This is what bm.getMemorySnapshot would return; we stub the BrowserManager
|
||||
// so the test never spins up real Chromium.
|
||||
/** Returns a fixed MemoryStructureStats fixture with every field populated. */
function makeStructureStats(): MemoryStructureStats {
  const stats: MemoryStructureStats = {
    modificationHistory: { current: 42, cap: 200, evicted: 0 },
    activitySubscribers: 1,
    inspectorSubscribers: 0,
    consoleBufferLen: 1842,
    networkBufferLen: 12000,
    dialogBufferLen: 3,
    captureBufferBytes: 0,
  };
  return stats;
}
|
||||
|
||||
/**
 * Builds a MemorySnapshot fixture: fixed Bun server numbers, no tabs, no
 * process list, fresh structure stats and no notes. Any field present in
 * `overrides` replaces the corresponding default.
 */
function makeSnapshot(overrides: Partial<MemorySnapshot> = {}): MemorySnapshot {
  const MB = 1024 * 1024;
  const defaults: MemorySnapshot = {
    bunServer: {
      rss: 312 * MB,
      heapUsed: 84 * MB,
      heapTotal: 120 * MB,
      external: 21 * MB,
    },
    tabs: [],
    processes: null,
    structures: makeStructureStats(),
    capturedAt: 1700000000000,
    notes: [],
  };
  return { ...defaults, ...overrides };
}
|
||||
|
||||
// Mock BrowserManager surface for handleMemoryCommand. Only
|
||||
// getMemorySnapshot is touched.
|
||||
/**
 * Wraps `snapshot` in an object cast to BrowserManager that implements only
 * getMemorySnapshot. The method resolves to a copy of `snapshot` whose
 * `structures` field is replaced by the stats passed in by the caller.
 */
function makeFakeBm(snapshot: MemorySnapshot) {
  const fake = {
    async getMemorySnapshot(structures: MemoryStructureStats) {
      return { ...snapshot, structures };
    },
  };
  return fake as unknown as import('../src/browser-manager').BrowserManager;
}
|
||||
|
||||
// Output tests for handleMemoryCommand, driven through a stubbed
// BrowserManager so no browser is launched. `structures` in the output comes
// from the value handleMemoryCommand passes to getMemorySnapshot, not from
// the fixture (see makeFakeBm).
describe('handleMemoryCommand', () => {
  test('6. --json mode emits parseable JSON with bunServer + structures', async () => {
    const { handleMemoryCommand } = await import('../src/memory-command');
    const snapshot = makeSnapshot();
    const result = await handleMemoryCommand(['--json'], makeFakeBm(snapshot));
    const parsed = JSON.parse(result);
    expect(parsed.bunServer.rss).toBe(312 * 1024 * 1024);
    expect(parsed.structures).toBeDefined();
    expect(parsed.structures.modificationHistory.cap).toBe(200);
  });

  test('7. text mode renders Bun server line with RSS + heap', async () => {
    const { handleMemoryCommand } = await import('../src/memory-command');
    const result = await handleMemoryCommand([], makeFakeBm(makeSnapshot()));
    expect(result).toContain('Bun server:');
    expect(result).toContain('312.0 MB');
    expect(result).toContain('84.0 MB');
  });

  test('8. text mode renders "no tabs tracked" when tabs array is empty', async () => {
    const { handleMemoryCommand } = await import('../src/memory-command');
    const result = await handleMemoryCommand([], makeFakeBm(makeSnapshot({ tabs: [] })));
    expect(result).toContain('Renderers:');
    expect(result).toContain('(no tabs tracked)');
  });

  test('9. text mode shows top 10 tabs + "...and N more" tail when > 10', async () => {
    const { handleMemoryCommand } = await import('../src/memory-command');
    const tabs = Array.from({ length: 15 }, (_, i) => ({
      id: i,
      url: `https://example.com/tab${i}`,
      title: `Tab ${i}`,
      jsHeapUsed: (15 - i) * 50 * 1024 * 1024, // descending so sort matters
      jsHeapTotal: (15 - i) * 60 * 1024 * 1024,
      documents: 1,
      nodes: 100,
      listeners: 10,
    }));
    const result = await handleMemoryCommand([], makeFakeBm(makeSnapshot({ tabs })));
    expect(result).toContain('Renderers: 15 tabs');
    expect(result).toContain('and 5 more');
    // Sorted by JS heap descending — tab 0 (largest) should appear before tab 9.
    // Both rows must actually be present: indexOf returns -1 for a missing
    // row, which would otherwise satisfy the ordering check vacuously.
    const firstRow = result.indexOf('tab #0 —');
    const tenthRow = result.indexOf('tab #9 —');
    expect(firstRow).toBeGreaterThanOrEqual(0);
    expect(tenthRow).toBeGreaterThanOrEqual(0);
    expect(firstRow).toBeLessThan(tenthRow);
  });

  test('10. text mode renders Chromium processes grouped by type', async () => {
    const { handleMemoryCommand } = await import('../src/memory-command');
    const snapshot = makeSnapshot({
      processes: [
        { id: 1, type: 'browser', cpuTime: 1.5 },
        { id: 2, type: 'renderer', cpuTime: 3.2 },
        { id: 3, type: 'renderer', cpuTime: 2.1 },
        { id: 4, type: 'gpu', cpuTime: 0.5 },
      ],
    });
    const result = await handleMemoryCommand([], makeFakeBm(snapshot));
    expect(result).toContain('Chromium processes: 4 total');
    expect(result).toContain('renderer=2');
    expect(result).toContain('browser=1');
    expect(result).toContain('gpu=1');
  });

  test('11. text mode renders "unavailable" line when processes is null', async () => {
    const { handleMemoryCommand } = await import('../src/memory-command');
    const result = await handleMemoryCommand([], makeFakeBm(makeSnapshot({ processes: null })));
    expect(result).toContain('Chromium processes: (unavailable — see notes)');
  });

  test('12. text mode renders modificationHistory with evicted-count when > 0', async () => {
    // NOTE(review): this test never calls the renderer. The text formatter is
    // not exported and the fake BrowserManager replaces fixture `structures`
    // with the live stats, so an evicted count cannot be injected through
    // handleMemoryCommand. What follows only pins the intended string shape
    // by rebuilding the line locally; a renderer change would NOT fail it.
    // Consider exporting the formatter (or making the stats injectable) so
    // this can assert on real output.
    const mod = await import('../src/memory-command');
    const stats = makeStructureStats();
    stats.modificationHistory = { current: 200, cap: 200, evicted: 47 };
    const renderedExpected =
      'modificationHistory: 200 / 200 entries (47 evicted since reset)';
    const { current, cap, evicted } = stats.modificationHistory;
    const expected =
      `modificationHistory: ${current} / ${cap} entries` +
      (evicted > 0 ? ` (${evicted} evicted since reset)` : '');
    expect(expected).toBe(renderedExpected);
    // Keeps the module import (and its load-time failure mode) in the test.
    void mod;
  });

  test('13. text mode renders modificationHistory line shape', async () => {
    const { handleMemoryCommand } = await import('../src/memory-command');
    const result = await handleMemoryCommand([], makeFakeBm(makeSnapshot()));
    // collectStructureStats reads live module state; values may be 0 in
    // the test env. Verify the LINE SHAPE rather than specific numbers.
    expect(result).toMatch(/modificationHistory:\s+\d+ \/ \d+ entries/);
  });

  test('14. text mode prints notes section when notes are present', async () => {
    const { handleMemoryCommand } = await import('../src/memory-command');
    const snapshot = makeSnapshot({
      notes: ['Per-Chromium-process RSS not collected — CDP limitation.'],
    });
    const result = await handleMemoryCommand([], makeFakeBm(snapshot));
    expect(result).toContain('Notes:');
    expect(result).toContain('CDP limitation.');
  });

  test('15. text mode omits notes section when notes is empty', async () => {
    const { handleMemoryCommand } = await import('../src/memory-command');
    const result = await handleMemoryCommand([], makeFakeBm(makeSnapshot({ notes: [] })));
    expect(result).not.toContain('Notes:');
  });

  test('16. text mode truncates long tab URLs with ellipsis', async () => {
    const { handleMemoryCommand } = await import('../src/memory-command');
    const longUrl = 'https://example.com/' + 'a'.repeat(120);
    const tabs = [{
      id: 1,
      url: longUrl,
      title: 'long',
      jsHeapUsed: 1024,
      jsHeapTotal: 2048,
      documents: 1,
      nodes: 10,
      listeners: 1,
    }];
    const result = await handleMemoryCommand([], makeFakeBm(makeSnapshot({ tabs })));
    expect(result).toContain('...');
    // The truncated URL appears, the full URL does not
    expect(result.includes(longUrl)).toBe(false);
  });
});
|
||||
|
||||
// ─── buildMemorySnapshotJson — server-endpoint entry ──────────
|
||||
|
||||
// buildMemorySnapshotJson returns the snapshot object itself (not a string),
// so fields are asserted directly.
describe('buildMemorySnapshotJson', () => {
  test('17. returns the snapshot with structures populated', async () => {
    const { buildMemorySnapshotJson } = await import('../src/memory-command');
    const snapshot = makeSnapshot();
    const result = await buildMemorySnapshotJson(makeFakeBm(snapshot));
    expect(result.bunServer.rss).toBe(snapshot.bunServer.rss);
    expect(result.structures.modificationHistory.cap).toBe(200);
    // structures is populated from live module accessors, not from the
    // fixture. Just assert the shape is right.
    expect(typeof result.structures.consoleBufferLen).toBe('number');
    expect(typeof result.structures.networkBufferLen).toBe('number');
  });
});
|
||||
|
|
@ -1,132 +0,0 @@
|
|||
import { describe, test, expect } from 'bun:test';
|
||||
import { BrowserManager } from '../src/browser-manager';
|
||||
import { networkBuffer } from '../src/buffers';
|
||||
|
||||
// Reproducer for the body-materialization leak fixed in the D10
|
||||
// USE_CDP_EVENT_BATCHED commit. Pre-fix, the wirePageEvents
|
||||
// `requestfinished` listener called `await res.body()` just to read
|
||||
// `.length`, allocating the full response body into a Bun Buffer on
|
||||
// every request — multi-GB/hour of churn on long-lived headed
|
||||
// Chromium with media-heavy pages.
|
||||
//
|
||||
// What this test pins:
|
||||
// - The handler calls Playwright's structured req.sizes() API
|
||||
// (which pulls from Network.loadingFinished without
|
||||
// materializing the body).
|
||||
// - The handler NEVER calls res.body(), even though a fake response
|
||||
// exposes the method.
|
||||
// - networkBuffer entries are still populated with the right size.
|
||||
//
|
||||
// What this test does NOT cover:
|
||||
// - A real Chromium burst measuring peak Bun RSS during concurrent
|
||||
// fetches. That's a periodic-tier test (browse/test/
|
||||
// memory-leak-reproducer-e2e.test.ts, deferred — see TODOS).
|
||||
// - Per-tab JS heap growth on the Chromium side. Outside Bun's
|
||||
// visibility entirely.
|
||||
//
|
||||
// Wall clock target: < 1 second. Gate tier.
|
||||
|
||||
/** Shared tallies of how often the fake request's methods were invoked. */
interface CallCounters {
  /** Number of `sizes()` calls across all fake requests sharing this object. */
  sizes: number;
  /** Number of `body()` calls on any fake response; expected to stay 0. */
  body: number;
}

/**
 * Build a fake request object with `url`, `sizes`, `method` and `response`.
 *
 * @param url              URL reported by both the request and its response.
 * @param responseBodySize Value reported as `responseBodySize` by `sizes()`,
 *                         and the length of the Buffer `body()` would allocate.
 * @param counters         Mutated in place: `sizes` is incremented per
 *                         `sizes()` call, `body` per `body()` call.
 */
function makeFakeReq(url: string, responseBodySize: number, counters: CallCounters) {
  return {
    url: () => url,
    sizes: async () => {
      counters.sizes++;
      return {
        requestBodySize: 0,
        requestHeadersSize: 100,
        responseBodySize,
        responseHeadersSize: 200,
      };
    },
    method: () => 'GET',
    response: async () => ({
      url: () => url,
      status: () => 200,
      body: async () => {
        // If THIS runs, the leak is back. Allocate a real Buffer so a
        // future reviewer reading the failing assertion sees what
        // pre-fix code was doing on every request.
        counters.body++;
        return Buffer.alloc(responseBodySize);
      },
    }),
  };
}
|
||||
|
||||
/** Registered handlers, keyed by event name, in registration order. */
interface ListenerMap {
  [event: string]: Array<(arg: unknown) => void>;
}

/**
 * Build a tiny synchronous event emitter with `on`, `emit` and
 * `listenerCount`.
 *
 * `emit` dispatches to a copy of the handler list, so a handler that
 * registers another handler for the same event during dispatch does not
 * cause the new handler to run in that same pass (the behaviour of Node's
 * EventEmitter). Iterating the live array would also loop forever if a
 * handler kept re-registering itself.
 */
function makeFakePage() {
  const listeners: ListenerMap = {};
  return {
    /** Append `fn` to the handlers for `event`. */
    on(event: string, fn: (arg: unknown) => void): void {
      const bucket = listeners[event] ?? (listeners[event] = []);
      bucket.push(fn);
    },
    /** Synchronously call every handler registered for `event` with `arg`. */
    emit(event: string, arg: unknown): void {
      const current = [...(listeners[event] ?? [])];
      for (const fn of current) fn(arg);
    },
    /** Number of handlers currently registered for `event` (0 if none). */
    listenerCount(event: string): number {
      return (listeners[event] ?? []).length;
    },
  };
}
|
||||
|
||||
// Drives BrowserManager's private wirePageEvents against a fake page and
// checks that a burst of `requestfinished` events reads sizes via
// req.sizes() without ever calling res.body().
describe('memory-leak reproducer: requestfinished does not materialize bodies', () => {
  test('burst of 200 requestfinished events calls req.sizes() but never res.body()', async () => {
    const bm = new BrowserManager();
    const page = makeFakePage();

    // wirePageEvents is private — access via the same indexed pattern the
    // tab-guardrail test uses to drive private methods.
    const wirePageEvents = (
      bm as unknown as { wirePageEvents: (p: unknown) => void }
    ).wirePageEvents.bind(bm);
    wirePageEvents(page);

    // Seed networkBuffer with 200 request entries via the existing
    // page.on('request') handler so the requestfinished backward-scan
    // has something to match against.
    const startLen = networkBuffer.length;
    for (let i = 0; i < 200; i++) {
      page.emit('request', {
        url: () => `https://example.invalid/asset/${i}`,
        method: () => 'GET',
      });
    }

    // Fire 200 requestfinished events concurrently. Each notional response
    // is 1 MB — pre-fix this would allocate 200 MB of Buffer. With the fix,
    // not one byte of body content is allocated.
    const counters: CallCounters = { sizes: 0, body: 0 };
    const reqs = Array.from({ length: 200 }, (_, i) =>
      makeFakeReq(`https://example.invalid/asset/${i}`, 1024 * 1024, counters),
    );
    for (const req of reqs) page.emit('requestfinished', req);

    // Drain the async handler chain — wirePageEvents.requestfinished is
    // async; each emit kicks off a microtask that awaits req.sizes().
    await new Promise((r) => setTimeout(r, 50));
    // One more tick in case of cascading microtasks.
    await new Promise((r) => setTimeout(r, 0));

    // Every event hit req.sizes().
    expect(counters.sizes).toBeGreaterThanOrEqual(200);
    // The actual leak fix: res.body() is NEVER called.
    expect(counters.body).toBe(0);
    // And the size data still made it into networkBuffer.
    const populated = Array.from({ length: networkBuffer.length }, (_, i) =>
      networkBuffer.get(i),
    )
      .filter((e) => e && e.url?.startsWith('https://example.invalid/asset/'))
      .filter((e) => typeof e?.size === 'number' && e.size > 0).length;
    expect(populated).toBeGreaterThanOrEqual(200);
    // Sanity: the seed didn't double-count from a previous run.
    expect(networkBuffer.length).toBeGreaterThan(startLen);
  });
});
|
||||
|
|
@ -1,76 +0,0 @@
|
|||
/**
|
||||
* Tests for the /pty-inject-scan endpoint (#1370).
|
||||
*
|
||||
* Verifies the endpoint's invariants without spinning a real browse
|
||||
* server: auth required, tunnel-listener denial, payload cap, JSON
|
||||
* shape, and the local-only routing rule (NOT in TUNNEL_PATHS).
|
||||
*
|
||||
* Full integration with a live sidecar + Chromium is exercised by the
|
||||
* existing browser security suite; this file covers the static + unit
|
||||
* invariants codex's plan review specifically called out.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import { readFileSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
|
||||
// Full text of ../src/server.ts, read once at module load. Every test in this
// file is a string-level check against this source.
const SERVER_SRC = readFileSync(
  join(import.meta.dir, '..', 'src', 'server.ts'),
  'utf-8',
);
|
||||
|
||||
// String-level invariants for the /pty-inject-scan handler in server.ts.
// Most checks look at a window of source starting at the first mention of
// the endpoint path.
describe('/pty-inject-scan — server.ts static invariants', () => {
  /**
   * Index of the first `'/pty-inject-scan'` literal in server.ts. Fails the
   * calling test if the literal is missing, so no test can run against an
   * empty or misplaced window.
   */
  function endpointStart(): number {
    const start = SERVER_SRC.indexOf("'/pty-inject-scan'");
    expect(start).toBeGreaterThan(-1);
    return start;
  }

  /** The 5000 characters of server.ts beginning at the endpoint literal. */
  function endpointWindow(): string {
    const start = endpointStart();
    return SERVER_SRC.slice(start, start + 5000);
  }

  test('endpoint is defined as a POST handler', () => {
    expect(SERVER_SRC).toContain(
      "url.pathname === '/pty-inject-scan' && req.method === 'POST'",
    );
  });

  test('endpoint requires auth (validateAuth gate)', () => {
    // Find the endpoint block, verify it calls validateAuth before doing
    // any work. The block ends at the next section-divider comment when one
    // exists, otherwise at the fixed 5000-char window.
    const start = endpointStart();
    const blockEnd = SERVER_SRC.indexOf("\n // ─", start);
    const block = SERVER_SRC.slice(start, blockEnd > start ? blockEnd : start + 5000);
    expect(block).toContain('validateAuth(req)');
    expect(block).toContain('401');
  });

  test('endpoint caps payload at 64KB', () => {
    const block = endpointWindow();
    expect(block).toContain('64 * 1024');
    expect(block).toContain('payload-too-large');
    expect(block).toContain('413');
  });

  test('endpoint is NOT in the tunnel listener allowlist', () => {
    const tunnelBlockStart = SERVER_SRC.indexOf('const TUNNEL_PATHS = new Set<string>([');
    expect(tunnelBlockStart).toBeGreaterThan(-1);
    const tunnelBlockEnd = SERVER_SRC.indexOf(']);', tunnelBlockStart);
    // A missing terminator would make slice() treat -1 as "one before the
    // end of the file" and silently widen the region being checked.
    expect(tunnelBlockEnd).toBeGreaterThan(tunnelBlockStart);
    const tunnelAllowlist = SERVER_SRC.slice(tunnelBlockStart, tunnelBlockEnd);
    expect(tunnelAllowlist).not.toContain('/pty-inject-scan');
  });

  test('response goes through sanitizeReplacer (Unicode egress hardening)', () => {
    expect(endpointWindow()).toContain('sanitizeReplacer');
  });

  test('endpoint surfaces l4 availability shape for D7 degrade-to-WARN path', () => {
    const block = endpointWindow();
    expect(block).toContain('isSidecarAvailable');
    expect(block).toContain('available');
  });

  test('endpoint uses the sidecar client, not direct security-classifier import', () => {
    // Static check that server.ts imports from security-sidecar-client.ts,
    // NOT from security-classifier.ts directly (would brick the compiled
    // binary per CLAUDE.md).
    expect(SERVER_SRC).toContain("from './security-sidecar-client'");
    expect(SERVER_SRC).not.toContain("from './security-classifier'");
  });
});
|
||||
|
|
@ -1,98 +0,0 @@
|
|||
import { describe, test, expect, beforeEach } from 'bun:test';
|
||||
|
||||
// pty-session-lease registers a sessionId space distinct from the pre-v1.44
|
||||
// attach-token space (browse/src/pty-session-cookie.ts). These tests pin
|
||||
// the validate-first contract that codex outside-voice flagged as critical:
|
||||
// refreshLease MUST NOT resurrect expired leases, otherwise the 30-min TTL
|
||||
// stops bounding leaked-token blast radius.
|
||||
|
||||
import {
|
||||
mintLease,
|
||||
validateLease,
|
||||
refreshLease,
|
||||
revokeLease,
|
||||
leaseCount,
|
||||
__resetLeases,
|
||||
} from '../src/pty-session-lease';
|
||||
|
||||
// Start every test from an empty lease registry so leaseCount() assertions
// do not depend on test order.
beforeEach(() => {
  __resetLeases();
});
|
||||
|
||||
// Basic lifecycle: minting produces distinct ids, validation accepts only
// live ids, and revocation removes a lease without throwing on unknown input.
describe('pty-session-lease: mint/validate/revoke', () => {
  test('mintLease returns a fresh non-secret sessionId + future expiresAt', () => {
    const a = mintLease();
    const b = mintLease();
    expect(a.sessionId).toBeTruthy();
    expect(b.sessionId).toBeTruthy();
    expect(a.sessionId).not.toBe(b.sessionId);
    expect(a.expiresAt).toBeGreaterThan(Date.now());
    // base64url alphabet: characters in [A-Za-z0-9_-].
    expect(a.sessionId).toMatch(/^[A-Za-z0-9_-]+$/);
    expect(leaseCount()).toBe(2);
  });

  test('validateLease ok for fresh lease, false for unknown', () => {
    const { sessionId } = mintLease();
    const ok = validateLease(sessionId);
    expect(ok.ok).toBe(true);
    if (ok.ok) expect(ok.expiresAt).toBeGreaterThan(Date.now());
    expect(validateLease('not-a-real-session-id').ok).toBe(false);
    expect(validateLease(null).ok).toBe(false);
    expect(validateLease(undefined).ok).toBe(false);
  });

  test('revokeLease removes the lease; subsequent validate returns false', () => {
    const { sessionId } = mintLease();
    expect(validateLease(sessionId).ok).toBe(true);
    revokeLease(sessionId);
    expect(validateLease(sessionId).ok).toBe(false);
    expect(leaseCount()).toBe(0);
  });

  test('revokeLease tolerates unknown sessionId without throwing', () => {
    expect(() => revokeLease('phantom')).not.toThrow();
    expect(() => revokeLease(null)).not.toThrow();
  });
});
|
||||
|
||||
// refreshLease must validate before extending: unknown, revoked and expired
// ids all yield ok:false, and only a live lease gets a later expiresAt.
describe('pty-session-lease: refresh contract (validate-first)', () => {
  test('refreshLease extends expiresAt for a valid lease', async () => {
    const { sessionId, expiresAt: initial } = mintLease();
    // Date.now() is ms-grain, so a synchronous refresh may land on the same
    // integer. Wait a few ms so the extended expiry is strictly later.
    await new Promise<void>((resolve) => setTimeout(resolve, 5));
    const r = refreshLease(sessionId);
    expect(r.ok).toBe(true);
    if (r.ok) expect(r.expiresAt).toBeGreaterThan(initial);
  });

  test('refreshLease rejects unknown sessionId (validate-first invariant)', () => {
    const r = refreshLease('never-minted');
    expect(r.ok).toBe(false);
  });

  test('refreshLease never resurrects an expired lease', () => {
    // Negative-validate path: a revoked lease is absent from the registry
    // (the same state as expired-and-pruned), so refresh must return ok:false.
    const { sessionId } = mintLease();
    revokeLease(sessionId);
    const r = refreshLease(sessionId);
    expect(r.ok).toBe(false);
  });

  test('refreshLease rejects a lease whose expiry has passed (clock advanced)', () => {
    // Exercises real expiry rather than revocation by moving the clock just
    // past the minted expiresAt. No TTL value is assumed.
    // NOTE(review): relies on the lease module reading Date.now() at call
    // time — confirm against src/pty-session-lease.
    const { sessionId, expiresAt } = mintLease();
    const realNow = Date.now;
    Date.now = () => expiresAt + 1;
    try {
      expect(refreshLease(sessionId).ok).toBe(false);
      expect(validateLease(sessionId).ok).toBe(false);
    } finally {
      // Always restore the clock so later tests see real time.
      Date.now = realNow;
    }
  });

  test('refreshLease tolerates null / undefined sessionId', () => {
    expect(refreshLease(null).ok).toBe(false);
    expect(refreshLease(undefined).ok).toBe(false);
  });
});
|
||||
|
|
@ -1,83 +0,0 @@
|
|||
/**
|
||||
* Regression test for PR #1169 bug #7 — `pdf --from-file` ran JSON.parse on
|
||||
* user-supplied file contents with no try/catch. A malformed payload crashed
|
||||
* the pdf handler with a raw SyntaxError. Codex flagged that JSON.parse
|
||||
* accepts primitives too (numbers, strings, null) and Array.isArray must be
|
||||
* checked separately, so the fix added an explicit object-shape gate.
|
||||
*
|
||||
* Test surface: parsePdfFromFile, exported for tests at meta-commands.ts:139.
|
||||
* All fixtures land in process.cwd() (SAFE_DIRECTORIES allows TEMP_DIR or cwd;
|
||||
* cwd is universally safe on every platform our CI runs on).
|
||||
*/
|
||||
import { describe, expect, test, beforeAll, afterAll } from "bun:test";
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
|
||||
import { parsePdfFromFile } from "../src/meta-commands";
|
||||
|
||||
// Scratch directory for JSON fixtures, created under process.cwd(). It is
// created in beforeAll rather than at import time so that merely loading
// this file leaves no directory behind; every use happens inside a test,
// after beforeAll has run.
let FIXTURE_DIR = "";

beforeAll(() => {
  FIXTURE_DIR = fs.mkdtempSync(path.join(process.cwd(), "pr1169-pdf-"));
});

afterAll(() => {
  // Guard against beforeAll having failed before the directory existed.
  if (FIXTURE_DIR) {
    fs.rmSync(FIXTURE_DIR, { recursive: true, force: true });
  }
});
|
||||
|
||||
/**
 * Write `body` to a file called `name` inside FIXTURE_DIR.
 *
 * @param name File name (no directory component) for the fixture.
 * @param body Exact file contents; may be empty or invalid JSON on purpose.
 * @returns The full path of the written file.
 */
function writeFixture(name: string, body: string): string {
  const p = path.join(FIXTURE_DIR, name);
  fs.writeFileSync(p, body);
  return p;
}
|
||||
|
||||
// parsePdfFromFile must reject unparseable input and any top-level JSON
// value that is not a plain object, each with a descriptive error, and must
// still accept a valid options object.
describe("parsePdfFromFile — invalid JSON regression (PR #1169 bug #7)", () => {
  test("invalid JSON: throws with file path AND parser detail", () => {
    const p = writeFixture("invalid.json", "{ not-json");
    expect(() => parsePdfFromFile(p)).toThrow(/not valid JSON/);
    expect(() => parsePdfFromFile(p)).toThrow(p);
  });

  test("empty file: throws JSON-parse style error", () => {
    const p = writeFixture("empty.json", "");
    // Empty string is invalid JSON per ECMA-404.
    expect(() => parsePdfFromFile(p)).toThrow(/not valid JSON/);
  });

  test("top-level array: throws 'must be a JSON object' with type", () => {
    const p = writeFixture("array.json", JSON.stringify(["a", "b"]));
    expect(() => parsePdfFromFile(p)).toThrow(/must be a JSON object/);
    expect(() => parsePdfFromFile(p)).toThrow(/array/);
  });

  test("top-level number: throws with 'number' type label", () => {
    const p = writeFixture("number.json", "42");
    expect(() => parsePdfFromFile(p)).toThrow(/must be a JSON object/);
    expect(() => parsePdfFromFile(p)).toThrow(/number/);
  });

  test("top-level string: throws with 'string' type label", () => {
    const p = writeFixture("string.json", JSON.stringify("hello"));
    expect(() => parsePdfFromFile(p)).toThrow(/must be a JSON object/);
    expect(() => parsePdfFromFile(p)).toThrow(/string/);
  });

  test("top-level null: throws with 'object' type label (JS null typeof === object)", () => {
    const p = writeFixture("null.json", "null");
    // null passes typeof === 'object' but the fix's `=== null` branch catches it.
    expect(() => parsePdfFromFile(p)).toThrow(/must be a JSON object/);
  });

  test("top-level boolean: throws with 'boolean' type label", () => {
    const p = writeFixture("bool.json", "true");
    expect(() => parsePdfFromFile(p)).toThrow(/must be a JSON object/);
    expect(() => parsePdfFromFile(p)).toThrow(/boolean/);
  });

  test("valid object: parses successfully (happy-path regression)", () => {
    const p = writeFixture("valid.json", JSON.stringify({ format: "A4", pageNumbers: true }));
    const result = parsePdfFromFile(p);
    expect(result.format).toBe("A4");
    expect(result.pageNumbers).toBe(true);
  });
});
|
||||
|
|
@ -1,39 +0,0 @@
|
|||
import { describe, test, expect } from "bun:test";
|
||||
import { buildRestartEnv } from "../src/cli";
|
||||
|
||||
// #1781: an auto-restart triggered by a plain command (no --headed flag) must
|
||||
// NOT silently downgrade a headed session to headless. buildRestartEnv reapplies
|
||||
// headed/proxy/configHash from this invocation OR the persisted server state.
|
||||
// buildRestartEnv(flags, state) derives the env for a restarted server from
// this invocation's flags, falling back to the persisted server state.
describe("buildRestartEnv (#1781 headed persistence)", () => {
  // Persisted-state fixtures differing only in `mode`.
  const headedState = { pid: 1, port: 9, token: "t", startedAt: "", serverPath: "", mode: "headed" as const };
  const launchedState = { pid: 1, port: 9, token: "t", startedAt: "", serverPath: "", mode: "launched" as const };

  test("headed flag on this invocation → BROWSE_HEADED=1", () => {
    expect(buildRestartEnv({ headed: true } as any, null).BROWSE_HEADED).toBe("1");
  });

  test("plain command + persisted headed state → still BROWSE_HEADED=1 (the regression)", () => {
    const env = buildRestartEnv({} as any, headedState as any);
    expect(env.BROWSE_HEADED).toBe("1");
  });

  test("plain command + headless state → no BROWSE_HEADED (no spurious headed)", () => {
    const env = buildRestartEnv({} as any, launchedState as any);
    expect(env.BROWSE_HEADED).toBeUndefined();
  });

  test("nothing set → empty env", () => {
    expect(buildRestartEnv(null, null)).toEqual({});
  });

  test("proxy + configHash reapplied from flags", () => {
    const env = buildRestartEnv({ proxyUrl: "socks5://x", configHash: "abc" } as any, null);
    expect(env.BROWSE_PROXY_URL).toBe("socks5://x");
    expect(env.BROWSE_CONFIG_HASH).toBe("abc");
  });

  test("configHash falls back to persisted state", () => {
    const env = buildRestartEnv({} as any, { ...launchedState, configHash: "fromstate" } as any);
    expect(env.BROWSE_CONFIG_HASH).toBe("fromstate");
  });
});
|
||||
|
|
@ -1,118 +0,0 @@
|
|||
/**
|
||||
* Unit tests for the screenshot size guard (#1214).
|
||||
*
|
||||
* Verifies that images exceeding 2000px on the longest dimension get
|
||||
* downscaled to fit the Anthropic vision API cap, while images already
|
||||
* inside the cap pass through untouched.
|
||||
*
|
||||
* Integration with the three callsites (snapshot.ts, meta-commands.ts,
|
||||
* write-commands.ts) is exercised by the existing browse E2E suite — we
|
||||
* don't need to spin up Chromium just to verify the helper. The static
|
||||
* invariant test below pins that all three callsites import the guard.
|
||||
*/
|
||||
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
|
||||
import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'fs';
|
||||
import { tmpdir } from 'os';
|
||||
import { join } from 'path';
|
||||
import sharp from 'sharp';
|
||||
import {
|
||||
SCREENSHOT_MAX_DIMENSION_PX,
|
||||
guardScreenshotBuffer,
|
||||
guardScreenshotPath,
|
||||
} from '../src/screenshot-size-guard';
|
||||
|
||||
// Per-test scratch directory under the OS temp dir; recreated before and
// removed after every test so files never leak between cases.
let tmp: string;

beforeEach(() => {
  tmp = mkdtempSync(join(tmpdir(), 'screenshot-guard-'));
});

afterEach(() => {
  rmSync(tmp, { recursive: true, force: true });
});
|
||||
|
||||
/**
 * Generate a solid-colour, 3-channel PNG of the requested size using sharp.
 *
 * @param width  Image width in pixels.
 * @param height Image height in pixels.
 * @returns The encoded PNG bytes.
 */
async function makePng(width: number, height: number): Promise<Buffer> {
  return sharp({
    create: { width, height, channels: 3, background: { r: 200, g: 50, b: 50 } },
  })
    .png()
    .toBuffer();
}
|
||||
|
||||
// In-memory guard: images within the cap are returned as the same Buffer
// instance; larger images are downscaled so the longest side fits.
describe('guardScreenshotBuffer', () => {
  test('passes through images already within the cap', async () => {
    const input = await makePng(1500, 1800);
    const { buffer, result } = await guardScreenshotBuffer(input);
    expect(result.resized).toBe(false);
    expect(result.width).toBe(1500);
    expect(result.height).toBe(1800);
    expect(buffer).toBe(input); // identity — no re-encode
  });

  test('downscales a 5000px-tall image to fit the cap', async () => {
    const input = await makePng(1200, 5000);
    const { buffer, result } = await guardScreenshotBuffer(input);
    expect(result.resized).toBe(true);
    expect(result.originalHeight).toBe(5000);
    expect(Math.max(result.width, result.height)).toBeLessThanOrEqual(
      SCREENSHOT_MAX_DIMENSION_PX,
    );
    // Aspect ratio preserved.
    expect(result.height / result.width).toBeCloseTo(5000 / 1200, 1);
    // Buffer is a different (smaller) PNG.
    expect(buffer.length).toBeLessThan(input.length);
  });

  test('downscales a 6000px-wide image', async () => {
    const input = await makePng(6000, 1200);
    const { buffer, result } = await guardScreenshotBuffer(input);
    expect(result.resized).toBe(true);
    expect(result.originalWidth).toBe(6000);
    expect(Math.max(result.width, result.height)).toBeLessThanOrEqual(
      SCREENSHOT_MAX_DIMENSION_PX,
    );
    expect(buffer.length).toBeGreaterThan(0);
  });

  test('treats exactly-2000px images as in-bounds (no resize)', async () => {
    const input = await makePng(2000, 1000);
    const { result } = await guardScreenshotBuffer(input);
    expect(result.resized).toBe(false);
  });
});
|
||||
|
||||
// On-disk guard: oversized files are rewritten in place; files within the
// cap are left byte-for-byte unchanged.
describe('guardScreenshotPath', () => {
  test('rewrites the file in place when downscale is needed', async () => {
    const filePath = join(tmp, 'tall.png');
    writeFileSync(filePath, await makePng(1200, 5000));
    const result = await guardScreenshotPath(filePath);
    expect(result.resized).toBe(true);
    const written = readFileSync(filePath);
    const meta = await sharp(written).metadata();
    expect(Math.max(meta.width ?? 0, meta.height ?? 0)).toBeLessThanOrEqual(
      SCREENSHOT_MAX_DIMENSION_PX,
    );
  });

  test('leaves the file untouched when already within cap', async () => {
    const filePath = join(tmp, 'short.png');
    const original = await makePng(800, 600);
    writeFileSync(filePath, original);
    const result = await guardScreenshotPath(filePath);
    expect(result.resized).toBe(false);
    const written = readFileSync(filePath);
    expect(written.equals(original)).toBe(true);
  });
});
|
||||
|
||||
// Source-text check: each listed file under ../src must mention the
// 'screenshot-size-guard' module name somewhere in its text.
describe('static invariant: all three full-page callsites import the guard', () => {
  test('snapshot.ts, meta-commands.ts, and write-commands.ts wire the size guard', () => {
    const browseSrc = join(import.meta.dir, '..', 'src');
    const paths = ['snapshot.ts', 'meta-commands.ts', 'write-commands.ts'];
    for (const rel of paths) {
      const content = readFileSync(join(browseSrc, rel), 'utf-8');
      expect(content).toContain('screenshot-size-guard');
    }
  });
});
|
||||
|
|
@ -1,138 +0,0 @@
|
|||
/**
|
||||
* Regression test for PR #1169 bug #6 — downloadFile opened a WriteStream to
|
||||
* `<dest>.tmp.<pid>` but never closed it on error paths. If the reader or
|
||||
* writer threw mid-download, the FD leaked and the half-written tmp could
|
||||
* be promoted by a retry's renameSync.
|
||||
*
|
||||
* The fix wraps the read loop in try/catch and runs `writer.destroy()` +
|
||||
* `fs.unlinkSync(tmp)` before rethrowing.
|
||||
*
|
||||
* Per codex's pushback, this test must exercise BOTH the reader-throws path
|
||||
* and the non-2xx-response path, and it must NOT assume the specific tmp
|
||||
* filename — only that no `<dest>.tmp.*` sibling remains.
|
||||
*/
|
||||
import { describe, expect, test, beforeAll, afterAll, beforeEach, afterEach } from "bun:test";
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
|
||||
import { downloadFile } from "../src/security-classifier";
|
||||
|
||||
/**
 * List the entries of `destDir` whose names start with `<destBase>.tmp.`.
 *
 * @param destDir  Directory to scan.
 * @param destBase Base name of the download destination.
 * @returns Matching entry names (not full paths); empty when `destDir` does not exist.
 */
function tmpSiblings(destDir: string, destBase: string): string[] {
  const prefix = destBase + ".tmp.";
  return fs.existsSync(destDir)
    ? fs.readdirSync(destDir).filter((entry) => entry.startsWith(prefix))
    : [];
}
|
||||
|
||||
// Scratch directory shared by every test in this file (created once, removed once).
let FIXTURE_DIR = "";
// The fetch implementation in place before each test, so the stub can be undone.
let originalFetch: typeof fetch;

beforeAll(() => {
  const prefix = path.join(process.cwd(), "pr1169-dl-");
  FIXTURE_DIR = fs.mkdtempSync(prefix);
});

afterAll(() => {
  // Nothing to clean if beforeAll never managed to create the directory.
  if (!FIXTURE_DIR) return;
  fs.rmSync(FIXTURE_DIR, { recursive: true, force: true });
});

// Each test overwrites globalThis.fetch; capture it before and put it back after.
beforeEach(() => {
  originalFetch = globalThis.fetch;
});

afterEach(() => {
  globalThis.fetch = originalFetch;
});
|
||||
|
||||
describe("downloadFile error-path cleanup (PR #1169 bug #6)", () => {
  /** Replace the global fetch with a stub that resolves to whatever `respond` builds. */
  const stubFetch = (respond: () => Response): void => {
    // @ts-expect-error — overwrite global fetch for the test
    globalThis.fetch = async () => respond();
  };

  /** Names of any `<dest>.tmp.*` files sitting next to `dest`. */
  const leftoverTmpFiles = (dest: string): string[] =>
    tmpSiblings(path.dirname(dest), path.basename(dest));

  /** Assert that a failed download left neither the destination nor a tmp sibling. */
  const expectNothingLeftBehind = (dest: string): void => {
    expect(fs.existsSync(dest)).toBe(false);
    expect(leftoverTmpFiles(dest)).toEqual([]);
  };

  test("reader rejects mid-stream: throws, no dest, no tmp sibling left", async () => {
    const dest = path.join(FIXTURE_DIR, "reader-fail-model.bin");

    // One chunk is queued up front; the stream then errors when asked for more,
    // which is the failure path the fix protects against.
    const failingBody = new ReadableStream<Uint8Array>({
      start(ctrl) {
        ctrl.enqueue(new Uint8Array([1, 2, 3, 4]));
      },
      pull(ctrl) {
        ctrl.error(new Error("simulated mid-stream read failure"));
      },
    });
    stubFetch(() => new Response(failingBody, { status: 200, statusText: "OK" }));

    await expect(downloadFile("https://example.com/model.bin", dest)).rejects.toThrow(
      /simulated mid-stream read failure/
    );
    expectNothingLeftBehind(dest);
  });

  test("non-2xx response: throws with status, no tmp file created", async () => {
    const dest = path.join(FIXTURE_DIR, "http500-model.bin");
    stubFetch(() => new Response("server boom", { status: 500, statusText: "Server Error" }));

    await expect(downloadFile("https://example.com/model.bin", dest)).rejects.toThrow(
      /Failed to fetch.*500/
    );
    expectNothingLeftBehind(dest);
  });

  test("missing body: throws, no tmp file created", async () => {
    const dest = path.join(FIXTURE_DIR, "nobody-model.bin");

    // Response with null body (some upstreams send this on edge errors).
    stubFetch(() => new Response(null, { status: 200, statusText: "OK" }));

    await expect(downloadFile("https://example.com/model.bin", dest)).rejects.toThrow(
      /Failed to fetch/
    );
    expectNothingLeftBehind(dest);
  });

  test("happy path: 2xx body completes, dest exists, no tmp sibling remains", async () => {
    const dest = path.join(FIXTURE_DIR, "ok-model.bin");

    // A single four-byte chunk, then a clean end of stream.
    const completeBody = new ReadableStream<Uint8Array>({
      start(ctrl) {
        ctrl.enqueue(new Uint8Array([9, 9, 9, 9]));
        ctrl.close();
      },
    });
    stubFetch(() => new Response(completeBody, { status: 200, statusText: "OK" }));

    await downloadFile("https://example.com/model.bin", dest);

    expect(fs.existsSync(dest)).toBe(true);
    expect(leftoverTmpFiles(dest)).toEqual([]);
    const bytesOnDisk = fs.readFileSync(dest);
    expect(Array.from(bytesOnDisk)).toEqual([9, 9, 9, 9]);

    fs.unlinkSync(dest);
  });
});
|
||||
|
||||
|
|
@ -1,66 +0,0 @@
|
|||
/**
|
||||
* Unit tests for browse/src/security-sidecar-client.ts.
|
||||
*
|
||||
* Tests the IPC client's behavior against a fake sidecar (a tiny Node
|
||||
* script we spawn) — verifies request/response id correlation, timeout,
|
||||
* payload cap, malformed-response handling, and circuit-breaker tripping.
|
||||
*
|
||||
* Does NOT exercise the real classifier — that lives behind the model
|
||||
* download and is covered by the existing security-classifier tests + the
|
||||
* E2E browser security suite.
|
||||
*/
|
||||
|
||||
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
||||
import { mkdtempSync, rmSync, writeFileSync } from "fs";
|
||||
import { tmpdir } from "os";
|
||||
import { join } from "path";
|
||||
|
||||
// Per-test scratch directory; recreated before and deleted after every test.
let tmp: string;

beforeEach(() => {
  const prefix = join(tmpdir(), "sidecar-client-test-");
  tmp = mkdtempSync(prefix);
});

afterEach(async () => {
  // Reset the client's module-level sidecar state so tests don't bleed into each other.
  const { resetSidecarForTests } = await import("../src/security-sidecar-client");
  resetSidecarForTests();
  rmSync(tmp, { recursive: true, force: true });
});
|
||||
|
||||
describe("security-sidecar-client — payload cap", () => {
  test("rejects requests over 64KB without spawning", async () => {
    const client = await import("../src/security-sidecar-client");
    // 65 KiB of ASCII — just over the cap named in the test title.
    const oversizedPayload = "a".repeat(65 * 1024);
    const pending = client.scanWithSidecar(oversizedPayload);
    await expect(pending).rejects.toThrow(/payload-too-large/);
  });
});
|
||||
|
||||
describe("security-sidecar-client — availability probe", () => {
  test("isSidecarAvailable returns a shape regardless of platform", async () => {
    const client = await import("../src/security-sidecar-client");
    const probe = client.isSidecarAvailable();

    expect(typeof probe.available).toBe("boolean");
    if (probe.available) return;

    // When unavailable, reason must explain why
    expect(typeof probe.reason).toBe("string");
  });
});
|
||||
|
||||
describe("security-sidecar-client — circuit breaker after repeated failures", () => {
  test("repeated oversized payloads are rejected consistently (breaker itself not exercised)", async () => {
    // NOTE: this does NOT trip the real circuit breaker. Doing that needs a
    // sidecar that fails after spawn, which would require faking spawn().
    // What this test does prove: calling the client repeatedly with a payload
    // over the cap keeps rejecting with payload-too-large and never crashes.
    // Each iteration's `rejects.toThrow` fails the test on its own if the call
    // resolves or rejects with a different error, so no trailing sentinel
    // assertion is needed.
    const { scanWithSidecar } = await import("../src/security-sidecar-client");
    const oversized = "x".repeat(70 * 1024);
    for (let attempt = 0; attempt < 5; attempt += 1) {
      await expect(scanWithSidecar(oversized)).rejects.toThrow(/payload-too-large/);
    }
  });
});
|
||||
|
|
@ -63,13 +63,13 @@ describe('Server auth security', () => {
|
|||
|
||||
// Test 4: /activity/history requires auth via validateAuth
|
||||
test('/activity/history requires authentication', () => {
|
||||
const historyBlock = sliceBetween(SERVER_SRC, "url.pathname === '/activity/history'", 'Batch endpoint');
|
||||
const historyBlock = sliceBetween(SERVER_SRC, "url.pathname === '/activity/history'", 'Sidebar endpoints');
|
||||
expect(historyBlock).toContain('validateAuth');
|
||||
});
|
||||
|
||||
// Test 5: /activity/history has no wildcard CORS header
|
||||
test('/activity/history has no wildcard CORS header', () => {
|
||||
const historyBlock = sliceBetween(SERVER_SRC, "url.pathname === '/activity/history'", 'Batch endpoint');
|
||||
const historyBlock = sliceBetween(SERVER_SRC, "url.pathname === '/activity/history'", 'Sidebar endpoints');
|
||||
expect(historyBlock).not.toContain("'*'");
|
||||
});
|
||||
|
||||
|
|
@ -314,7 +314,7 @@ describe('Server auth security', () => {
|
|||
// Regression: connect command crashed with "domains is not defined" because
|
||||
// a stray `domains,` variable was in the status fetch body (cli.ts:852).
|
||||
test('connect command status fetch body has no undefined variable references', () => {
|
||||
const connectBlock = sliceBetween(CLI_SRC, 'Launching headed Chromium', 'Terminal agent started');
|
||||
const connectBlock = sliceBetween(CLI_SRC, 'Launching headed Chromium', 'Sidebar agent started');
|
||||
// The status fetch should use a clean JSON body
|
||||
expect(connectBlock).toContain("command: 'status'");
|
||||
// Must NOT contain a bare `domains` reference in the fetch body
|
||||
|
|
@ -335,15 +335,10 @@ describe('Server auth security', () => {
|
|||
// The connect subprocess env must override BROWSE_PARENT_PID
|
||||
expect(pairBlock).toContain("BROWSE_PARENT_PID");
|
||||
expect(pairBlock).toContain("'0'");
|
||||
// The connect command must propagate BROWSE_PARENT_PID=0 via the
|
||||
// serverEnv object literal passed to startServer. The literal text
|
||||
// `serverEnv.BROWSE_PARENT_PID` is NOT in source — the value is
|
||||
// assigned via object-literal syntax (`BROWSE_PARENT_PID: '0'`)
|
||||
// inside the `const serverEnv: Record<string, string> = { ... }`
|
||||
// declaration. Assert both pieces appear in the connect block.
|
||||
const connectBlock = sliceBetween(CLI_SRC, 'Launching headed Chromium', 'Terminal agent started');
|
||||
expect(connectBlock).toContain("const serverEnv");
|
||||
expect(connectBlock).toContain("BROWSE_PARENT_PID: '0'");
|
||||
// The connect command must propagate BROWSE_PARENT_PID=0 to serverEnv
|
||||
const connectBlock = sliceBetween(CLI_SRC, 'Launching headed Chromium', 'Sidebar agent started');
|
||||
expect(connectBlock).toContain("BROWSE_PARENT_PID");
|
||||
expect(connectBlock).toContain("serverEnv.BROWSE_PARENT_PID");
|
||||
});
|
||||
|
||||
// Regression: newtab returned 403 for scoped tokens because the tab ownership
|
||||
|
|
|
|||
|
|
@ -1,232 +0,0 @@
|
|||
import { describe, test, expect, beforeEach, beforeAll, afterAll } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as crypto from 'crypto';
|
||||
import {
|
||||
buildFetchHandler,
|
||||
__resetShuttingDown,
|
||||
type ServerConfig,
|
||||
} from '../src/server';
|
||||
import { __resetRegistry } from '../src/token-registry';
|
||||
import { BrowserManager } from '../src/browser-manager';
|
||||
import { resolveConfig } from '../src/config';
|
||||
|
||||
// Tests for the v1.41+ ownsTerminalAgent flag.
|
||||
//
|
||||
// Embedders (gbrowser phoenix overlay) that run their own PTY server and write
|
||||
// terminal-port / terminal-internal-token / terminal-agent-pid themselves were
|
||||
// getting those files clobbered by gstack's shutdown(). The flag (default true)
|
||||
// gates four side effects (v1.44+):
|
||||
// 1. identity-based kill of the PID in <stateDir>/terminal-agent-pid
|
||||
// 2. unlink terminal-port
|
||||
// 3. unlink terminal-internal-token
|
||||
// 4. unlink terminal-agent-pid
|
||||
// False = embedder owns them, gstack stays hands-off.
|
||||
//
|
||||
// Pre-v1.44 used `pkill -f terminal-agent\.ts` which matched sibling gstack
|
||||
// sessions on the same host — see browse/src/terminal-agent-control.ts header.
|
||||
//
|
||||
// CRITICAL: each test stubs process.exit (so shutdown's exit doesn't kill
// the test runner) and process.kill (so no signal can reach a real process).
// The PID written to the test agent-record is SENTINEL_DEAD_PID, a value no
// live process holds, and the stubbed liveness probe (signal 0) always
// throws ESRCH. The tests therefore expect liveness probes against that PID
// but never a termination signal.
|
||||
|
||||
const stateDir = resolveConfig().stateDir;
|
||||
const PORT_FILE = path.join(stateDir, 'terminal-port');
|
||||
const TOKEN_FILE = path.join(stateDir, 'terminal-internal-token');
|
||||
const AGENT_RECORD_FILE = path.join(stateDir, 'terminal-agent-pid');
|
||||
const SENTINEL_PORT = 'sentinel-port-65432';
|
||||
const SENTINEL_TOKEN = 'sentinel-token-abcdef1234567890';
|
||||
// 2147483646 (2^31 - 2) is far above any PID a real kernel hands out: Linux
// caps PIDs at PID_MAX_LIMIT (2^22 on 64-bit) and macOS tops out at 99998.
// No real process will ever hold this PID on a developer machine, so
// isProcessAlive returns false → killAgentByRecord no-ops without sending any signal.
|
||||
const SENTINEL_DEAD_PID = 2147483646;
|
||||
|
||||
/**
 * Build a ServerConfig with throwaway defaults for these tests.
 *
 * @param overrides Fields that replace the defaults (spread last, so they win).
 * @returns A config with a fresh random auth token and a new BrowserManager.
 */
function makeMinimalConfig(overrides: Partial<ServerConfig> = {}): ServerConfig {
  const randomSuffix = crypto.randomBytes(16).toString('hex');
  const defaults: ServerConfig = {
    authToken: 'embedder-test-' + randomSuffix,
    browsePort: 34568,
    idleTimeoutMs: 1_800_000,
    config: resolveConfig(),
    browserManager: new BrowserManager(),
    startTime: Date.now(),
  };
  return { ...defaults, ...overrides };
}
|
||||
|
||||
/**
 * Create the state directory if needed and write the three sentinel files:
 * terminal-port, terminal-internal-token, and a JSON agent record whose pid is
 * SENTINEL_DEAD_PID.
 */
function writeSentinels(): void {
  fs.mkdirSync(stateDir, { recursive: true });

  const agentRecord = JSON.stringify({
    pid: SENTINEL_DEAD_PID,
    gen: 'sentinel-gen',
    startedAt: Date.now(),
  });
  const sentinels: Array<[string, string]> = [
    [PORT_FILE, SENTINEL_PORT],
    [TOKEN_FILE, SENTINEL_TOKEN],
    [AGENT_RECORD_FILE, agentRecord],
  ];
  for (const [file, contents] of sentinels) {
    fs.writeFileSync(file, contents);
  }
}
|
||||
|
||||
/**
 * Read a file as UTF-8 text.
 *
 * @param p Path of the file to read.
 * @returns The file contents, or null when the read throws for any reason
 *          (for example, the file does not exist).
 */
function readIfExists(p: string): string | null {
  let contents: string | null = null;
  try {
    contents = fs.readFileSync(p, 'utf-8');
  } catch {
    // Any read failure is reported as null.
  }
  return contents;
}
|
||||
|
||||
/**
 * Run `cb` with process.exit and process.kill temporarily replaced, then put
 * the real functions back (even when `cb` rejects).
 *
 * While stubbed:
 *  - process.exit(code) throws Error(`__exit:<code>`) instead of exiting, so a
 *    shutdown path can be driven without killing the test runner.
 *  - process.kill(pid, signal) sends nothing. Each call is recorded as
 *    [pid, signal] (a missing signal is recorded as 'SIGTERM'). A signal-0
 *    liveness probe throws an ESRCH-coded "No such process" error; any other
 *    signal returns true.
 *
 * @param cb Callback to run under the stubs; receives the live list of recorded kill calls.
 * @returns The recorded kill calls, so tests can assert kill scope.
 */
async function withStubs(
  cb: (killCalls: Array<[number, NodeJS.Signals | number]>) => Promise<void>
): Promise<Array<[number, NodeJS.Signals | number]>> {
  const recorded: Array<[number, NodeJS.Signals | number]> = [];
  const proc = process as any;
  const saved = { exit: process.exit, kill: process.kill };

  proc.exit = (code: number) => {
    throw new Error(`__exit:${code}`);
  };
  proc.kill = (pid: number, signal: NodeJS.Signals | number) => {
    recorded.push([pid, signal ?? 'SIGTERM']);
    if (signal !== 0) return true;
    // Signal 0 is a liveness probe: always answer "process is dead".
    const probeError: any = new Error('No such process');
    probeError.code = 'ESRCH';
    throw probeError;
  };

  try {
    await cb(recorded);
  } finally {
    proc.exit = saved.exit;
    proc.kill = saved.kill;
  }
  return recorded;
}
|
||||
|
||||
/**
 * Call `handle.shutdown(0)` and swallow only the `__exit:<code>` marker error.
 *
 * @param handle Object exposing the shutdown function to invoke.
 * @throws Anything shutdown rejects with whose `message` is not a string
 *         starting with `__exit:`.
 */
async function runShutdown(handle: { shutdown: (code?: number) => Promise<void> }): Promise<void> {
  try {
    await handle.shutdown(0);
  } catch (err: any) {
    const message = err?.message;
    const isExitMarker = typeof message === 'string' && message.startsWith('__exit:');
    if (isExitMarker) return;
    throw err;
  }
}
|
||||
|
||||
/**
 * Drop the signal-0 liveness probes from a list of recorded kill calls.
 *
 * @param calls Recorded [pid, signal] pairs.
 * @returns A new array holding only the calls whose signal is not 0.
 */
function terminationCalls(
  calls: Array<[number, NodeJS.Signals | number]>,
): Array<[number, NodeJS.Signals | number]> {
  return calls.filter((entry) => entry[1] !== 0);
}
|
||||
|
||||
describe('buildFetchHandler ownsTerminalAgent gate', () => {
  // shutdown() reads `path.dirname(config.stateFile)` from module-level config
  // (composition gap — see TODOS T9). So unlinks target the real state dir,
  // not a per-test temp dir. If a real gstack daemon is running on this host,
  // its terminal-port + terminal-internal-token + terminal-agent-pid live
  // where this test writes. Save + restore real-daemon file contents around
  // the whole suite so the test never clobbers a developer's running session.
  const trackedFiles = [PORT_FILE, TOKEN_FILE, AGENT_RECORD_FILE];
  const realBackups = new Map<string, string | null>();

  /** Best-effort unlink: a file that is already gone is exactly what we want. */
  function removeIfPresent(file: string): void {
    try { fs.unlinkSync(file); } catch {}
  }

  beforeAll(() => {
    for (const file of trackedFiles) {
      realBackups.set(file, readIfExists(file));
    }
  });

  afterAll(() => {
    for (const file of trackedFiles) {
      const saved = realBackups.get(file) ?? null;
      if (saved !== null) {
        // The file existed before the suite ran: put its contents back.
        fs.mkdirSync(stateDir, { recursive: true });
        fs.writeFileSync(file, saved);
      } else {
        // The file did not exist before the suite ran: leave no sentinel behind.
        removeIfPresent(file);
      }
    }
  });

  beforeEach(() => {
    __resetRegistry();
    __resetShuttingDown();
    // Clean any leftover sentinels from a prior failed run so the "preserved"
    // assertion can't pass spuriously off a stale file.
    for (const file of trackedFiles) {
      removeIfPresent(file);
    }
  });

  test('1. ownsTerminalAgent:false preserves all three files and sends no signal', async () => {
    writeSentinels();
    const handle = buildFetchHandler(makeMinimalConfig({ ownsTerminalAgent: false }));
    const calls = await withStubs(async () => {
      await runShutdown(handle);
    });
    expect(readIfExists(PORT_FILE)).toBe(SENTINEL_PORT);
    expect(readIfExists(TOKEN_FILE)).toBe(SENTINEL_TOKEN);
    expect(readIfExists(AGENT_RECORD_FILE)).not.toBeNull();
    expect(terminationCalls(calls).length).toBe(0);
  });

  test('2. ownsTerminalAgent:true deletes all three files; identity-based kill probes the recorded PID', async () => {
    writeSentinels();
    const handle = buildFetchHandler(makeMinimalConfig({ ownsTerminalAgent: true }));
    const calls = await withStubs(async () => {
      await runShutdown(handle);
    });
    expect(readIfExists(PORT_FILE)).toBeNull();
    expect(readIfExists(TOKEN_FILE)).toBeNull();
    expect(readIfExists(AGENT_RECORD_FILE)).toBeNull();
    // isProcessAlive sends signal 0; PID is the sentinel-dead PID, so the
    // probe returns false and no SIGTERM is sent.
    const probes = calls.filter(([pid, sig]) => pid === SENTINEL_DEAD_PID && sig === 0);
    expect(probes.length).toBeGreaterThan(0);
    expect(terminationCalls(calls).length).toBe(0);
  });

  test('3. ownsTerminalAgent unset defaults to true (deletes all three; probes recorded PID)', async () => {
    writeSentinels();
    // Note: no ownsTerminalAgent in the overrides — uses the `?? true` default.
    const handle = buildFetchHandler(makeMinimalConfig());
    const calls = await withStubs(async () => {
      await runShutdown(handle);
    });
    expect(readIfExists(PORT_FILE)).toBeNull();
    expect(readIfExists(TOKEN_FILE)).toBeNull();
    expect(readIfExists(AGENT_RECORD_FILE)).toBeNull();
    const probes = calls.filter(([pid, sig]) => pid === SENTINEL_DEAD_PID && sig === 0);
    expect(probes.length).toBeGreaterThan(0);
    // Same expectation as test 2: the default must behave like an explicit
    // `true`, so the dead sentinel PID is probed but never signalled.
    expect(terminationCalls(calls).length).toBe(0);
  });

  test('4. CLI start() call site passes ownsTerminalAgent: true literally (static grep)', () => {
    // Resolve browse/src/server.ts relative to this test file's directory so
    // the test works regardless of cwd. `new URL(import.meta.url).pathname`
    // is a URL path, not a filesystem path: it stays percent-encoded and is
    // `/C:/...` on Windows, so it must not be fed to path.resolve.
    const serverTsPath = path.join(__dirname, '..', 'src', 'server.ts');
    const source = fs.readFileSync(serverTsPath, 'utf-8');
    // Match the call site inside start()'s buildFetchHandler({...}) literal.
    // The pattern looks for the trailing comma and trailing context so the
    // match cannot be satisfied by the JSDoc reference earlier in the file.
    expect(source).toMatch(/ownsTerminalAgent:\s*true,\s*\/\/\s*CLI spawns terminal-agent\.ts/);
  });
});
|
||||
|
|
@ -1,8 +1,7 @@
|
|||
import { describe, test, expect, beforeEach, mock } from 'bun:test';
|
||||
import { describe, test, expect, beforeEach } from 'bun:test';
|
||||
import {
|
||||
resolveConfigFromEnv,
|
||||
buildFetchHandler,
|
||||
__testInternals__,
|
||||
type ServerConfig,
|
||||
type ServerHandle,
|
||||
type Surface,
|
||||
|
|
@ -12,8 +11,6 @@ import { __resetRegistry, initRegistry } from '../src/token-registry';
|
|||
import { BrowserManager } from '../src/browser-manager';
|
||||
import { resolveConfig } from '../src/config';
|
||||
import * as crypto from 'crypto';
|
||||
import * as fs from 'node:fs';
|
||||
import * as path from 'node:path';
|
||||
|
||||
/**
|
||||
* Tests for the factory-export API surface added so gbrowser (phoenix) can
|
||||
|
|
@ -384,141 +381,3 @@ describe('buildFetchHandler factory contract', () => {
|
|||
expect(() => initRegistry('second-token-pad-to-16-chars')).toThrow(/already initialized/i);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Idle timer + onDisconnect dual-instance fix (v1.42.3.0) ──────────
|
||||
//
|
||||
// Before this fix, module-level handlers (idleCheckTick, parent watchdog,
|
||||
// SIGTERM, onDisconnect default wire) all read the module-level
|
||||
// BrowserManager directly. For embedders (gbrowser) that pass their own
|
||||
// BrowserManager into buildFetchHandler, the module-level instance never
|
||||
// has launchHeaded() called on it — so connectionMode stays 'launched'
|
||||
// forever and headed mode never short-circuits idle-shutdown. Result:
|
||||
// 30-min auto-shutdown of overlay sessions.
|
||||
//
|
||||
// Fix: introduce `let activeBrowserManager` indirection (symmetric with
|
||||
// the existing `let activeShutdown` pattern). buildFetchHandler retargets
|
||||
// it at cfg.browserManager AND chains cfg.browserManager.onDisconnect to
|
||||
// activeShutdown (without clobbering any caller-provided handler).
|
||||
|
||||
/**
 * Build a minimal stand-in for a BrowserManager.
 *
 * @param mode Value that `getConnectionMode()` will report.
 * @returns An object whose `isWatching()` is always false, whose `stopWatch()`
 *          and `close()` do nothing, and whose `onDisconnect` starts as null
 *          and may be reassigned by the caller.
 */
function makeMockBrowserManager(mode: 'launched' | 'headed') {
  type DisconnectHandler = (code?: number) => void | Promise<void>;
  return {
    getConnectionMode() {
      return mode;
    },
    isWatching() {
      return false;
    },
    stopWatch() {},
    async close() {},
    onDisconnect: null as DisconnectHandler | null,
  };
}
|
||||
|
||||
describe('idle timer + onDisconnect dual-instance fix', () => {
  // How far back lastActivity is pushed to simulate an idle session.
  const STALE_BY_MS = 31 * 60 * 1000;

  /** process.exit replacement that throws, for tests that must never reach exit. */
  const throwingExit = (_code?: number) => { throw new Error('process.exit called'); };

  /**
   * Install a bun mock wrapping `impl` as process.exit.
   * Returns the mock plus a `restore` function that reinstates the real exit;
   * callers invoke `restore` from a `finally`.
   */
  function stubExit(impl: (code?: number) => void) {
    const exitMock = mock(impl);
    const realExit = process.exit;
    (process as any).exit = exitMock;
    return {
      exitMock,
      restore: () => { (process as any).exit = realExit; },
    };
  }

  beforeEach(() => {
    __resetRegistry();
    // Reset module state every test. Bun memoizes the server.ts module
    // import for the whole test process, so `lastActivity`, `tunnelActive`,
    // `activeShutdown`, `activeBrowserManager`, and `isShuttingDown` leak
    // between tests. We reset what we touch here; the rest is fresh
    // because each test calls buildFetchHandler with a new mock instance.
    __testInternals__.setTunnelActive(false);
    __testInternals__.setLastActivity(Date.now());
    __testInternals__.resetShutdownState();
  });

  test('CRITICAL — REGRESSION: headed embedder does not auto-shutdown at idle', () => {
    const { exitMock, restore } = stubExit(throwingExit);
    try {
      const headedManager = makeMockBrowserManager('headed');
      buildFetchHandler(makeMinimalConfig({ browserManager: headedManager as any }));
      // Drive lastActivity past the idle threshold via the test seam instead
      // of mutating Date.now — the leaked module-level setInterval would
      // see fake-time and could fire shutdown if the timing aligned.
      __testInternals__.setLastActivity(Date.now() - STALE_BY_MS);
      __testInternals__.idleCheckTick();
      expect(exitMock).not.toHaveBeenCalled();
    } finally {
      restore();
    }
  });

  test('headless still auto-shuts down at idle (paired defensive)', async () => {
    // Non-throwing mock: idleCheckTick fires shutdown as a fire-and-forget
    // async call. Throwing from process.exit becomes an unhandled rejection
    // that the test runner catches. Recording the call is enough.
    const { exitMock, restore } = stubExit((_code?: number) => {});
    try {
      const headlessManager = makeMockBrowserManager('launched');
      buildFetchHandler(makeMinimalConfig({ browserManager: headlessManager as any }));
      __testInternals__.setLastActivity(Date.now() - STALE_BY_MS);
      __testInternals__.idleCheckTick();
      // Drain microtasks: shutdown awaits flushBuffers + cfgBrowserManager.close
      // before reaching process.exit.
      for (let turn = 0; turn < 2; turn += 1) {
        await Promise.resolve();
      }
      for (let turn = 0; turn < 2; turn += 1) {
        await new Promise<void>((done) => setImmediate(done));
      }
      expect(exitMock).toHaveBeenCalled();
    } finally {
      restore();
    }
  });

  test('buildFetchHandler chains cfgBrowserManager.onDisconnect, preserving caller-set handler', async () => {
    const headedManager = makeMockBrowserManager('headed');
    const callerHandler = mock(async (_code?: number) => {});
    headedManager.onDisconnect = callerHandler;
    buildFetchHandler(makeMinimalConfig({ browserManager: headedManager as any }));

    // gstack should have wrapped the caller-installed handler instead of
    // clobbering it (Codex finding: BrowserManager.onDisconnect is a public
    // field; gbrowser may set it before calling buildFetchHandler).
    expect(typeof headedManager.onDisconnect).toBe('function');
    expect(headedManager.onDisconnect).not.toBe(callerHandler);

    // Verify the chain: invoking the wrapped handler runs the caller
    // callback AND reaches activeShutdown (which calls process.exit at the
    // very end of its async path). Stubbing process.exit to throw aborts
    // the chain before isShuttingDown can leak into later tests.
    const { exitMock, restore } = stubExit(throwingExit);
    try {
      await expect((headedManager.onDisconnect as any)(0)).rejects.toThrow('process.exit called');
      expect(callerHandler).toHaveBeenCalledWith(0);
      expect(exitMock).toHaveBeenCalledWith(0);
    } finally {
      restore();
    }
  });

  test('tunnelActive blocks idle-shutdown even in headless mode', () => {
    const { exitMock, restore } = stubExit(throwingExit);
    try {
      const headlessManager = makeMockBrowserManager('launched');
      buildFetchHandler(makeMinimalConfig({ browserManager: headlessManager as any }));
      __testInternals__.setTunnelActive(true);
      __testInternals__.setLastActivity(Date.now() - STALE_BY_MS);
      __testInternals__.idleCheckTick();
      expect(exitMock).not.toHaveBeenCalled();
    } finally {
      restore();
    }
  });

  test('lifecycle handlers (idleCheckTick + parent watchdog + SIGTERM) read activeBrowserManager, not module-level browserManager', () => {
    // Static guard against a future refactor reintroducing a stale read.
    // The 3 lifecycle sites this plan fixed all call getConnectionMode via
    // the indirection. Other module-level browserManager reads inside
    // handleCommandInternalImpl (informational mode reporting in response
    // payloads) are out of scope and intentionally untouched.
    const serverSource = fs.readFileSync(path.join(__dirname, '..', 'src', 'server.ts'), 'utf-8');
    const factoryOffset = serverSource.indexOf('export function buildFetchHandler');
    expect(factoryOffset).toBeGreaterThan(0);

    // Only count reads that appear before the factory, i.e. at module level.
    const beforeFactory = serverSource.slice(0, factoryOffset);
    const hits = beforeFactory.match(/activeBrowserManager\.getConnectionMode\(\)/g) || [];
    // Edit 2 (idleCheckTick), Edit 3 (parent watchdog), Edit 6 (SIGTERM).
    expect(hits.length).toBe(3);
  });
});
|
||||
|
|
|
|||
|
|
@ -1,94 +0,0 @@
|
|||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
// Server-side route shape for the v1.44 lease + restart + dispose +
|
||||
// lease-refresh wiring. Live route exercises require the terminal-agent
|
||||
// loopback to be live (e2e-tier); these static-grep tripwires pin the
|
||||
// load-bearing protocol invariants.
|
||||
|
||||
// Absolute path of browse/src/server.ts, resolved from this test file's
// directory. `new URL(import.meta.url).pathname` is a URL path, not a
// filesystem path: it stays percent-encoded and is `/C:/...` on Windows.
const SERVER_TS = path.join(__dirname, '..', 'src', 'server.ts');
|
||||
|
||||
/**
 * Static wiring checks for the PTY lease HTTP routes in src/server.ts.
 *
 * Nothing here starts a server. Each test reads server.ts as text, cuts out
 * one route handler with sliceBetween (from that route's pathname check up to
 * the next route's pathname check), and asserts that the expected call sites
 * and response fields appear inside that slice. Because sliceBetween looks for
 * the end marker only after the start marker, the marker pairs also pin the
 * order in which the handlers appear in server.ts; reordering the handlers
 * makes sliceBetween throw.
 */
describe('server: PTY lease routes (v1.44+ Commit 2)', () => {
  // Read on demand inside each test so an unreadable server.ts fails the
  // individual tests rather than the whole describe block at collection time.
  const readServerSource = (): string => fs.readFileSync(SERVER_TS, 'utf-8');

  test('1. /pty-session returns the 4-tuple shape (sessionId, attachToken, leaseExpiresAt)', () => {
    const block = sliceBetween(
      readServerSource(),
      "url.pathname === '/pty-session' &&",
      "url.pathname === '/pty-session/reattach'",
    );
    expect(block).toContain('mintLease()');
    expect(block).toContain('grantPtyToken(minted.token, lease.sessionId)');
    expect(block).toContain('sessionId: lease.sessionId');
    expect(block).toContain('attachToken: minted.token');
    expect(block).toContain('leaseExpiresAt: lease.expiresAt');
    // Backward compat: legacy ptySessionToken alias preserved for one release.
    expect(block).toContain('ptySessionToken: minted.token');
  });

  test('2. /pty-session/reattach validates lease + mints fresh attachToken', () => {
    const block = sliceBetween(
      readServerSource(),
      "url.pathname === '/pty-session/reattach'",
      "url.pathname === '/pty-restart'",
    );
    // Validate-first: rejects unknown/expired sessionId with 410 Gone so
    // the client knows to fall back to a fresh /pty-session.
    expect(block).toContain('validateLease(sessionId)');
    expect(block).toContain('status: 410');
    // Mint fresh token bound to SAME sessionId.
    expect(block).toContain('grantPtyToken(minted.token, sessionId!)');
  });

  test('3. /pty-restart is one transaction — dispose + revoke + fresh mint', () => {
    const block = sliceBetween(
      readServerSource(),
      "url.pathname === '/pty-restart'",
      "url.pathname === '/pty-dispose'",
    );
    // Disposes old session (best-effort — missing sessionId is non-fatal).
    expect(block).toContain('restartPtySession(oldSessionId)');
    expect(block).toContain('revokeLease(oldSessionId)');
    // Then mints fresh sessionId + lease + attachToken in the same handler.
    expect(block).toContain('mintLease()');
    expect(block).toContain('grantPtyToken(minted.token, lease.sessionId)');
    // Returns the same 4-tuple shape so the client doesn't need a
    // separate /pty-session round-trip.
    expect(block).toContain('attachToken: minted.token');
    expect(block).toContain('leaseExpiresAt: lease.expiresAt');
  });

  test('4. /pty-dispose accepts body-token (sendBeacon-compatible)', () => {
    const block = sliceBetween(
      readServerSource(),
      "url.pathname === '/pty-dispose'",
      "url.pathname === '/internal/lease-refresh'",
    );
    // sendBeacon can't set custom headers, so the route MUST accept the
    // auth token in the request body. Otherwise pagehide cleanup fails
    // silently every time the user closes the browser.
    expect(block).toContain('body?.authToken');
    expect(block).toContain('authedByBody');
    // Both auth paths must validate against authToken — never just trust
    // a body-supplied token without the equality check.
    expect(block).toContain('authTokenFromBody === authToken');
  });

  test('5. /internal/lease-refresh resets the daemon idle timer (T6)', () => {
    const block = sliceBetween(
      readServerSource(),
      "url.pathname === '/internal/lease-refresh'",
      '─── /pty-inject-scan',
    );
    expect(block).toContain('refreshLease(sessionId)');
    expect(block).toContain('resetIdleTimer()');
    // Refresh failure (unknown / expired) MUST 410, not 200, so the
    // agent knows to close the WS and force a clean re-auth.
    expect(block).toContain('status: 410');
  });

  // Tests 6 and 7 search the whole file rather than a single route slice.
  test('6. grantPtyToken loopback carries sessionId binding', () => {
    const src = readServerSource();
    expect(src).toMatch(/grantPtyToken\(token: string, sessionId\?: string\)/);
    expect(src).toContain('sessionId ? { token, sessionId } : { token }');
  });

  test('7. restartPtySession helper exists and POSTs the agent /internal/restart', () => {
    const src = readServerSource();
    expect(src).toMatch(/async function restartPtySession\(sessionId: string\)/);
    expect(src).toContain('/internal/restart');
    expect(src).toContain('JSON.stringify({ sessionId })');
  });
});
|
||||
|
||||
/**
 * Extracts the span of `source` that begins at the first occurrence of `start`
 * and stops immediately before the next occurrence of `end`.
 *
 * The returned text includes the `start` marker and excludes the `end` marker.
 *
 * @param source - Text to cut from.
 * @param start - Marker where the slice begins (first occurrence is used).
 * @param end - Marker where the slice stops; only occurrences located after
 *   the end of the `start` marker are considered.
 * @returns The substring from `start` (inclusive) up to `end` (exclusive).
 * @throws Error if `start` is absent, or if `end` does not occur after it.
 */
function sliceBetween(source: string, start: string, end: string): string {
  const startAt = source.indexOf(start);
  if (startAt === -1) {
    throw new Error(`marker not found: ${start}`);
  }

  // Begin the search past the start marker so that an end marker which equals
  // (or is a prefix of) the start marker cannot match at the same position.
  const endAt = source.indexOf(end, startAt + start.length);
  if (endAt === -1) {
    throw new Error(`end marker not found: ${end}`);
  }

  return source.slice(startAt, endAt);
}
|
||||
|
|
@ -113,45 +113,17 @@ describe('sanitizeLoneSurrogates — wiring invariants', () => {
|
|||
expect(SERVER_SRC).toContain('result: sanitizeLoneSurrogates(cr.result)');
|
||||
});
|
||||
|
||||
test('SSE activity feed routes outbound frames through createSseEndpoint', () => {
|
||||
// v1.51 refactor: /activity/stream no longer inlines its own
|
||||
// ReadableStream/sanitizer wiring; it routes through createSseEndpoint
|
||||
// which applies sanitizeReplacer to every JSON.stringify. The grep
|
||||
// pins both halves of the contract: the endpoint uses the helper,
|
||||
// and the helper does the sanitization.
|
||||
const activityBlock = SERVER_SRC.match(
|
||||
/if \(url\.pathname === '\/activity\/stream'\)[\s\S]*?createSseEndpoint\(/,
|
||||
);
|
||||
expect(activityBlock).not.toBeNull();
|
||||
test('SSE activity feed sanitizes outbound frames via sanitizeReplacer', () => {
|
||||
// Replacer must run DURING stringify; post-stringify regex is ineffective
|
||||
// because JSON.stringify converts \uD800 → "\\ud800" before our regex sees it.
|
||||
expect(SERVER_SRC).toContain('JSON.stringify(entry, sanitizeReplacer)');
|
||||
});
|
||||
|
||||
test('SSE inspector stream routes outbound frames through createSseEndpoint', () => {
|
||||
// Same v1.51 refactor invariant for /inspector/events.
|
||||
const inspectorBlock = SERVER_SRC.match(
|
||||
/if \(url\.pathname === '\/inspector\/events'[\s\S]*?createSseEndpoint\(/,
|
||||
);
|
||||
expect(inspectorBlock).not.toBeNull();
|
||||
test('SSE inspector stream sanitizes outbound frames via sanitizeReplacer', () => {
|
||||
expect(SERVER_SRC).toContain('JSON.stringify(event, sanitizeReplacer)');
|
||||
});
|
||||
|
||||
test('createSseEndpoint applies sanitizeReplacer to every JSON.stringify', () => {
|
||||
// The helper is the single source of truth for SSE sanitization now.
|
||||
// If a future refactor moves stringify off the replacer (e.g. someone
|
||||
// adds a fast-path encode), this test fails and the surrogate-escape
|
||||
// class regresses across every SSE endpoint at once.
|
||||
const helperPath = path.resolve(import.meta.dir, '..', 'src', 'sse-helpers.ts');
|
||||
const helperSrc = fs.readFileSync(helperPath, 'utf-8');
|
||||
expect(helperSrc).toContain('JSON.stringify(');
|
||||
expect(helperSrc).toContain('sanitizeReplacer');
|
||||
// The sanitizer itself uses stripLoneSurrogates (the shared utility in
|
||||
// sanitize.ts) — not a private copy. Re-confirms the helper is wired
|
||||
// to the canonical sanitizer, not a drift'd duplicate.
|
||||
expect(helperSrc).toContain("import { stripLoneSurrogates } from './sanitize'");
|
||||
});
|
||||
|
||||
test('sanitizeReplacer is a function defined in server.ts (for non-SSE egress)', () => {
|
||||
// server.ts keeps its own sanitizeReplacer for the non-SSE JSON egress
|
||||
// paths (handleCommandInternal etc.). The SSE path uses sse-helpers.ts's
|
||||
// own sanitizeReplacer; both must exist independently.
|
||||
test('sanitizeReplacer is a function defined in server.ts', () => {
|
||||
expect(SERVER_SRC).toContain('function sanitizeReplacer(');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1589,17 +1589,19 @@ describe('tool calls collapse into reasoning disclosure', () => {
|
|||
});
|
||||
|
||||
// ─── Idle timeout disabled in headed mode (server.ts) ───────────
|
||||
//
|
||||
// The original 'idle check skips in headed mode' string-grep test was deleted
|
||||
// in v1.42.3.0 — it would have passed even with the dual-instance bug present
|
||||
// because it only grepped for "=== 'headed'" + 'return' in the same window.
|
||||
// Behavioral coverage lives in browse/test/server-factory.test.ts under the
|
||||
// 'idle timer + onDisconnect dual-instance fix' describe block, which
|
||||
// exercises the headed/headless/tunnel branches of idleCheckTick directly.
|
||||
|
||||
describe('idle timeout behavior (server.ts)', () => {
|
||||
const serverSrc = fs.readFileSync(path.join(ROOT, 'src', 'server.ts'), 'utf-8');
|
||||
|
||||
test('idle check skips in headed mode', () => {
|
||||
const idleCheck = serverSrc.slice(
|
||||
serverSrc.indexOf('idleCheckInterval'),
|
||||
serverSrc.indexOf('idleCheckInterval') + 300,
|
||||
);
|
||||
expect(idleCheck).toContain("=== 'headed'");
|
||||
expect(idleCheck).toContain('return');
|
||||
});
|
||||
|
||||
test('sidebar-command resets idle timer', () => {
|
||||
const sidebarCmd = serverSrc.slice(
|
||||
serverSrc.indexOf("url.pathname === '/sidebar-command'"),
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue