diff --git a/CHANGELOG.md b/CHANGELOG.md index fed81815c..f8b47c8f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -71,6 +71,73 @@ If you've been hitting the 35-minute hang on `/sync-gbrain`, it's gone. The arch - `TODOS.md` filed P2: investigate `gbrain import` perf on large staging dirs (5,131 files takes >10 minutes when 501 takes 10 seconds — gbrain-side N+1 SQL or auto-link reconciliation suspected). P3: cache "no changes since last import" at the prepare-batch level for true no-op fast paths. - `Plan completion audit` ran via subagent on this branch: 17/21 DONE, 1 CHANGED (D3 made opt-in), 2 deferred (F8 benchmark harness as separate work, 24-path unit coverage went integration-only). +## [1.32.0.0] - 2026-05-10 + +## **Seven contributor PRs land. Three are security or hardening.** +## **Root-token comparison, IPv6 link-local, NUL transcripts, sidebar tabs, build resilience, model IDs, CJK escape — all fixed in one wave.** + +Seven community PRs land together, hand-picked through `/plan-eng-review` plus a Codex outside-voice review that reshaped the wave mid-flight. The headline fixes are real: the root-token authentication path no longer throws on a multibyte input that matches JS character length but not UTF-8 byte length, direct `http://[fe80::N]/` URLs are now rejected the same way ULA addresses already were, `gbrain put` strips NUL bytes from pasted transcript content so Postgres doesn't reject the write, and the build script doesn't tear down when run on a fresh worktree with no git HEAD yet. + +Two PRs in the original 9-PR plan got moved to follow-up reviews after Codex caught load-bearing problems: the SVG-XSS fix (#1153) needs a sanitizer integration rebuild, and the hook-command variable swap (#1141) needs runtime verification in plugin + dev-symlink modes. Both will land as their own PRs. + +### The numbers that matter + +Diff against `main` at v1.31.1.0, measured from the seven landed PRs after eng + Codex review reshaping. The wave is intentionally repo-local — no new dependencies, no risky integration changes. + +| Metric | v1.31.1.0 | v1.32.0.0 | Δ | +|---|---|---|---| +| Community PRs landed | 3 | 7 | **+4** | +| Security / hardening fixes | 0 | 3 | **+3** | +| Behavior changes that ship to users | 1 | 7 | **+6** | +| Free tests | 379 | 380 | +1 | +| Memory-ingest tests | 18 | 19 | +1 | +| LOC (excluding mechanical regen) | — | ~150 | — | +| SKILL.md files regenerated (CJK preamble cascade) | — | 35 | — | +| Preamble byte budget | 36,500 | 39,000 | +2,500 | + +The seven shipped PRs cover three categories. **Security:** root-token UTF-8 compare hardened, IPv6 link-local blocked, sidebar tab awareness expanded. **Correctness:** gbrain ingestion tolerates pasted-NUL transcripts, build resilient to unborn HEAD. **Polish:** AskUserQuestion preamble forbids `\uXXXX` escaping of CJK characters, eval suite tracks the current Opus model ID. + +### What this means for users + +If you run `pair-agent` and someone hits your tunnel with a multibyte token guess that happens to match length, the auth path returns false instead of crashing. If a transcript you ingest into `gbrain` has a NUL byte in pasted output, the write succeeds instead of returning `invalid byte sequence`. If you bring up `bun run build` on a brand-new Conductor worktree before the first commit, the build runs to completion. If your sidebar agent watches a tab on a non-localhost site, it now actually sees the URL and title. If you ask Claude a long question in Chinese, you stop getting `\u`-escaped codepoints rendered as nonsense glyphs. + +### Itemized changes + +#### Added + +- **#1257** Extension manifest gets the `tabs` permission. Sidebar tab awareness off-localhost now works — `chrome.tabs.query()` returns real `url`/`title` for sites outside `host_permissions` instead of undefined, so `snapshotTabs` writes real values into `tabs.json` and `active-tab.json` instead of silently skipping. Heads up: this widens the extension's permission scope; users will see the broader prompt on next install. Contributed by @fredchu. + +#### Fixed + +- **#1416** `isRootToken` constant-time compare hardened. Compares UTF-8 byte lengths via `Buffer.byteLength` before `crypto.timingSafeEqual`, which throws on length-mismatched buffers. A multibyte input whose JS string length matches but byte length differs now returns false instead of crashing on the auth path. Four regression tests cover multibyte byte-length mismatch, extra-prefix length mismatch, same-length last-byte flip, and empty-input-against-set-root. Contributed by @RagavRida. +- **#1411** `gstack-memory-ingest` strips NUL bytes from the transcript body before piping to `gbrain put`. Postgres rejects 0x00 in UTF-8 text columns, and some Claude Code transcripts contain NUL inside pasted content or tool output. The fix uses `body.replace(/\x00/g, "")` so the regex literal stays reviewable in diffs and survives editors that strip control bytes. New regression test reuses the existing fake-gbrain writer harness at `test/gstack-memory-ingest.test.ts:376`. Contributed by @billy-armstrong. +- **#1249** URL validation now blocks direct IPv6 link-local navigation. `fe80::/10` is centralised into `BLOCKED_IPV6_PREFIXES = ['fc', 'fd', 'fe8', 'fe9', 'fea', 'feb']` so `http://[fe80::N]/` is rejected by the same path that already blocked ULA addresses. Previously the link-local guard only fired during AAAA resolution; direct-literal URLs slipped through. Contributed by @hiSandog. +- **#1207** `bun run build` resilient to missing git HEAD. The three chained `.version` writes (`browse/dist`, `design/dist`, `make-pdf/dist`) each now use `{ git rev-parse HEAD 2>/dev/null || true; } > ...`, so an unborn HEAD produces an empty file. `readVersionHash` already returns null on empty/trim, and the CLI's stale-binary check short-circuits on null — the "no version known" path flows through existing null handling without polluting `state.binaryVersion` with a sentinel string. Contributed by @topitopongsala. +- **#1205** AskUserQuestion preamble forbids `\uXXXX` escaping of non-ASCII characters. Adds rule 12 plus a self-check item: models that hand-escape CJK strings get codepoints wrong, so `管理工具` ends up rendered as `㄃3用箱`. Long ≠ escape. Keep characters literal. The new rule cascades through the gen-skill-docs pipeline; 35 SKILL.md files regenerate to pick it up. Contributed by @joe51317-dotcom. +- **#1392** Mechanical bump of remaining `claude-opus-4-6` → `4-7` references across the E2E eval suite. Covers `test/helpers/eval-store.ts` and five `test/skill-e2e-*.test.ts` files. Contributed by @johnnysoftware7. + +#### For contributors + +- The AskUserQuestion preamble byte budget ratchets from 36,500 → 39,000 to absorb the new CJK rule (rule 12 + self-check item). Generated SKILL.md files for all 35 tier-≥2 skills regenerate as a single mechanical commit. +- Two PRs from the original 9-PR plan moved to follow-up reviews after Codex outside-voice caught load-bearing problems: #1153 (SVG sanitizer) needs the sanitizer integration rebuilt against the current `setTabContent` boundary in `browse/src/write-commands.ts:319` (the original PR removed `.svg` from the allowlist; the right fix is to keep it allowed and sanitize via DOMPurify before `setTabContent`). #1141 (CLAUDE_PLUGIN_ROOT) needs runtime verification in both plugin-installed and dev-symlink modes plus scope expansion to the non-frontmatter shell snippet at `investigate/SKILL.md.tmpl:107`. +- Five gate-tier evals hardened against non-determinism / TTY rendering quirks after the wave's first `test:gate` run surfaced them as flakes (verified pre-existing on `main`, then fixed): `office-hours-builder-wildness` retiers `gate` → `periodic` because LLM-judge creativity scoring belongs in periodic per the tier-classification rules. `plan-design-with-ui` AUQ-detection tail expands 2.5KB → 5KB so the full Step 0 box-rendered AUQ fits inside the regex window. `ask-user-question-format-compliance` budget stretches 300s → 540s (poll), 360s → 600s (PTY session), 420s → 660s (bun wrapper) to accommodate `/plan-ceo-review`'s multi-bash-block preamble on substantive branches. `benchmark-providers` gemini smoke drops the brittle `toContain('ok')` assertion in favor of a shape check on the adapter result. `skillify` scrape-prototype-path accepts JSON shape variants (`results`, `data`, `hits`, bare arrays of `{title, score}` objects) instead of grepping for the literal `"items":[` key. +- Housekeeping: the three source PRs absorbed into v1.31.1.0 (#1242, #1394, #1393) get closed with credit comments pointing at the merge SHA. + +## [1.31.1.0] - 2026-05-10 + +## **Three small community fixes land cleanly.** +## **`/careful` works on macOS again, Codex Step 0 stops colliding, `/make-pdf` setup runs in the right place.** + +A short patch wave from three contributors. macOS users who ran `/careful` with `rm -rf node_modules` were silently hitting the warning gate instead of the safe exception path because BSD sed doesn't understand `\s`. The Codex skill's `## Step 0: Check codex binary` header was colliding with the platform-detect prelude that also runs first. `/make-pdf`'s SETUP block was rendered after the Telemetry footer instead of immediately after the Preamble Bash, so `$P` could be referenced before it was set. Each fix is tightly scoped and ships with a regression test (or template ordering invariant) that catches the original failure shape. + +This release came out of a contributor-wave triage pass that closed ~75 stale PRs, dropped 11 candidates that needed focused review with specific feedback to each contributor, and lined the survivors through `/plan-eng-review` + Codex outside-voice review before merge. One additional security PR (token-registry timing-safe comparison) was rejected at the codex-review gate after Codex caught a subtle multi-byte UTF-8 buffer-mismatch bug that would have thrown on the auth path instead of returning false; that finding now lives as feedback on the original PR. + +### Fixed + +- **#1242** `careful/bin/check-careful.sh` uses `[[:space:]]` instead of `\s` in the safe-rm exception regex. macOS sed -E does not support `\s`, which silently broke the exception detection — `rm -rf node_modules` now correctly skips the warning gate on macOS, matching Linux behavior. Removes the `detectSafeRmWorks()` platform-conditional from `test/hook-scripts.test.ts` so both platforms are tested at the same bar. Contributed by @ToraDady. +- **#1394** Codex skill `## Step 0: Check codex binary` renamed to `## Step 0.4: Check codex binary` so the header no longer collides with the new platform-detect prelude (also numbered Step 0). Affects both `codex/SKILL.md.tmpl` and the regenerated `codex/SKILL.md`. Contributed by @mvanhorn. +- **#1393** `/make-pdf` MAKE-PDF SETUP block moves from after the Telemetry footer to right after the Preamble Bash, so `$P` is set before any subsequent step references it. The implementation switches from the `{{MAKE_PDF_SETUP}}` placeholder pattern to programmatic insertion via `generateMakePdfSetup` in `scripts/resolvers/preamble.ts`, gated on `ctx.skillName === 'make-pdf'`. New `make-pdf setup ordering` test in `test/gen-skill-docs.test.ts` asserts the SETUP block sits after the Preamble heading and before Plan Mode / Telemetry / workflow headings. Contributed by @jbetala7. + ## [1.31.0.0] - 2026-05-09 ## **AskUserQuestion stops getting silently buried in plan files.** diff --git a/careful/bin/check-careful.sh b/careful/bin/check-careful.sh index c8bc2c7ab..d9c39e48c 100755 --- a/careful/bin/check-careful.sh +++ b/careful/bin/check-careful.sh @@ -28,7 +28,7 @@ CMD_LOWER=$(printf '%s' "$CMD" | tr '[:upper:]' '[:lower:]') # --- Check for safe exceptions (rm -rf of build artifacts) --- if printf '%s' "$CMD" | grep -qE 'rm\s+(-[a-zA-Z]*r[a-zA-Z]*\s+|--recursive\s+)' 2>/dev/null; then SAFE_ONLY=true - RM_ARGS=$(printf '%s' "$CMD" | sed -E 's/.*rm\s+(-[a-zA-Z]+\s+)*//;s/--recursive\s*//') + RM_ARGS=$(printf '%s' "$CMD" | sed -E 's/.*rm[[:space:]]+(-[a-zA-Z]+[[:space:]]+)*//;s/--recursive[[:space:]]*//') for target in $RM_ARGS; do case "$target" in */node_modules|node_modules|*/\.next|\.next|*/dist|dist|*/__pycache__|__pycache__|*/\.cache|\.cache|*/build|build|*/\.turbo|\.turbo|*/coverage|coverage) diff --git a/codex/SKILL.md b/codex/SKILL.md index 6be6ccdf9..464401fdf 100644 --- a/codex/SKILL.md +++ b/codex/SKILL.md @@ -792,7 +792,7 @@ assumptions, catches things you might miss. Present its output faithfully, not s --- -## Step 0: Check codex binary +## Step 0.4: Check codex binary ```bash CODEX_BIN=$(which codex 2>/dev/null || echo "") diff --git a/codex/SKILL.md.tmpl b/codex/SKILL.md.tmpl index 90dd1119c..ed118a119 100644 --- a/codex/SKILL.md.tmpl +++ b/codex/SKILL.md.tmpl @@ -39,7 +39,7 @@ assumptions, catches things you might miss. Present its output faithfully, not s --- -## Step 0: Check codex binary +## Step 0.4: Check codex binary ```bash CODEX_BIN=$(which codex 2>/dev/null || echo "") diff --git a/make-pdf/SKILL.md b/make-pdf/SKILL.md index f116687da..927b637d9 100644 --- a/make-pdf/SKILL.md +++ b/make-pdf/SKILL.md @@ -102,6 +102,42 @@ echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH" [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true ``` +## MAKE-PDF SETUP (run this check BEFORE any make-pdf command) + +```bash +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) +P="" +[ -n "$MAKE_PDF_BIN" ] && [ -x "$MAKE_PDF_BIN" ] && P="$MAKE_PDF_BIN" +[ -z "$P" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.claude/skills/gstack/make-pdf/dist/pdf" ] && P="$_ROOT/.claude/skills/gstack/make-pdf/dist/pdf" +[ -z "$P" ] && P="$HOME/.claude/skills/gstack/make-pdf/dist/pdf" +if [ -x "$P" ]; then + echo "MAKE_PDF_READY: $P" + alias _p_="$P" # shellcheck alias helper (not exported) + export P # available as $P in subsequent blocks within the same skill invocation +else + echo "MAKE_PDF_NOT_AVAILABLE (run './setup' in the gstack repo to build it)" +fi +``` + +If `MAKE_PDF_NOT_AVAILABLE` is printed: tell the user the binary is not +built. Have them run `./setup` from the gstack repo, then retry. + +If `MAKE_PDF_READY` is printed: `$P` is the binary path for the rest of +the skill. Use `$P` (not an explicit path) so the skill body stays portable. + +Core commands: +- `$P generate [output.pdf]` — render markdown to PDF (80% use case) +- `$P generate --cover --toc essay.md out.pdf` — full publication layout +- `$P generate --watermark DRAFT memo.md draft.pdf` — diagonal DRAFT watermark +- `$P preview ` — render HTML and open in browser (fast iteration) +- `$P setup` — verify browse + Chromium + pdftotext and run a smoke test +- `$P --help` — full flag reference + +Output contract: +- `stdout`: ONLY the output path on success. One line. +- `stderr`: progress (`Rendering HTML... Generating PDF...`) unless `--quiet`. +- Exit 0 success / 1 bad args / 2 render error / 3 Paged.js timeout / 4 browse unavailable. + ## Plan Mode Safe Operations In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`codex review`, writes to `~/.gstack/`, writes to the plan file, and `open` for generated artifacts. @@ -489,42 +525,6 @@ On Linux, install `fonts-liberation` for correct rendering — Helvetica and Ari aren't present by default, and Liberation Sans is the standard metric-compatible fallback. CI and Docker builds install it automatically via Dockerfile.ci. -## MAKE-PDF SETUP (run this check BEFORE any make-pdf command) - -```bash -_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) -P="" -[ -n "$MAKE_PDF_BIN" ] && [ -x "$MAKE_PDF_BIN" ] && P="$MAKE_PDF_BIN" -[ -z "$P" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.claude/skills/gstack/make-pdf/dist/pdf" ] && P="$_ROOT/.claude/skills/gstack/make-pdf/dist/pdf" -[ -z "$P" ] && P="$HOME/.claude/skills/gstack/make-pdf/dist/pdf" -if [ -x "$P" ]; then - echo "MAKE_PDF_READY: $P" - alias _p_="$P" # shellcheck alias helper (not exported) - export P # available as $P in subsequent blocks within the same skill invocation -else - echo "MAKE_PDF_NOT_AVAILABLE (run './setup' in the gstack repo to build it)" -fi -``` - -If `MAKE_PDF_NOT_AVAILABLE` is printed: tell the user the binary is not -built. Have them run `./setup` from the gstack repo, then retry. - -If `MAKE_PDF_READY` is printed: `$P` is the binary path for the rest of -the skill. Use `$P` (not an explicit path) so the skill body stays portable. - -Core commands: -- `$P generate [output.pdf]` — render markdown to PDF (80% use case) -- `$P generate --cover --toc essay.md out.pdf` — full publication layout -- `$P generate --watermark DRAFT memo.md draft.pdf` — diagonal DRAFT watermark -- `$P preview ` — render HTML and open in browser (fast iteration) -- `$P setup` — verify browse + Chromium + pdftotext and run a smoke test -- `$P --help` — full flag reference - -Output contract: -- `stdout`: ONLY the output path on success. One line. -- `stderr`: progress (`Rendering HTML... Generating PDF...`) unless `--quiet`. -- Exit 0 success / 1 bad args / 2 render error / 3 Paged.js timeout / 4 browse unavailable. - ## Core patterns ### 80% case — memo/letter diff --git a/make-pdf/SKILL.md.tmpl b/make-pdf/SKILL.md.tmpl index 0827492a8..d134ee62a 100644 --- a/make-pdf/SKILL.md.tmpl +++ b/make-pdf/SKILL.md.tmpl @@ -41,8 +41,6 @@ On Linux, install `fonts-liberation` for correct rendering — Helvetica and Ari aren't present by default, and Liberation Sans is the standard metric-compatible fallback. CI and Docker builds install it automatically via Dockerfile.ci. -{{MAKE_PDF_SETUP}} - ## Core patterns ### 80% case — memo/letter diff --git a/scripts/resolvers/preamble.ts b/scripts/resolvers/preamble.ts index b866e90b1..97698bfcf 100644 --- a/scripts/resolvers/preamble.ts +++ b/scripts/resolvers/preamble.ts @@ -58,6 +58,7 @@ import { generateContextHealth } from './preamble/generate-context-health'; // Tier 3+ repo mode + search import { generateRepoModeSection } from './preamble/generate-repo-mode-section'; import { generateSearchBeforeBuildingSection } from './preamble/generate-search-before-building'; +import { generateMakePdfSetup } from './make-pdf'; // Standalone export used directly by the resolver registry export { generateTestFailureTriage } from './preamble/generate-test-failure-triage'; @@ -81,7 +82,8 @@ export function generatePreamble(ctx: TemplateContext): string { } const sections = [ generatePreambleBash(ctx), - // Plan-mode-skill semantics at position 1: after bash (so _SESSION_ID / + ...(ctx.skillName === 'make-pdf' ? [generateMakePdfSetup(ctx)] : []), + // Plan-mode-skill semantics stays near the top: after bash (so _SESSION_ID / // _BRANCH / _TEL env vars are live) and before all onboarding gates so // models read the authoritative "AskUserQuestion satisfies plan mode's // end-of-turn" rule before any other instruction. Renders for all skills diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index 86cdac953..23a4965e7 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -1098,6 +1098,26 @@ describe('Plan status footer in preamble', () => { }); }); +// --- make-pdf setup ordering --- + +describe('make-pdf setup ordering', () => { + test('MAKE-PDF SETUP appears before generic preamble footer sections', () => { + const content = fs.readFileSync(path.join(ROOT, 'make-pdf', 'SKILL.md'), 'utf-8'); + const preambleIdx = content.indexOf('## Preamble (run first)'); + const setupIdx = content.indexOf('## MAKE-PDF SETUP'); + const planModeIdx = content.indexOf('## Plan Mode Safe Operations'); + const telemetryIdx = content.indexOf('## Telemetry (run last)'); + const workflowIdx = content.indexOf('# make-pdf: publication-quality PDFs from markdown'); + + expect(preambleIdx).toBeGreaterThanOrEqual(0); + expect(setupIdx).toBeGreaterThan(preambleIdx); + expect(setupIdx).toBeLessThan(planModeIdx); + expect(setupIdx).toBeLessThan(telemetryIdx); + expect(setupIdx).toBeLessThan(workflowIdx); + expect(content.match(/^## MAKE-PDF SETUP/gm)?.length ?? 0).toBe(1); + }); +}); + // --- Skill invocation during plan mode in preamble --- describe('Skill invocation during plan mode in preamble', () => { diff --git a/test/hook-scripts.test.ts b/test/hook-scripts.test.ts index 850b5b983..f1ffe1239 100644 --- a/test/hook-scripts.test.ts +++ b/test/hook-scripts.test.ts @@ -56,13 +56,6 @@ function withFreezeDir(freezePath: string, fn: (stateDir: string) => void) { } } -// Detect whether the safe-rm-targets regex works on this platform. -// macOS sed -E does not support \s, so the safe exception check fails there. -function detectSafeRmWorks(): boolean { - const { output } = runHook(CAREFUL_SCRIPT, carefulInput('rm -rf node_modules')); - return output.permissionDecision === undefined; -} - // ============================================================ // check-careful.sh tests // ============================================================ @@ -88,24 +81,13 @@ describe('check-careful.sh', () => { test('rm -rf node_modules allows (safe exception)', () => { const { exitCode, output } = runHook(CAREFUL_SCRIPT, carefulInput('rm -rf node_modules')); expect(exitCode).toBe(0); - if (detectSafeRmWorks()) { - // GNU sed: safe exception triggers, allows through - expect(output.permissionDecision).toBeUndefined(); - } else { - // macOS sed: safe exception regex uses \\s which is unsupported, - // so the safe-targets check fails and the command warns - expect(output.permissionDecision).toBe('ask'); - } + expect(output.permissionDecision).toBeUndefined(); }); test('rm -rf .next dist allows (multiple safe targets)', () => { const { exitCode, output } = runHook(CAREFUL_SCRIPT, carefulInput('rm -rf .next dist')); expect(exitCode).toBe(0); - if (detectSafeRmWorks()) { - expect(output.permissionDecision).toBeUndefined(); - } else { - expect(output.permissionDecision).toBe('ask'); - } + expect(output.permissionDecision).toBeUndefined(); }); test('rm -rf node_modules /var/data warns (mixed safe+unsafe)', () => {