From 0f6227ecde6a997df7ed109520215dbe185e49d2 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Mon, 18 May 2026 21:06:22 -0700 Subject: [PATCH] fix(codex): move diff scope into prompt instead of --base (Codex CLI 0.130+ argv conflict) (#1209) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex CLI ≥ 0.130.0 rejects passing a custom prompt and --base together (mutually exclusive at argv level). Every /codex review, /review, and /ship structured Codex review call ended with an argv error before the model ran. Fix: scope the diff in prompt text using "Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD" instead of `--base `. Preserves the filesystem boundary instruction across all invocations and keeps Codex's review prompt tuning. Touches: - codex/SKILL.md.tmpl + regenerated codex/SKILL.md - scripts/resolvers/review.ts + regenerated review/SKILL.md, ship/SKILL.md - test/gen-skill-docs.test.ts: new regression that fails if any of the five known files still contain the prompt+--base shape - test/skill-validation.test.ts: corresponding negative + positive pin on the rendered SKILL.md files Contributed by @jbetala7 via #1209. Closes #1479. Supersedes #1527 (@mvanhorn — same intent, different patch shape, CONFLICTING) and #1449 (@Gujiassh — broader refactor, CONFLICTING). Credit retained in CHANGELOG. Co-Authored-By: Claude Opus 4.7 (1M context) --- codex/SKILL.md | 27 ++++++++++++--------------- codex/SKILL.md.tmpl | 27 ++++++++++++--------------- review/SKILL.md | 2 +- scripts/resolvers/review.ts | 2 +- ship/SKILL.md | 2 +- test/gen-skill-docs.test.ts | 16 ++++++++++++++++ test/skill-validation.test.ts | 8 ++++++++ 7 files changed, 51 insertions(+), 33 deletions(-) diff --git a/codex/SKILL.md b/codex/SKILL.md index edf4075f2..826e50511 100644 --- a/codex/SKILL.md +++ b/codex/SKILL.md @@ -935,23 +935,21 @@ TMPERR=$(mktemp "$TMP_ROOT/codex-err-XXXXXX.txt") 2. Run the review (5-minute timeout). **Codex CLI ≥ 0.130.0 rejects passing a custom prompt and `--base ` together** (the two arguments are mutually -exclusive at argv level), so the previously-prefixed filesystem boundary cannot -be carried in review mode. Two paths: +exclusive at argv level), so put the base diff scope in the prompt instead of +passing `--base`. Two paths: -**Default path (no custom user instructions):** call `codex review --base` bare. -Codex's review prompt template is internally diff-scoped, so the model focuses on -the changes against the base branch. The filesystem boundary that previously -prefixed every review call is no longer carried in bare review mode; the skill -files under `.claude/` and `agents/` are public, so this is a token-efficiency -concern, not a safety concern. If a future diff happens to include skill files, -Codex may spend a few extra tokens reading them. Acceptable trade-off: +**Default path (no custom user instructions):** call `codex review` with the +filesystem boundary and explicit diff-scope instructions in the prompt. This +preserves the boundary while avoiding the prompt-plus-`--base` argv shape: ```bash _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } cd "$_REPO_ROOT" # 330s (5.5min) is slightly longer than the Bash 300s so the shell wrapper # only fires if Bash's own timeout doesn't. -_gstack_codex_timeout_wrapper 330 codex review --base -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" +_gstack_codex_timeout_wrapper 330 codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. Do NOT modify agents/openai.yaml. Stay focused on repository code only. + +Review the changes on this branch against the base branch . Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" _CODEX_EXIT=$? if [ "$_CODEX_EXIT" = "124" ]; then _gstack_codex_log_event "codex_timeout" "330" @@ -992,11 +990,10 @@ if [ "$_CODEX_EXIT" = "124" ]; then fi ``` -**Why the dual path:** Bare `codex review` preserves Codex's built-in review -prompt tuning (the CLI scopes the model to the diff and asks for severity-marked -findings). The exec route loses that tuning but gains custom-instructions -support; the prompt explicitly demands `[P1]` / `[P2]` markers so the gate logic -in step 4 still works. +**Why the dual path:** The default `codex review` path keeps Codex's review +prompt tuning while scoping the diff in prompt text. The `codex exec` route loses +that tuning but gains custom-instructions support; the prompt explicitly demands +`[P1]` / `[P2]` markers so the gate logic in step 4 still works. Use `timeout: 300000` on the Bash call for either path. diff --git a/codex/SKILL.md.tmpl b/codex/SKILL.md.tmpl index 329e93c4f..108fca055 100644 --- a/codex/SKILL.md.tmpl +++ b/codex/SKILL.md.tmpl @@ -163,23 +163,21 @@ TMPERR=$(mktemp "$TMP_ROOT/codex-err-XXXXXX.txt") 2. Run the review (5-minute timeout). **Codex CLI ≥ 0.130.0 rejects passing a custom prompt and `--base ` together** (the two arguments are mutually -exclusive at argv level), so the previously-prefixed filesystem boundary cannot -be carried in review mode. Two paths: +exclusive at argv level), so put the base diff scope in the prompt instead of +passing `--base`. Two paths: -**Default path (no custom user instructions):** call `codex review --base` bare. -Codex's review prompt template is internally diff-scoped, so the model focuses on -the changes against the base branch. The filesystem boundary that previously -prefixed every review call is no longer carried in bare review mode; the skill -files under `.claude/` and `agents/` are public, so this is a token-efficiency -concern, not a safety concern. If a future diff happens to include skill files, -Codex may spend a few extra tokens reading them. Acceptable trade-off: +**Default path (no custom user instructions):** call `codex review` with the +filesystem boundary and explicit diff-scope instructions in the prompt. This +preserves the boundary while avoiding the prompt-plus-`--base` argv shape: ```bash _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } cd "$_REPO_ROOT" # 330s (5.5min) is slightly longer than the Bash 300s so the shell wrapper # only fires if Bash's own timeout doesn't. -_gstack_codex_timeout_wrapper 330 codex review --base -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" +_gstack_codex_timeout_wrapper 330 codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. Do NOT modify agents/openai.yaml. Stay focused on repository code only. + +Review the changes on this branch against the base branch . Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" _CODEX_EXIT=$? if [ "$_CODEX_EXIT" = "124" ]; then _gstack_codex_log_event "codex_timeout" "330" @@ -220,11 +218,10 @@ if [ "$_CODEX_EXIT" = "124" ]; then fi ``` -**Why the dual path:** Bare `codex review` preserves Codex's built-in review -prompt tuning (the CLI scopes the model to the diff and asks for severity-marked -findings). The exec route loses that tuning but gains custom-instructions -support; the prompt explicitly demands `[P1]` / `[P2]` markers so the gate logic -in step 4 still works. +**Why the dual path:** The default `codex review` path keeps Codex's review +prompt tuning while scoping the diff in prompt text. The `codex exec` route loses +that tuning but gains custom-instructions support; the prompt explicitly demands +`[P1]` / `[P2]` markers so the gate logic in step 4 still works. Use `timeout: 300000` on the Bash call for either path. diff --git a/review/SKILL.md b/review/SKILL.md index 88378396a..d178c182e 100644 --- a/review/SKILL.md +++ b/review/SKILL.md @@ -1631,7 +1631,7 @@ If `DIFF_TOTAL >= 200` AND Codex is available AND `OLD_CFG` is NOT `disabled`: TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } cd "$_REPO_ROOT" -codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the diff against the base branch." --base -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" +codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the changes on this branch against the base branch . Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" ``` Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. Present output under `CODEX SAYS (code review):` header. diff --git a/scripts/resolvers/review.ts b/scripts/resolvers/review.ts index 3b9e2999d..40916115d 100644 --- a/scripts/resolvers/review.ts +++ b/scripts/resolvers/review.ts @@ -532,7 +532,7 @@ If \`DIFF_TOTAL >= 200\` AND Codex is available AND \`OLD_CFG\` is NOT \`disable TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } cd "$_REPO_ROOT" -codex review "${CODEX_BOUNDARY}Review the diff against the base branch." --base -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" +codex review "${CODEX_BOUNDARY}Review the changes on this branch against the base branch . Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" \`\`\` Set the Bash tool's \`timeout\` parameter to \`300000\` (5 minutes). Do NOT use the \`timeout\` shell command — it doesn't exist on macOS. Present output under \`CODEX SAYS (code review):\` header. diff --git a/ship/SKILL.md b/ship/SKILL.md index dcab2bdda..fe2500506 100644 --- a/ship/SKILL.md +++ b/ship/SKILL.md @@ -2377,7 +2377,7 @@ If `DIFF_TOTAL >= 200` AND Codex is available AND `OLD_CFG` is NOT `disabled`: TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } cd "$_REPO_ROOT" -codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the diff against the base branch." --base -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" +codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the changes on this branch against the base branch . Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" ``` Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. Present output under `CODEX SAYS (code review):` header. diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index 8e6b8b486..c594ea4bc 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -2704,6 +2704,22 @@ describe('codex commands must not use inline $(git rev-parse --show-toplevel) fo } expect(violations).toEqual([]); }); + + test('codex review commands pass diff scope through prompt, not --base', () => { + const checkedFiles = [ + 'codex/SKILL.md.tmpl', + 'codex/SKILL.md', + 'scripts/resolvers/review.ts', + 'review/SKILL.md', + 'ship/SKILL.md', + ]; + + for (const rel of checkedFiles) { + const content = fs.readFileSync(path.join(ROOT, rel), 'utf-8'); + expect(content).not.toContain('--base -c \'model_reasoning_effort="high"\''); + expect(content).toContain('Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD'); + } + }); }); // ─── Learnings + Confidence Resolver Tests ───────────────────── diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts index 53c7c33aa..b3282272c 100644 --- a/test/skill-validation.test.ts +++ b/test/skill-validation.test.ts @@ -1421,6 +1421,14 @@ describe('Codex skill', () => { expect(content).toContain('codex exec'); }); + test('codex review invocations avoid the prompt plus --base argument shape', () => { + for (const rel of ['codex/SKILL.md', 'review/SKILL.md', 'ship/SKILL.md']) { + const content = fs.readFileSync(path.join(ROOT, rel), 'utf-8'); + expect(content).not.toContain('--base -c \'model_reasoning_effort="high"\''); + expect(content).toContain('Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD'); + } + }); + test('/review persists a review-log entry for ship readiness', () => { const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8'); expect(content).toContain('"skill":"review"');