From 0f6227ecde6a997df7ed109520215dbe185e49d2 Mon Sep 17 00:00:00 2001
From: Garry Tan <garrytan@gmail.com>
Date: Mon, 18 May 2026 21:06:22 -0700
Subject: [PATCH] fix(codex): move diff scope into prompt instead of --base
 (Codex CLI 0.130+ argv conflict) (#1209)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Codex CLI ≥ 0.130.0 rejects passing a custom prompt and --base together
(mutually exclusive at argv level). Every /codex review, /review, and
/ship structured Codex review call ended with an argv error before the
model ran.

Fix: scope the diff in prompt text using
"Run git diff origin/<base>...HEAD 2>/dev/null || git diff <base>...HEAD"
instead of `--base <base>`. Preserves the filesystem boundary
instruction across all invocations and keeps Codex's review prompt
tuning.

Touches:
- codex/SKILL.md.tmpl + regenerated codex/SKILL.md
- scripts/resolvers/review.ts + regenerated review/SKILL.md, ship/SKILL.md
- test/gen-skill-docs.test.ts: new regression that fails if any of the
  five known files still contain the prompt+--base shape
- test/skill-validation.test.ts: corresponding negative + positive pin
  on the rendered SKILL.md files

Contributed by @jbetala7 via #1209. Closes #1479. Supersedes #1527
(@mvanhorn — same intent, different patch shape, CONFLICTING) and
#1449 (@Gujiassh — broader refactor, CONFLICTING). Credit retained
in CHANGELOG.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 codex/SKILL.md                | 27 ++++++++++++---------------
 codex/SKILL.md.tmpl           | 27 ++++++++++++---------------
 review/SKILL.md               |  2 +-
 scripts/resolvers/review.ts   |  2 +-
 ship/SKILL.md                 |  2 +-
 test/gen-skill-docs.test.ts   | 16 ++++++++++++++++
 test/skill-validation.test.ts |  8 ++++++++
 7 files changed, 51 insertions(+), 33 deletions(-)
diff --git a/codex/SKILL.md b/codex/SKILL.md
index edf4075f2..826e50511 100644
--- a/codex/SKILL.md
+++ b/codex/SKILL.md
@@ -935,23 +935,21 @@ TMPERR=$(mktemp "$TMP_ROOT/codex-err-XXXXXX.txt")
 
 2. Run the review (5-minute timeout). **Codex CLI ≥ 0.130.0 rejects passing a
 custom prompt and `--base <branch>` together** (the two arguments are mutually
-exclusive at argv level), so the previously-prefixed filesystem boundary cannot
-be carried in review mode. Two paths:
+exclusive at argv level), so put the base diff scope in the prompt instead of
+passing `--base`. Two paths:
 
-**Default path (no custom user instructions):** call `codex review --base` bare.
-Codex's review prompt template is internally diff-scoped, so the model focuses on
-the changes against the base branch. The filesystem boundary that previously
-prefixed every review call is no longer carried in bare review mode; the skill
-files under `.claude/` and `agents/` are public, so this is a token-efficiency
-concern, not a safety concern. If a future diff happens to include skill files,
-Codex may spend a few extra tokens reading them. Acceptable trade-off:
+**Default path (no custom user instructions):** call `codex review` with the
+filesystem boundary and explicit diff-scope instructions in the prompt. This
+preserves the boundary while avoiding the prompt-plus-`--base` argv shape:
 
 ```bash
 _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; }
 cd "$_REPO_ROOT"
 # 330s (5.5min) is slightly longer than the Bash 300s so the shell wrapper
 # only fires if Bash's own timeout doesn't.
-_gstack_codex_timeout_wrapper 330 codex review --base <base> -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR"
+_gstack_codex_timeout_wrapper 330 codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. Do NOT modify agents/openai.yaml. Stay focused on repository code only.
+
+Review the changes on this branch against the base branch <base>. Run git diff origin/<base>...HEAD 2>/dev/null || git diff <base>...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR"
 _CODEX_EXIT=$?
 if [ "$_CODEX_EXIT" = "124" ]; then
   _gstack_codex_log_event "codex_timeout" "330"
@@ -992,11 +990,10 @@ if [ "$_CODEX_EXIT" = "124" ]; then
 fi
 ```
 
-**Why the dual path:** Bare `codex review` preserves Codex's built-in review
-prompt tuning (the CLI scopes the model to the diff and asks for severity-marked
-findings). The exec route loses that tuning but gains custom-instructions
-support; the prompt explicitly demands `[P1]` / `[P2]` markers so the gate logic
-in step 4 still works.
+**Why the dual path:** The default `codex review` path keeps Codex's review
+prompt tuning while scoping the diff in prompt text. The `codex exec` route loses
+that tuning but gains custom-instructions support; the prompt explicitly demands
+`[P1]` / `[P2]` markers so the gate logic in step 4 still works.
 
 Use `timeout: 300000` on the Bash call for either path.
 
diff --git a/codex/SKILL.md.tmpl b/codex/SKILL.md.tmpl
index 329e93c4f..108fca055 100644
--- a/codex/SKILL.md.tmpl
+++ b/codex/SKILL.md.tmpl
@@ -163,23 +163,21 @@ TMPERR=$(mktemp "$TMP_ROOT/codex-err-XXXXXX.txt")
 
 2. Run the review (5-minute timeout). **Codex CLI ≥ 0.130.0 rejects passing a
 custom prompt and `--base <branch>` together** (the two arguments are mutually
-exclusive at argv level), so the previously-prefixed filesystem boundary cannot
-be carried in review mode. Two paths:
+exclusive at argv level), so put the base diff scope in the prompt instead of
+passing `--base`. Two paths:
 
-**Default path (no custom user instructions):** call `codex review --base` bare.
-Codex's review prompt template is internally diff-scoped, so the model focuses on
-the changes against the base branch. The filesystem boundary that previously
-prefixed every review call is no longer carried in bare review mode; the skill
-files under `.claude/` and `agents/` are public, so this is a token-efficiency
-concern, not a safety concern. If a future diff happens to include skill files,
-Codex may spend a few extra tokens reading them. Acceptable trade-off:
+**Default path (no custom user instructions):** call `codex review` with the
+filesystem boundary and explicit diff-scope instructions in the prompt. This
+preserves the boundary while avoiding the prompt-plus-`--base` argv shape:
 
 ```bash
 _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; }
 cd "$_REPO_ROOT"
 # 330s (5.5min) is slightly longer than the Bash 300s so the shell wrapper
 # only fires if Bash's own timeout doesn't.
-_gstack_codex_timeout_wrapper 330 codex review --base <base> -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR"
+_gstack_codex_timeout_wrapper 330 codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. Do NOT modify agents/openai.yaml. Stay focused on repository code only.
+
+Review the changes on this branch against the base branch <base>. Run git diff origin/<base>...HEAD 2>/dev/null || git diff <base>...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR"
 _CODEX_EXIT=$?
 if [ "$_CODEX_EXIT" = "124" ]; then
   _gstack_codex_log_event "codex_timeout" "330"
@@ -220,11 +218,10 @@ if [ "$_CODEX_EXIT" = "124" ]; then
 fi
 ```
 
-**Why the dual path:** Bare `codex review` preserves Codex's built-in review
-prompt tuning (the CLI scopes the model to the diff and asks for severity-marked
-findings). The exec route loses that tuning but gains custom-instructions
-support; the prompt explicitly demands `[P1]` / `[P2]` markers so the gate logic
-in step 4 still works.
+**Why the dual path:** The default `codex review` path keeps Codex's review
+prompt tuning while scoping the diff in prompt text. The `codex exec` route loses
+that tuning but gains custom-instructions support; the prompt explicitly demands
+`[P1]` / `[P2]` markers so the gate logic in step 4 still works.
 
 Use `timeout: 300000` on the Bash call for either path.
 
diff --git a/review/SKILL.md b/review/SKILL.md
index 88378396a..d178c182e 100644
--- a/review/SKILL.md
+++ b/review/SKILL.md
@@ -1631,7 +1631,7 @@ If `DIFF_TOTAL >= 200` AND Codex is available AND `OLD_CFG` is NOT `disabled`:
 TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX)
 _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; }
 cd "$_REPO_ROOT"
-codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the diff against the base branch." --base <base> -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR"
+codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the changes on this branch against the base branch <base>. Run git diff origin/<base>...HEAD 2>/dev/null || git diff <base>...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR"
 ```
 
 Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. Present output under `CODEX SAYS (code review):` header.
diff --git a/scripts/resolvers/review.ts b/scripts/resolvers/review.ts
index 3b9e2999d..40916115d 100644
--- a/scripts/resolvers/review.ts
+++ b/scripts/resolvers/review.ts
@@ -532,7 +532,7 @@ If \`DIFF_TOTAL >= 200\` AND Codex is available AND \`OLD_CFG\` is NOT \`disable
 TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX)
 _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; }
 cd "$_REPO_ROOT"
-codex review "${CODEX_BOUNDARY}Review the diff against the base branch." --base <base> -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR"
+codex review "${CODEX_BOUNDARY}Review the changes on this branch against the base branch <base>. Run git diff origin/<base>...HEAD 2>/dev/null || git diff <base>...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR"
 \`\`\`
 
 Set the Bash tool's \`timeout\` parameter to \`300000\` (5 minutes). Do NOT use the \`timeout\` shell command — it doesn't exist on macOS. Present output under \`CODEX SAYS (code review):\` header.
diff --git a/ship/SKILL.md b/ship/SKILL.md
index dcab2bdda..fe2500506 100644
--- a/ship/SKILL.md
+++ b/ship/SKILL.md
@@ -2377,7 +2377,7 @@ If `DIFF_TOTAL >= 200` AND Codex is available AND `OLD_CFG` is NOT `disabled`:
 TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX)
 _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; }
 cd "$_REPO_ROOT"
-codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the diff against the base branch." --base <base> -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR"
+codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the changes on this branch against the base branch <base>. Run git diff origin/<base>...HEAD 2>/dev/null || git diff <base>...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR"
 ```
 
 Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. Present output under `CODEX SAYS (code review):` header.
diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts
index 8e6b8b486..c594ea4bc 100644
--- a/test/gen-skill-docs.test.ts
+++ b/test/gen-skill-docs.test.ts
@@ -2704,6 +2704,22 @@ describe('codex commands must not use inline $(git rev-parse --show-toplevel) fo
     }
     expect(violations).toEqual([]);
   });
+
+  test('codex review commands pass diff scope through prompt, not --base', () => {
+    const checkedFiles = [
+      'codex/SKILL.md.tmpl',
+      'codex/SKILL.md',
+      'scripts/resolvers/review.ts',
+      'review/SKILL.md',
+      'ship/SKILL.md',
+    ];
+
+    for (const rel of checkedFiles) {
+      const content = fs.readFileSync(path.join(ROOT, rel), 'utf-8');
+      expect(content).not.toContain('--base <base> -c \'model_reasoning_effort="high"\'');
+      expect(content).toContain('Run git diff origin/<base>...HEAD 2>/dev/null || git diff <base>...HEAD');
+    }
+  });
 });
 
 // ─── Learnings + Confidence Resolver Tests ─────────────────────
diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts
index 53c7c33aa..b3282272c 100644
--- a/test/skill-validation.test.ts
+++ b/test/skill-validation.test.ts
@@ -1421,6 +1421,14 @@ describe('Codex skill', () => {
     expect(content).toContain('codex exec');
   });
 
+  test('codex review invocations avoid the prompt plus --base argument shape', () => {
+    for (const rel of ['codex/SKILL.md', 'review/SKILL.md', 'ship/SKILL.md']) {
+      const content = fs.readFileSync(path.join(ROOT, rel), 'utf-8');
+      expect(content).not.toContain('--base <base> -c \'model_reasoning_effort="high"\'');
+      expect(content).toContain('Run git diff origin/<base>...HEAD 2>/dev/null || git diff <base>...HEAD');
+    }
+  });
+
   test('/review persists a review-log entry for ship readiness', () => {
     const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
     expect(content).toContain('"skill":"review"');