diff --git a/codex/SKILL.md b/codex/SKILL.md index d74a52588..dbc6bbcb6 100644 --- a/codex/SKILL.md +++ b/codex/SKILL.md @@ -955,6 +955,13 @@ if [ "$_CODEX_EXIT" = "124" ]; then _gstack_codex_log_event "codex_timeout" "330" _gstack_codex_log_hang "review" "$(wc -c < "$TMPERR" 2>/dev/null || echo 0)" echo "Codex stalled past 5.5 minutes. Common causes: model API stall, long prompt, network issue. Try re-running. If persistent, split the prompt or check ~/.codex/logs/." +elif [ "$_CODEX_EXIT" != "0" ]; then + # Surface non-zero exits (parse errors, arg-shape breaks, etc.) so the + # calling agent doesn't read "no output" as a silent model/API stall and + # burn 30-60min misdiagnosing it. See #1327. + echo "[codex exit $_CODEX_EXIT] $(head -1 "$TMPERR" 2>/dev/null || echo "no stderr captured")" + head -20 "$TMPERR" 2>/dev/null | sed 's/^/ /' || true + _gstack_codex_log_event "codex_nonzero_exit" "review:$_CODEX_EXIT" fi ``` @@ -1245,6 +1252,12 @@ if [ "$_CODEX_EXIT" = "124" ]; then _gstack_codex_log_event "codex_timeout" "600" _gstack_codex_log_hang "challenge" "$(wc -c < "$TMPERR" 2>/dev/null || echo 0)" echo "Codex stalled past 10 minutes. Common causes: model API stall, long prompt, network issue. Try re-running. If persistent, split the prompt or check ~/.codex/logs/." +elif [ "$_CODEX_EXIT" != "0" ]; then + # Surface non-zero exits so the calling agent doesn't read "no output" as + # a silent model/API stall. See #1327. + echo "[codex exit $_CODEX_EXIT] $(head -1 "$TMPERR" 2>/dev/null || echo "no stderr captured")" + head -20 "$TMPERR" 2>/dev/null | sed 's/^/ /' || true + _gstack_codex_log_event "codex_nonzero_exit" "challenge:$_CODEX_EXIT" fi # Fix 2: surface auth errors from captured stderr instead of dropping them if grep -qiE "auth|login|unauthorized" "$TMPERR" 2>/dev/null; then @@ -1392,6 +1405,12 @@ if [ "$_CODEX_EXIT" = "124" ]; then _gstack_codex_log_event "codex_timeout" "600" _gstack_codex_log_hang "consult" "$(wc -c < "$TMPERR" 2>/dev/null || echo 0)" echo "Codex stalled past 10 minutes. Common causes: model API stall, long prompt, network issue. Try re-running. If persistent, split the prompt or check ~/.codex/logs/." +elif [ "$_CODEX_EXIT" != "0" ]; then + # Surface non-zero exits so the calling agent doesn't read "no output" as + # a silent model/API stall. See #1327. + echo "[codex exit $_CODEX_EXIT] $(head -1 "$TMPERR" 2>/dev/null || echo "no stderr captured")" + head -20 "$TMPERR" 2>/dev/null | sed 's/^/ /' || true + _gstack_codex_log_event "codex_nonzero_exit" "consult:$_CODEX_EXIT" fi ``` @@ -1414,6 +1433,12 @@ if [ "$_CODEX_EXIT" = "124" ]; then _gstack_codex_log_event "codex_timeout" "600" _gstack_codex_log_hang "consult-resume" "$(wc -c < "$TMPERR" 2>/dev/null || echo 0)" echo "Codex stalled past 10 minutes. Common causes: model API stall, long prompt, network issue. Try re-running. If persistent, split the prompt or check ~/.codex/logs/." +elif [ "$_CODEX_EXIT" != "0" ]; then + # Surface non-zero exits so the calling agent doesn't read "no output" as + # a silent model/API stall. See #1327. + echo "[codex exit $_CODEX_EXIT] $(head -1 "$TMPERR" 2>/dev/null || echo "no stderr captured")" + head -20 "$TMPERR" 2>/dev/null | sed 's/^/ /' || true + _gstack_codex_log_event "codex_nonzero_exit" "consult-resume:$_CODEX_EXIT" fi 5. Capture session ID from the streamed output. The parser prints `SESSION_ID:` diff --git a/codex/SKILL.md.tmpl b/codex/SKILL.md.tmpl index 6cda8a5a2..333de7d8d 100644 --- a/codex/SKILL.md.tmpl +++ b/codex/SKILL.md.tmpl @@ -183,6 +183,13 @@ if [ "$_CODEX_EXIT" = "124" ]; then _gstack_codex_log_event "codex_timeout" "330" _gstack_codex_log_hang "review" "$(wc -c < "$TMPERR" 2>/dev/null || echo 0)" echo "Codex stalled past 5.5 minutes. Common causes: model API stall, long prompt, network issue. Try re-running. If persistent, split the prompt or check ~/.codex/logs/." +elif [ "$_CODEX_EXIT" != "0" ]; then + # Surface non-zero exits (parse errors, arg-shape breaks, etc.) so the + # calling agent doesn't read "no output" as a silent model/API stall and + # burn 30-60min misdiagnosing it. See #1327. + echo "[codex exit $_CODEX_EXIT] $(head -1 "$TMPERR" 2>/dev/null || echo "no stderr captured")" + head -20 "$TMPERR" 2>/dev/null | sed 's/^/ /' || true + _gstack_codex_log_event "codex_nonzero_exit" "review:$_CODEX_EXIT" fi ``` @@ -366,6 +373,12 @@ if [ "$_CODEX_EXIT" = "124" ]; then _gstack_codex_log_event "codex_timeout" "600" _gstack_codex_log_hang "challenge" "$(wc -c < "$TMPERR" 2>/dev/null || echo 0)" echo "Codex stalled past 10 minutes. Common causes: model API stall, long prompt, network issue. Try re-running. If persistent, split the prompt or check ~/.codex/logs/." +elif [ "$_CODEX_EXIT" != "0" ]; then + # Surface non-zero exits so the calling agent doesn't read "no output" as + # a silent model/API stall. See #1327. + echo "[codex exit $_CODEX_EXIT] $(head -1 "$TMPERR" 2>/dev/null || echo "no stderr captured")" + head -20 "$TMPERR" 2>/dev/null | sed 's/^/ /' || true + _gstack_codex_log_event "codex_nonzero_exit" "challenge:$_CODEX_EXIT" fi # Fix 2: surface auth errors from captured stderr instead of dropping them if grep -qiE "auth|login|unauthorized" "$TMPERR" 2>/dev/null; then @@ -513,6 +526,12 @@ if [ "$_CODEX_EXIT" = "124" ]; then _gstack_codex_log_event "codex_timeout" "600" _gstack_codex_log_hang "consult" "$(wc -c < "$TMPERR" 2>/dev/null || echo 0)" echo "Codex stalled past 10 minutes. Common causes: model API stall, long prompt, network issue. Try re-running. If persistent, split the prompt or check ~/.codex/logs/." +elif [ "$_CODEX_EXIT" != "0" ]; then + # Surface non-zero exits so the calling agent doesn't read "no output" as + # a silent model/API stall. See #1327. + echo "[codex exit $_CODEX_EXIT] $(head -1 "$TMPERR" 2>/dev/null || echo "no stderr captured")" + head -20 "$TMPERR" 2>/dev/null | sed 's/^/ /' || true + _gstack_codex_log_event "codex_nonzero_exit" "consult:$_CODEX_EXIT" fi ``` @@ -535,6 +554,12 @@ if [ "$_CODEX_EXIT" = "124" ]; then _gstack_codex_log_event "codex_timeout" "600" _gstack_codex_log_hang "consult-resume" "$(wc -c < "$TMPERR" 2>/dev/null || echo 0)" echo "Codex stalled past 10 minutes. Common causes: model API stall, long prompt, network issue. Try re-running. If persistent, split the prompt or check ~/.codex/logs/." +elif [ "$_CODEX_EXIT" != "0" ]; then + # Surface non-zero exits so the calling agent doesn't read "no output" as + # a silent model/API stall. See #1327. + echo "[codex exit $_CODEX_EXIT] $(head -1 "$TMPERR" 2>/dev/null || echo "no stderr captured")" + head -20 "$TMPERR" 2>/dev/null | sed 's/^/ /' || true + _gstack_codex_log_event "codex_nonzero_exit" "consult-resume:$_CODEX_EXIT" fi 5. Capture session ID from the streamed output. The parser prints `SESSION_ID:`