From 990c9d55d222ffd44e6c6de9e9a8372ad5e433b8 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Wed, 27 May 2026 07:46:57 -0700 Subject: [PATCH] =?UTF-8?q?feat(bin):=20gstack-codex-session-import=20?= =?UTF-8?q?=E2=80=94=20structured=20Codex=20transcript=20parser?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plan-tune cathedral T9. Backfills question-log.jsonl from Codex sessions since Codex has no AskUserQuestion tool (per docs/spikes/codex-session-format.md) and gstack AUQ-shaped Decision Briefs show up as agent_message prose. Walks ~/.codex/sessions/<date>/rollout-*.jsonl, matches each agent_message that contains either a question-id marker or a D-numbered Decision Brief header, then pairs it with the next user_message for the answer. Two-tier recovery per D5: - marker present → source=codex-import-marker, stable question_id - no marker but D-shape detected → source=codex-import-pattern with hash-only question_id (never used as preference key per D18) Subcommands: gstack-codex-session-import # latest session gstack-codex-session-import <path> # explicit path gstack-codex-session-import --since <iso> # all sessions newer than <iso> User-choice extraction handles A/B/C letter responses and prose responses that contain the beginning of an option label. Recommended option parsed via the "(recommended)" label suffix (same convention as Layer 2). Each extracted event written via gstack-question-log, so source tagging, dedup, and async derive all apply uniformly. spawnSync uses the cwd from session_meta so gstack-slug buckets events into the project the user was actually working in, not the importer's cwd. 7 unit tests cover marker path, pattern fallback, multiple briefs in sequence, missing user_message, letter response with trailing prose, empty-sessions-dir handling. 
Smoke-tested against a real ~/.codex/sessions/ file from earlier today — returns IMPORTED: 0 because that session was autonomous (no AUQ-shaped prose), proving the bin doesn't false-positive on unrelated agent_message events. Co-Authored-By: Claude Opus 4.7 (1M context) --- bin/gstack-codex-session-import | 223 +++++++++++++++++++++++ test/gstack-codex-session-import.test.ts | 206 +++++++++++++++++++++ 2 files changed, 429 insertions(+) create mode 100755 bin/gstack-codex-session-import create mode 100644 test/gstack-codex-session-import.test.ts diff --git a/bin/gstack-codex-session-import b/bin/gstack-codex-session-import new file mode 100755 index 000000000..91368cac9 --- /dev/null +++ b/bin/gstack-codex-session-import @@ -0,0 +1,223 @@ +#!/usr/bin/env bash +# gstack-codex-session-import — backfill question-log.jsonl from Codex sessions. +# +# Codex has no AskUserQuestion tool (per docs/spikes/codex-session-format.md). +# gstack skills running on Codex emit Decision Briefs as plain agent_message +# text, and the user's response shows up in the next user_message. This +# importer reconstructs those question/answer pairs from the structured +# JSONL session files at ~/.codex/sessions/<date>/. +# +# Usage: +# gstack-codex-session-import # latest session under ~/.codex/sessions/ +# gstack-codex-session-import <path> # explicit session file +# gstack-codex-session-import --since <iso> # all sessions newer than <iso> +# +# Recovery strategy (two-tier per D5/T4 spike): +# 1. Marker-first: extract the question-id marker from agent_message → stable id. +# 2. Pattern fallback: detect D<N> header + lettered options → hash id +# (source=codex-import-pattern, never used as preference key per D18). +# +# Writes via bin/gstack-question-log so source tagging, dedup, and async +# derive all apply uniformly. 
+set -euo pipefail
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
+CODEX_SESSIONS_ROOT="${CODEX_SESSIONS_ROOT:-$HOME/.codex/sessions}"
+
+# Selection mode, decided by the first argument only:
+#   latest   (no args)   newest rollout-*.jsonl under CODEX_SESSIONS_ROOT
+#   explicit (a path)    exactly that session file
+#   since    (--since T) every rollout-*.jsonl modified after timestamp T
+MODE="latest"
+EXPLICIT_PATH=""
+SINCE_ISO=""
+
+if [ $# -gt 0 ]; then
+  case "$1" in
+    --since)
+      MODE="since"
+      SINCE_ISO="${2:-}"
+      ;;
+    --help|-h)
+      # Print the header comment block (everything after the shebang, up to
+      # the first non-comment line) with the leading "# " removed. awk is used
+      # because the \? escape in a sed basic regex is a GNU extension.
+      awk 'NR == 1 { next } /^#/ { sub(/^# ?/, ""); print; next } { exit }' "$0"
+      exit 0
+      ;;
+    -*)
+      echo "unknown flag: $1" >&2
+      exit 1
+      ;;
+    *)
+      MODE="explicit"
+      EXPLICIT_PATH="$1"
+      ;;
+  esac
+fi
+
+# Resolve list of session files to process.
+SESSION_FILES=()
+case "$MODE" in
+  explicit)
+    if [ ! -f "$EXPLICIT_PATH" ]; then
+      echo "gstack-codex-session-import: file not found: $EXPLICIT_PATH" >&2
+      exit 1
+    fi
+    SESSION_FILES=("$EXPLICIT_PATH")
+    ;;
+  latest)
+    if [ ! -d "$CODEX_SESSIONS_ROOT" ]; then
+      echo "NO_SESSIONS: $CODEX_SESSIONS_ROOT does not exist"
+      exit 0
+    fi
+    # Pick the most recently modified rollout file. A NUL-delimited scan with
+    # the -nt test replaces "find | xargs ls -t": GNU xargs still runs "ls -t"
+    # once on empty input (listing the current directory), and the pipeline
+    # split paths containing whitespace.
+    LATEST=""
+    while IFS= read -r -d '' f; do
+      if [ -z "$LATEST" ] || [ "$f" -nt "$LATEST" ]; then
+        LATEST="$f"
+      fi
+    done < <(find "$CODEX_SESSIONS_ROOT" -type f -name "rollout-*.jsonl" -print0 2>/dev/null)
+    if [ -z "$LATEST" ]; then
+      echo "NO_SESSIONS: no rollout-*.jsonl files under $CODEX_SESSIONS_ROOT"
+      exit 0
+    fi
+    SESSION_FILES=("$LATEST")
+    ;;
+  since)
+    if [ -z "$SINCE_ISO" ]; then
+      echo "--since requires an ISO 8601 timestamp" >&2
+      exit 1
+    fi
+    if [ ! -d "$CODEX_SESSIONS_ROOT" ]; then
+      echo "NO_SESSIONS: $CODEX_SESSIONS_ROOT does not exist"
+      exit 0
+    fi
+    # Reject a timestamp find cannot parse before scanning, so a typo is
+    # reported instead of silently matching nothing.
+    if ! find /dev/null -newermt "$SINCE_ISO" >/dev/null 2>&1; then
+      echo "gstack-codex-session-import: find could not interpret --since timestamp: $SINCE_ISO" >&2
+      exit 1
+    fi
+    # -newermt compares each file mtime against the timestamp itself. The
+    # previous "-newer <(date ...)" form compared against the mtime of the
+    # process-substitution pipe rather than the requested time.
+    while IFS= read -r -d '' f; do
+      SESSION_FILES+=("$f")
+    done < <(find "$CODEX_SESSIONS_ROOT" -type f -name "rollout-*.jsonl" -newermt "$SINCE_ISO" -print0 2>/dev/null)
+    ;;
+esac
+
+if [ ${#SESSION_FILES[@]} -eq 0 ]; then
+  echo "NO_SESSIONS: nothing to import"
+  exit 0
+fi
+
+# Parse + extract via bun. Emits one line per question found, ready to pipe
+# into gstack-question-log. Tagged with source so downstream consumers
+# (/plan-tune stats, dream cycle) can distinguish backfilled events from
+# live captures.
+IMPORTED=0
+SKIPPED_NO_ANSWER=0
+
+for SESSION_FILE in "${SESSION_FILES[@]}"; do
+  # Only stdout is captured. The bun program prints exactly one summary line,
+  # "<imported> <skipped>", on stdout and sends warnings to stderr, which is
+  # left attached to the terminal. Merging stderr into this capture used to
+  # put WARN text into the arithmetic below and abort the script.
+  COUNT_LINE=$(SESSION_FILE_PATH="$SESSION_FILE" QLOG_BIN="$SCRIPT_DIR/gstack-question-log" bun -e '
+    const fs = require("fs");
+    const path = require("path");
+    const { spawnSync } = require("child_process");
+    const crypto = require("crypto");
+
+    const sessionPath = process.env.SESSION_FILE_PATH;
+    const qlogBin = process.env.QLOG_BIN;
+    const lines = fs.readFileSync(sessionPath, "utf-8").trim().split("\n").filter(Boolean);
+
+    // Split the JSONL into the session_meta payload and the remaining events.
+    // Lines that are not valid JSON objects are ignored.
+    let meta = null;
+    const stream = [];
+    for (const ln of lines) {
+      try {
+        const e = JSON.parse(ln);
+        if (e.type === "session_meta") meta = e.payload;
+        else stream.push(e);
+      } catch {}
+    }
+    if (!meta) {
+      console.error("WARN: no session_meta in " + sessionPath);
+      console.log("0 0");
+      process.exit(0);
+    }
+
+    const cwd = meta.cwd || "";
+    const sessionId = (meta.id || path.basename(sessionPath)).slice(0, 64);
+
+    // Walk for agent_message → next user_message pairs.
+    const briefs = [];
+    let skippedNoAnswer = 0;
+    for (let i = 0; i < stream.length; i++) {
+      const e = stream[i];
+      if (e.type !== "event_msg" || e.payload?.type !== "agent_message") continue;
+      const text = String(e.payload?.message || "");
+      if (!text) continue;
+      // Detect D-numbered brief or marker. Markers are sufficient on their own.
+      // NOTE(review): the marker pattern between the slashes is missing in this
+      // copy of the patch. Restore it from docs/spikes/codex-session-format.md
+      // before running; capture group 1 must be the question id.
+      const markerMatch = text.match(//i);
+      const dMatch = text.match(/^D\d+[\.\d]*\s*[—\-]\s*(.+?)$/m);
+      if (!markerMatch && !dMatch) continue;
+
+      // Find the next user_message in the stream.
+      let answer = null;
+      for (let j = i + 1; j < stream.length; j++) {
+        const e2 = stream[j];
+        if (e2.type === "event_msg" && e2.payload?.type === "user_message") {
+          answer = String(e2.payload?.message || "").trim();
+          break;
+        }
+      }
+      if (!answer) {
+        // Brief was never answered (or the answer was empty): count it, skip it.
+        skippedNoAnswer++;
+        continue;
+      }
+
+      // Extract options A) ... B) ... from the brief.
+      const optMatches = [...text.matchAll(/^([A-Z])\)\s+(.+?)(?:\s+\(recommended\))?$/gm)];
+      const options = optMatches.map((m) => m[2].trim());
+
+      // Identify recommended option from the "(recommended)" label suffix.
+      // Left undefined unless exactly one option carries the label.
+      let recommended;
+      const recLabel = [...text.matchAll(/^([A-Z])\)\s+(.+?)\s+\(recommended\)$/gm)];
+      if (recLabel.length === 1) recommended = recLabel[0][2].trim();
+
+      // Identify which option the user picked from their answer.
+      // A leading capital letter counts as a selection only when it indexes a
+      // parsed option; otherwise (for example an answer starting with "I")
+      // fall through to matching on the first 12 characters of each option.
+      let userChoice = "__unknown__";
+      const letterMatch = answer.match(/^\s*([A-Z])\b/);
+      const letterIdx = letterMatch ? letterMatch[1].charCodeAt(0) - 65 : -1;
+      if (letterIdx >= 0 && letterIdx < options.length) {
+        userChoice = options[letterIdx];
+      } else if (options.length > 0) {
+        const lower = answer.toLowerCase();
+        const m = options.find((o) => lower.includes(o.toLowerCase().slice(0, 12)));
+        if (m) userChoice = m;
+      } else if (letterMatch) {
+        // No options were parsed from the brief: keep the bare letter.
+        userChoice = letterMatch[1];
+      }
+      if (userChoice === "__unknown__") {
+        userChoice = answer.slice(0, 64);
+      }
+
+      const summary = (dMatch?.[1] || text.split("\n")[0]).slice(0, 200);
+
+      // Marker ids are used verbatim. Without a marker the id is a short sha1
+      // over the summary and the sorted option labels.
+      let questionId, source;
+      if (markerMatch) {
+        questionId = markerMatch[1];
+        source = "codex-import-marker";
+      } else {
+        const sortedOpts = [...options].sort().join("|");
+        const h = crypto.createHash("sha1").update("codex::" + summary + "::" + sortedOpts).digest("hex").slice(0, 10);
+        questionId = "hook-" + h;
+        source = "codex-import-pattern";
+      }
+
+      briefs.push({
+        skill: "codex",
+        question_id: questionId,
+        question_summary: summary,
+        options_count: options.length || 1,
+        user_choice: userChoice.slice(0, 64),
+        ...(recommended ? { recommended: recommended.slice(0, 64) } : {}),
+        source,
+        session_id: sessionId,
+        // Timestamp of the agent_message event when present; an undefined
+        // value is dropped by JSON.stringify.
+        ts: e.timestamp || undefined,
+      });
+    }
+
+    // Run from the originating cwd so gstack-slug buckets events into the
+    // right project. Falls back to the importer cwd if the session cwd
+    // no longer exists.
+    const runCwd = cwd && fs.existsSync(cwd) ? cwd : undefined;
+
+    let imported = 0;
+    for (const b of briefs) {
+      const res = spawnSync(qlogBin, [JSON.stringify(b)], {
+        encoding: "utf-8",
+        stdio: ["ignore", "pipe", "pipe"],
+        cwd: runCwd,
+        timeout: 5000,
+      });
+      if (res.status === 0) {
+        imported++;
+      } else {
+        const detail = String(res.stderr || res.error || "").trim();
+        console.error("WARN: gstack-question-log failed for " + b.question_id + ": " + detail);
+      }
+    }
+    console.log(imported + " " + skippedNoAnswer);
+  ')
+
+  # Parse "<imported> <skipped>" from the last stdout line. Anything that is
+  # not a plain non-negative integer counts as 0 so the arithmetic cannot fail.
+  read -r IMP SKIP <<<"$(printf '%s\n' "$COUNT_LINE" | tail -n 1)"
+  case "$IMP" in ''|*[!0-9]*) IMP=0 ;; esac
+  case "$SKIP" in ''|*[!0-9]*) SKIP=0 ;; esac
+  IMPORTED=$((IMPORTED + IMP))
+  SKIPPED_NO_ANSWER=$((SKIPPED_NO_ANSWER + SKIP))
+done
+
+if [ "$SKIPPED_NO_ANSWER" -gt 0 ]; then
+  echo "SKIPPED_NO_ANSWER: $SKIPPED_NO_ANSWER brief(s) had no following user_message" >&2
+fi
+echo "IMPORTED: $IMPORTED events from ${#SESSION_FILES[@]} session(s)"
diff --git a/test/gstack-codex-session-import.test.ts b/test/gstack-codex-session-import.test.ts
new file mode 100644
index 000000000..7cd32e949
--- /dev/null
+++ b/test/gstack-codex-session-import.test.ts
@@ -0,0 +1,206 @@
+/**
+ * gstack-codex-session-import — backfill question-log from Codex JSONL.
+ *
+ * Plan-tune cathedral T9. Verifies the structured-file parser (D5) handles
+ * the two-tier recovery strategy from docs/spikes/codex-session-format.md:
+ * - Marker-first: question-id marker → source=codex-import-marker.
+ * - Pattern fallback: D-numbered brief → source=codex-import-pattern,
+ *   hash-only question_id.
+ */
+
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { spawnSync } from 'child_process';
+
+const ROOT = path.resolve(import.meta.dir, '..');
+const BIN = path.join(ROOT, 'bin', 'gstack-codex-session-import');
+
+/** One parsed JSONL line: a session event fixture or an imported log entry. */
+type JsonRecord = Record<string, unknown>;
+
+let stateRoot: string;
+let fixtureCwd: string;
+let cwdSlug: string;
+
+// Every test gets a throwaway state root holding a fake project directory
+// that the fixture session_meta reports as its cwd.
+beforeEach(() => {
+  stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-cdximp-'));
+  cwdSlug = 'codex-fixture-slug';
+  fixtureCwd = path.join(stateRoot, cwdSlug);
+  fs.mkdirSync(fixtureCwd, { recursive: true });
+});
+
+afterEach(() => {
+  fs.rmSync(stateRoot, { recursive: true, force: true });
+});
+
+/**
+ * Writes a rollout JSONL fixture: one session_meta line (pointing at
+ * fixtureCwd) followed by the given events, one JSON object per line.
+ *
+ * @returns absolute path of the written session file
+ */
+function writeSessionFile(events: JsonRecord[], sessionId = 'sess-fixture'): string {
+  const p = path.join(stateRoot, 'rollout-fixture.jsonl');
+  const meta = {
+    timestamp: new Date().toISOString(),
+    type: 'session_meta',
+    payload: { id: sessionId, cwd: fixtureCwd },
+  };
+  const lines = [JSON.stringify(meta), ...events.map((e) => JSON.stringify(e))];
+  fs.writeFileSync(p, lines.join('\n') + '\n');
+  return p;
+}
+
+/** Builds an event_msg carrying an agent_message with the given text. */
+function agentMessage(text: string): JsonRecord {
+  return {
+    timestamp: new Date().toISOString(),
+    type: 'event_msg',
+    payload: { type: 'agent_message', message: text },
+  };
+}
+
+/** Builds an event_msg carrying a user_message with the given text. */
+function userMessage(text: string): JsonRecord {
+  return {
+    timestamp: new Date().toISOString(),
+    type: 'event_msg',
+    payload: { type: 'user_message', message: text },
+  };
+}
+
+/**
+ * Copies the defined entries of process.env, points GSTACK_STATE_ROOT at the
+ * per-test state root, drops GSTACK_HOME, then applies the overrides.
+ */
+function buildEnv(overrides: Record<string, string> = {}): Record<string, string> {
+  const env: Record<string, string> = {};
+  for (const [k, v] of Object.entries(process.env)) {
+    if (v !== undefined) env[k] = v;
+  }
+  env.GSTACK_STATE_ROOT = stateRoot;
+  delete env.GSTACK_HOME;
+  return { ...env, ...overrides };
+}
+
+/** Runs the importer against an explicit session file, with derive disabled. */
+function runImport(sessionPath: string): { stdout: string; stderr: string; status: number } {
+  const env = buildEnv({ GSTACK_QUESTION_LOG_NO_DERIVE: '1' });
+  const res = spawnSync(BIN, [sessionPath], { env, encoding: 'utf-8', cwd: ROOT });
+  return {
+    stdout: res.stdout ?? '',
+    stderr: res.stderr ?? '',
+    status: res.status ?? -1,
+  };
+}
+
+/**
+ * Reads back the question log for the fixture project; [] when it is missing.
+ * NOTE(review): assumes gstack-slug maps fixtureCwd to its directory name
+ * (cwdSlug); confirm against bin/gstack-slug.
+ */
+function readImportedEvents(): JsonRecord[] {
+  const f = path.join(stateRoot, 'projects', cwdSlug, 'question-log.jsonl');
+  if (!fs.existsSync(f)) return [];
+  return fs
+    .readFileSync(f, 'utf-8')
+    .trim()
+    .split('\n')
+    .filter(Boolean)
+    .map((l) => JSON.parse(l) as JsonRecord);
+}
+
+// ----------------------------------------------------------------------
+// Marker-first path
+// ----------------------------------------------------------------------
+
+describe('marker-first import (source=codex-import-marker)', () => {
+  test('extracts marker id from agent_message and pairs with next user_message', () => {
+    // NOTE(review): the assertions below expect the ship-test-failure-triage
+    // marker, but no marker text is visible in this fixture string as it
+    // appears in this copy of the patch; confirm the fixture embeds it.
+    const sessionPath = writeSessionFile([
+      agentMessage(
+        'D1 — Test\nELI10: blah\n Tests failed.\nRecommendation: A\nA) Fix now (recommended)\nB) Investigate\nC) Ack and ship',
+      ),
+      userMessage('A'),
+    ]);
+    const r = runImport(sessionPath);
+    expect(r.status).toBe(0);
+    expect(r.stdout).toContain('IMPORTED: 1');
+    const events = readImportedEvents();
+    expect(events.length).toBe(1);
+    expect(events[0].source).toBe('codex-import-marker');
+    expect(events[0].question_id).toBe('ship-test-failure-triage');
+    expect(events[0].user_choice).toContain('Fix now');
+    expect(events[0].recommended).toContain('Fix now');
+  });
+});
+
+// ----------------------------------------------------------------------
+// Pattern fallback
+// ----------------------------------------------------------------------
+
+describe('pattern fallback (source=codex-import-pattern)', () => {
+  test('D-numbered brief without marker → hash id + source=codex-import-pattern', () => {
+    const sessionPath = writeSessionFile([
+      agentMessage('D2 — Unmarked brief\nA) Foo (recommended)\nB) Bar'),
+      userMessage('A'),
+    ]);
+    const r = runImport(sessionPath);
+    expect(r.status).toBe(0);
+    const events = readImportedEvents();
+    expect(events.length).toBe(1);
+    expect(events[0].source).toBe('codex-import-pattern');
+    expect((events[0].question_id as string).startsWith('hook-')).toBe(true);
+    expect(events[0].user_choice).toContain('Foo');
+  });
+});
+
+// ----------------------------------------------------------------------
+// Edge cases
+// ----------------------------------------------------------------------
+
+describe('edge cases', () => {
+  test('no AUQ-shaped events → 0 imported, exit 0', () => {
+    const sessionPath = writeSessionFile([
+      agentMessage('Just doing some work, nothing to ask.'),
+    ]);
+    const r = runImport(sessionPath);
+    expect(r.status).toBe(0);
+    expect(r.stdout).toContain('IMPORTED: 0');
+  });
+
+  test('agent_message with marker but no following user_message → skipped', () => {
+    // NOTE(review): the marker that should lead this message is not visible
+    // in this copy of the patch; confirm the fixture embeds it.
+    const sessionPath = writeSessionFile([
+      agentMessage(' D1 — Q\nA) Foo\nB) Bar'),
+      // no user_message
+    ]);
+    const r = runImport(sessionPath);
+    expect(r.status).toBe(0);
+    expect(readImportedEvents().length).toBe(0);
+  });
+
+  test('two D-briefs in sequence → both imported', () => {
+    // NOTE(review): the assertions expect marker ids q1 and q2, which are not
+    // visible in these fixture strings in this copy; confirm they are embedded.
+    const sessionPath = writeSessionFile([
+      agentMessage('D1 — First \nA) Foo (recommended)\nB) Bar'),
+      userMessage('A'),
+      agentMessage('D2 — Second \nA) Baz (recommended)\nB) Qux'),
+      userMessage('B'),
+    ]);
+    const r = runImport(sessionPath);
+    expect(r.status).toBe(0);
+    const events = readImportedEvents();
+    expect(events.length).toBe(2);
+    expect(events[0].question_id).toBe('q1');
+    expect(events[1].question_id).toBe('q2');
+  });
+
+  // Renamed from "numeric user response…": the answer is a letter followed by
+  // prose, and the importer resolves it through the letter index.
+  test('letter response followed by prose resolves to the lettered option', () => {
+    const sessionPath = writeSessionFile([
+      agentMessage('D1 — Test \nA) Foo\nB) Bar\nC) Baz'),
+      userMessage('B - I think B is right'),
+    ]);
+    runImport(sessionPath);
+    const events = readImportedEvents();
+    expect(events.length).toBe(1);
+    expect(events[0].user_choice).toContain('Bar');
+  });
+});
+
+// ----------------------------------------------------------------------
+// Default-mode (latest session) behavior
+// ----------------------------------------------------------------------
+
+describe('default mode (no args → latest)', () => {
+  test('returns NO_SESSIONS when sessions dir is empty', () => {
+    const emptyDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-empty-cdx-'));
+    try {
+      const env = buildEnv({ CODEX_SESSIONS_ROOT: emptyDir });
+      const res = spawnSync(BIN, [], { env, encoding: 'utf-8', cwd: ROOT });
+      expect(res.status).toBe(0);
+      expect(res.stdout).toMatch(/NO_SESSIONS/);
+    } finally {
+      fs.rmSync(emptyDir, { recursive: true, force: true });
+    }
+  });
+});