diff --git a/bin/gstack-distill-free-text b/bin/gstack-distill-free-text
new file mode 100755
index 000000000..27fb303da
--- /dev/null
+++ b/bin/gstack-distill-free-text
@@ -0,0 +1,311 @@
+#!/usr/bin/env bash
+# gstack-distill-free-text — Layer 8 "dream cycle" batch distiller.
+#
+# Reads auq-other free-text events from this project's question-log.jsonl,
+# sends them to Claude via the Anthropic SDK, and writes structured proposals
+# the user can review via /plan-tune distill. Proposals require explicit
+# user Y before applying — never autonomous (Codex #15 trust boundary).
+#
+# Usage:
+#   gstack-distill-free-text              # sync, prompts at end
+#   gstack-distill-free-text --background # spawn detached; results
+#                                         # surface on next /plan-tune
+#   gstack-distill-free-text --dry-run    # show prompt, no API call
+#   gstack-distill-free-text --status     # show last-run stats
+#
+# Per D7 cathedral cap: max 3 distills/day per slug. Cumulative cost log
+# appended to $GSTACK_STATE_ROOT/distill-cost.jsonl.
+# Per D6: Anthropic SDK direct call, fail-loud on missing ANTHROPIC_API_KEY.
+set -euo pipefail
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+# State root precedence: GSTACK_STATE_ROOT, then GSTACK_HOME, then ~/.gstack.
+GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
+# The output of gstack-slug is evaluated as shell code; if it fails or does
+# not set SLUG, the slug falls back to "unknown".
+eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
+SLUG="${SLUG:-unknown}"
+PROJECT_DIR="$GSTACK_HOME/projects/$SLUG"
+LOG_FILE="$PROJECT_DIR/question-log.jsonl"
+PROPOSAL_FILE="$PROJECT_DIR/distillation-proposals.json"
+COST_LOG="$GSTACK_HOME/distill-cost.jsonl"
+mkdir -p "$PROJECT_DIR"
+
+MODE="sync"
+case "${1:-}" in
+  --background) MODE="background" ;;
+  --dry-run) MODE="dry-run" ;;
+  --status) MODE="status" ;;
+  --help|-h)
+    # Print the header comment as help text: start after the shebang, drop
+    # the trailing "set -euo" line, strip the leading "# ". Avoids the
+    # GNU-only \? escape so this also works with BSD/macOS sed.
+    sed -n '2,/^set -euo/p' "$0" | sed -e '$d' -e 's/^#//' -e 's/^ //'
+    exit 0
+    ;;
+  '') ;;
+  *) echo "unknown arg: $1" >&2; exit 1 ;;
+esac
+
+# --- Status subcommand --------------------------------------------------
+
+if [ "$MODE" = "status" ]; then
+  COST_LOG_PATH="$COST_LOG" SLUG_PATH="$SLUG" bun -e '
+    const fs = require("fs");
+    const slug = process.env.SLUG_PATH;
+    const path = process.env.COST_LOG_PATH;
+    if (!fs.existsSync(path)) { console.log("no distill runs yet"); process.exit(0); }
+    const lines = fs.readFileSync(path, "utf-8").trim().split("\n").filter(Boolean);
+    // Skip unparseable lines (same tolerance as the rate-cap check) so one
+    // corrupt entry cannot crash the status report.
+    const mine = lines
+      .map((l) => { try { return JSON.parse(l); } catch { return null; } })
+      .filter((e) => e && e.slug === slug);
+    if (mine.length === 0) { console.log("no distill runs yet for slug=" + slug); process.exit(0); }
+    const totalUsd = mine.reduce((a, e) => a + (e.cost_usd_est || 0), 0);
+    const todayIso = new Date().toISOString().slice(0, 10);
+    const today = mine.filter((e) => (e.ts || "").startsWith(todayIso));
+    console.log("RUNS: " + mine.length);
+    console.log("TODAY: " + today.length + " / 3");
+    console.log("ESTIMATED_TOTAL_USD: $" + totalUsd.toFixed(4));
+    const last = mine[mine.length - 1];
+    console.log("LAST_RUN: " + (last.ts || "?") + " | " + (last.proposals_count || 0) + " proposals");
+  '
+  exit 0
+fi
+
+# --- Background mode: detach + invoke self synchronously ---------------
+
+if [ "$MODE" = "background" ]; then
+  # The child is re-invoked with no args, so it runs the normal sync path
+  # (rate cap included); all of its output is discarded.
+  nohup "$0" >/dev/null 2>&1 &
+  echo "DISTILL_SPAWNED: pid=$!"
+  exit 0
+fi
+
+# --- Rate cap check (D7: max 3/day per slug) ---------------------------
+# "Today" is the UTC calendar day: entry ts is prefix-matched against toISOString.
+
+DAILY_COUNT=$(COST_LOG_PATH="$COST_LOG" SLUG_PATH="$SLUG" bun -e '
+  const fs = require("fs");
+  const slug = process.env.SLUG_PATH;
+  const path = process.env.COST_LOG_PATH;
+  if (!fs.existsSync(path)) { console.log("0"); process.exit(0); }
+  const todayIso = new Date().toISOString().slice(0, 10);
+  const lines = fs.readFileSync(path, "utf-8").trim().split("\n").filter(Boolean);
+  const n = lines
+    .map((l) => { try { return JSON.parse(l); } catch { return null; } })
+    .filter((e) => e && e.slug === slug && (e.ts || "").startsWith(todayIso))
+    .length;
+  console.log(String(n));
+')
+
+# stderr is silenced so a non-numeric count makes the test fail quietly and
+# the run proceeds, instead of printing "integer expression expected".
+if [ "$DAILY_COUNT" -ge 3 ] 2>/dev/null; then
+  echo "RATE_CAPPED: $DAILY_COUNT distills today (3/day limit). Use --status for run history."
+  exit 0
+fi
+
+# --- Gather unprocessed auq-other events from this project -------------
+
+if [ ! -f "$LOG_FILE" ]; then
+  echo "NO_LOG: no question-log.jsonl in $PROJECT_DIR"
+  exit 0
+fi
+
+EVENTS_JSON=$(LOG_FILE_PATH="$LOG_FILE" bun -e '
+  const fs = require("fs");
+  const lines = fs.readFileSync(process.env.LOG_FILE_PATH, "utf-8").trim().split("\n").filter(Boolean);
+  const out = [];
+  for (const l of lines) {
+    try {
+      const e = JSON.parse(l);
+      if (e.source === "auq-other" && !e.distilled_at && e.free_text) {
+        out.push({
+          ts: e.ts,
+          question_id: e.question_id,
+          question_summary: e.question_summary,
+          free_text: e.free_text,
+          session_id: e.session_id,
+        });
+      }
+    } catch {} // best-effort: skip malformed log lines
+  }
+  process.stdout.write(JSON.stringify(out));
+')
+
+EVENT_COUNT=$(printf '%s' "$EVENTS_JSON" | bun -e 'const a = JSON.parse(await Bun.stdin.text()); console.log(a.length);')
+if [ "$EVENT_COUNT" -eq 0 ]; then
+  echo "NO_FREE_TEXT: nothing to distill"
+  exit 0
+fi
+
+# --- Build distill prompt ---------------------------------------------
+
+# Heredoc into temp file (avoids $(cat <<'PROMPT'...) which choked the
+# bash parser on apostrophes elsewhere in the script).
+DISTILL_PROMPT_FILE=$(mktemp)
+trap 'rm -f "$DISTILL_PROMPT_FILE"' EXIT
+cat > "$DISTILL_PROMPT_FILE" <<'PROMPT'
+You are gstack dream-cycle distiller. Below are free-text responses the
+user typed into AskUserQuestion prompts (option "Other") across recent gstack
+sessions. For each response, extract structured signal that should update the
+user plan-tune profile or preferences.
+
+Return strict JSON with this shape:
+{
+  "proposals": [
+    {
+      "kind": "preference" | "declared-nudge" | "memory-nugget",
+      "confidence": 0.0-1.0,
+      "source_quotes": ["verbatim quote from free_text", "..."],
+      "question_id": "question_id of the source event",
+      "preference": "never-ask" | "always-ask" | "ask-only-for-one-way",
+      "dimension": "scope_appetite | risk_tolerance | detail_preference | autonomy | architecture_care",
+      "direction": "up | down",
+      "magnitude": "small | medium | large",
+      "rationale": "one sentence on why this follows from the quotes",
+      "nugget": "short standalone fact to remember",
+      "applies_to_signal_keys": ["scope-appetite", "..."]
+    }
+  ]
+}
+
+Rules:
+- Reject any proposal where confidence < 0.7.
+- Quote VERBATIM from the user free_text. Never paraphrase a source quote.
+- A single user response may produce multiple proposals.
+- If nothing meaningful to extract, return {"proposals": []}.
+- No commentary outside the JSON.
+PROMPT
+DISTILL_PROMPT=$(cat "$DISTILL_PROMPT_FILE")
+
+# --- Dry-run: emit prompt + events, exit ------------------------------
+
+if [ "$MODE" = "dry-run" ]; then
+  echo "=== DISTILL PROMPT ==="
+  echo "$DISTILL_PROMPT"
+  echo
+  echo "=== EVENTS ($EVENT_COUNT) ==="
+  echo "$EVENTS_JSON" | bun -e 'console.log(JSON.stringify(JSON.parse(await Bun.stdin.text()), null, 2));'
+  exit 0
+fi
+
+# --- SDK call: fail-loud on missing key -------------------------------
+
+if [ -z "${ANTHROPIC_API_KEY:-}" ]; then
+  cat <<'EOF' >&2
+gstack-distill-free-text: ANTHROPIC_API_KEY not set.
+
+Dream-cycle distillation needs an API key for the SDK call. Set
+ANTHROPIC_API_KEY in your environment, or run with --dry-run to see
+what would be sent without actually calling.
+
+Note: this is a separate billing/auth surface from your interactive
+Claude Code session (per Codex correction in D6).
+EOF
+  exit 1
+fi
+
+# Run the SDK call in bun. Emits JSON: {proposals_count, cost_usd_est}.
+RESULT=$(EVENTS_JSON="$EVENTS_JSON" DISTILL_PROMPT="$DISTILL_PROMPT" \
+  PROPOSAL_FILE_PATH="$PROPOSAL_FILE" LOG_FILE_PATH="$LOG_FILE" \
+  ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \
+  bun --cwd "$ROOT_DIR" -e '
+  const fs = require("fs");
+  const Anthropic = require("@anthropic-ai/sdk").default;
+  const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
+
+  const events = JSON.parse(process.env.EVENTS_JSON);
+  const prompt = process.env.DISTILL_PROMPT + "\n\nFREE-TEXT RESPONSES (JSON array):\n" + JSON.stringify(events, null, 2);
+
+  // Pricing (Haiku 4.5 — cheap, fast, sufficient for structured extraction).
+  // Per token, USD: input $0.001/1k = 1e-6, output $0.005/1k = 5e-6.
+  const INPUT_PER_TOKEN = 1e-6;
+  const OUTPUT_PER_TOKEN = 5e-6;
+
+  const resp = await client.messages.create({
+    model: "claude-haiku-4-5-20251001",
+    max_tokens: 4096,
+    messages: [{ role: "user", content: prompt }],
+  });
+
+  const text = resp.content.map((b) => (b.type === "text" ? b.text : "")).join("");
+
+  // Strip optional fenced code blocks the model may wrap JSON in. Trim first
+  // so leading whitespace cannot hide the opening fence from the ^ anchor.
+  const stripped = text.trim().replace(/^```(?:json)?\s*/i, "").replace(/```\s*$/i, "").trim();
+  let parsed;
+  try { parsed = JSON.parse(stripped); } catch (e) {
+    process.stderr.write("DISTILL: model returned non-JSON: " + text.slice(0, 200) + "\n");
+    process.exit(1);
+  }
+
+  const proposals = Array.isArray(parsed?.proposals) ? parsed.proposals : [];
+  // Keep only proposals with confidence >= 0.7 (model is told this rule;
+  // double-check in case it slipped).
+  const filtered = proposals.filter((p) => p && typeof p.confidence === "number" && p.confidence >= 0.7);
+
+  // Write via temp file + rename in the same directory so a crash mid-write
+  // cannot leave a truncated proposals file or question log behind.
+  const writeAtomic = (dest, data) => {
+    const tmp = dest + ".tmp-" + process.pid;
+    fs.writeFileSync(tmp, data);
+    fs.renameSync(tmp, dest);
+  };
+
+  // Write proposals file (overwrite — only the latest run is reviewable).
+  writeAtomic(process.env.PROPOSAL_FILE_PATH, JSON.stringify({
+    generated_at: new Date().toISOString(),
+    source_event_count: events.length,
+    proposals: filtered,
+  }, null, 2));
+
+  // Mark source events as distilled_at so they do not re-propose.
+  // Rewrite question-log.jsonl: read all lines, set distilled_at on the
+  // matching events, keep every other line as-is. Match by ts + question_id.
+  const logPath = process.env.LOG_FILE_PATH;
+  const distilledAt = new Date().toISOString();
+  const matchKeys = new Set(events.map((e) => (e.ts || "") + "::" + (e.question_id || "")));
+  const lines = fs.readFileSync(logPath, "utf-8").split("\n");
+  const out = [];
+  for (const ln of lines) {
+    if (!ln.trim()) { out.push(ln); continue; }
+    try {
+      const e = JSON.parse(ln);
+      const key = (e.ts || "") + "::" + (e.question_id || "");
+      if (matchKeys.has(key)) {
+        e.distilled_at = distilledAt;
+        out.push(JSON.stringify(e));
+      } else {
+        out.push(ln);
+      }
+    } catch { out.push(ln); }
+  }
+  writeAtomic(logPath, out.join("\n"));
+
+  // Cost estimate from usage tokens.
+  const usage = resp.usage || {};
+  const inTok = usage.input_tokens || 0;
+  const outTok = usage.output_tokens || 0;
+  const cost = inTok * INPUT_PER_TOKEN + outTok * OUTPUT_PER_TOKEN;
+
+  process.stdout.write(JSON.stringify({
+    proposals_count: filtered.length,
+    rejected_low_confidence: proposals.length - filtered.length,
+    input_tokens: inTok,
+    output_tokens: outTok,
+    cost_usd_est: cost,
+  }));
+')
+
+# Append cost log line: splice ts + slug into the RESULT object by dropping
+# its outer braces. This entry is what the rate cap and --status read.
+TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
+echo "{\"ts\":\"$TS\",\"slug\":\"$SLUG\",$(echo "$RESULT" | sed 's/^{//; s/}$//')}" >> "$COST_LOG"
+
+echo "DISTILL_COMPLETE:"
+echo "  proposals_file: $PROPOSAL_FILE"
+echo "  $RESULT"
diff --git a/test/distill-free-text.test.ts b/test/distill-free-text.test.ts
new file mode 100644
index 000000000..3e1f69d29
--- /dev/null
+++ b/test/distill-free-text.test.ts
@@ -0,0 +1,227 @@
+/**
+ * gstack-distill-free-text — Layer 8 dream cycle (plan-tune cathedral T10).
+ *
+ * Covers the SDK-free paths: status, dry-run, rate cap, no-event handling.
+ * The real API call path is exercised by the E2E test in T16; here we
+ * verify the bin's deterministic plumbing without burning tokens.
+ */
+
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { spawnSync } from 'child_process';
+
+const ROOT = path.resolve(import.meta.dir, '..');
+const BIN = path.join(ROOT, 'bin', 'gstack-distill-free-text');
+const QLOG_BIN = path.join(ROOT, 'bin', 'gstack-question-log');
+
+let stateRoot: string;
+let fixtureCwd: string;
+let cwdSlug: string;
+
+beforeEach(() => {
+  stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-dist-'));
+  cwdSlug = 'distill-fixture';
+  fixtureCwd = path.join(stateRoot, cwdSlug);
+  fs.mkdirSync(fixtureCwd, { recursive: true });
+});
+
+afterEach(() => {
+  fs.rmSync(stateRoot, { recursive: true, force: true });
+});
+
+function makeEnv(extra: Record<string, string> = {}): Record<string, string> {
+  const env: Record<string, string> = {};
+  for (const [k, v] of Object.entries(process.env)) {
+    if (v !== undefined) env[k] = v;
+  }
+  env.GSTACK_STATE_ROOT = stateRoot;
+  env.GSTACK_QUESTION_LOG_NO_DERIVE = '1';
+  delete env.GSTACK_HOME;
+  return { ...env, ...extra };
+}
+
+function run(args: string[]): { stdout: string; stderr: string; status: number } {
+  const res = spawnSync(BIN, args, {
+    env: makeEnv(),
+    encoding: 'utf-8',
+    cwd: fixtureCwd,
+  });
+  return {
+    stdout: res.stdout ?? '',
+    stderr: res.stderr ?? '',
+    status: res.status ?? -1,
+  };
+}
+
+function writeAuqOtherEvent(text: string): void {
+  spawnSync(
+    QLOG_BIN,
+    [
+      JSON.stringify({
+        skill: 'plan-tune',
+        question_id: 'hook-distill00',
+        question_summary: 'Test question for distillation',
+        options_count: 2,
+        user_choice: 'Other',
+        source: 'auq-other',
+        free_text: text,
+        session_id: 's-distill',
+        tool_use_id: 'tu-distill-' + Math.random().toString(36).slice(2, 8),
+      }),
+    ],
+    {
+      env: makeEnv(),
+      cwd: fixtureCwd,
+      encoding: 'utf-8',
+    },
+  );
+}
+
+function writeCostLogEntry(slug: string, dateIso: string): void {
+  fs.mkdirSync(stateRoot, { recursive: true });
+  fs.appendFileSync(
+    path.join(stateRoot, 'distill-cost.jsonl'),
+    JSON.stringify({ ts: dateIso, slug, proposals_count: 0, cost_usd_est: 0 }) + '\n',
+  );
+}
+
+// ----------------------------------------------------------------------
+// Status subcommand
+// ----------------------------------------------------------------------
+
+describe('--status', () => {
+  test('reports "no runs yet" when cost log absent', () => {
+    const r = run(['--status']);
+    expect(r.status).toBe(0);
+    expect(r.stdout).toMatch(/no distill runs/);
+  });
+
+  test('reports counts when prior runs exist', () => {
+    writeCostLogEntry(cwdSlug, new Date().toISOString());
+    writeCostLogEntry(cwdSlug, new Date().toISOString());
+    const r = run(['--status']);
+    expect(r.status).toBe(0);
+    expect(r.stdout).toContain('RUNS: 2');
+    expect(r.stdout).toContain('TODAY: 2 / 3');
+  });
+});
+
+// ----------------------------------------------------------------------
+// Rate cap (D7)
+// ----------------------------------------------------------------------
+
+describe('rate cap (3/day per slug)', () => {
+  test('exits with RATE_CAPPED when 3 runs already logged today', () => {
+    const today = new Date().toISOString();
+    writeCostLogEntry(cwdSlug, today);
+    writeCostLogEntry(cwdSlug, today);
+    writeCostLogEntry(cwdSlug, today);
+    const r = run([]);
+    expect(r.status).toBe(0);
+    expect(r.stdout).toMatch(/RATE_CAPPED/);
+  });
+
+  test('yesterday runs do not count against today cap', () => {
+    const today = new Date().toISOString();
+    const yesterday = new Date(Date.now() - 25 * 60 * 60 * 1000).toISOString();
+    writeCostLogEntry(cwdSlug, yesterday);
+    writeCostLogEntry(cwdSlug, yesterday);
+    writeCostLogEntry(cwdSlug, yesterday);
+    writeCostLogEntry(cwdSlug, today);
+    const r = run([]);
+    // Not capped — proceeds past the cap check; will hit NO_LOG next.
+    expect(r.status).toBe(0);
+    expect(r.stdout).not.toMatch(/RATE_CAPPED/);
+  });
+
+  test('other slugs in cost log do not count against this slug', () => {
+    const today = new Date().toISOString();
+    writeCostLogEntry('other-slug', today);
+    writeCostLogEntry('other-slug', today);
+    writeCostLogEntry('other-slug', today);
+    const r = run([]);
+    expect(r.stdout).not.toMatch(/RATE_CAPPED/);
+  });
+});
+
+// ----------------------------------------------------------------------
+// No events / no log
+// ----------------------------------------------------------------------
+
+describe('no-event paths', () => {
+  test('exits NO_LOG when question-log.jsonl missing', () => {
+    const r = run([]);
+    expect(r.status).toBe(0);
+    expect(r.stdout).toMatch(/NO_LOG/);
+  });
+
+  test('exits NO_FREE_TEXT when log has events but none are auq-other', () => {
+    spawnSync(
+      QLOG_BIN,
+      [
+        JSON.stringify({
+          skill: 'plan-tune',
+          question_id: 'hook-other00',
+          question_summary: 'Q',
+          options_count: 2,
+          user_choice: 'A',
+          source: 'hook',
+          session_id: 's',
+          tool_use_id: 'tu-x',
+        }),
+      ],
+      { env: makeEnv(), cwd: fixtureCwd, encoding: 'utf-8' },
+    );
+    const r = run([]);
+    expect(r.status).toBe(0);
+    expect(r.stdout).toMatch(/NO_FREE_TEXT/);
+  });
+});
+
+// ----------------------------------------------------------------------
+// Dry-run
+// ----------------------------------------------------------------------
+
+describe('--dry-run', () => {
+  test('emits the distill prompt + events JSON without calling API', () => {
+    writeAuqOtherEvent('I always include tests with new features');
+    writeAuqOtherEvent('Skip design review for typo fixes');
+    // Strip ANTHROPIC_API_KEY to prove no API call happens.
+    const env = makeEnv();
+    delete env.ANTHROPIC_API_KEY;
+    const res = spawnSync(BIN, ['--dry-run'], { env, cwd: fixtureCwd, encoding: 'utf-8' });
+    expect(res.status).toBe(0);
+    expect(res.stdout).toContain('DISTILL PROMPT');
+    expect(res.stdout).toContain('always include tests');
+  });
+});
+
+// ----------------------------------------------------------------------
+// API key required
+// ----------------------------------------------------------------------
+
+describe('API auth', () => {
+  test('fails loud when ANTHROPIC_API_KEY missing on sync run', () => {
+    writeAuqOtherEvent('Some free text response that needs distilling');
+    const env = makeEnv();
+    delete env.ANTHROPIC_API_KEY;
+    const res = spawnSync(BIN, [], { env, cwd: fixtureCwd, encoding: 'utf-8' });
+    expect(res.status).not.toBe(0);
+    expect(res.stderr).toMatch(/ANTHROPIC_API_KEY/);
+    expect(res.stderr).toMatch(/separate billing/);
+  });
+});
+
+// ----------------------------------------------------------------------
+// Background spawn
+// ----------------------------------------------------------------------
+
+describe('--background', () => {
+  test('detaches and exits with DISTILL_SPAWNED', () => {
+    const r = run(['--background']);
+    expect(r.status).toBe(0);
+    expect(r.stdout).toMatch(/DISTILL_SPAWNED: pid=\d+/);
+  });
+});