gstack/lib/gstack-decision-semantic.ts

94 lines
3.9 KiB
TypeScript

/**
* gstack-decision-semantic — OPTIONAL gbrain enhancement for decision resurfacing.
*
* This is the ONLY decision module that touches gbrain. The reliable core
* (lib/gstack-decision.ts) has zero gbrain imports and works with gbrain OFF; this
* module is loaded lazily by `gstack-decision-search` only on `--semantic`, and every
* path degrades to `null` (caller shows the reliable file results) when gbrain is
* absent, unconfigured, times out, or returns nothing. It NEVER throws and NEVER
* hangs (10s spawn timeout). We do not wire core function to this — gbrain is an
* enhancement, never a dependency (the code-search lesson).
*
* Surface reality (verified against gbrain 0.42.x, not guessed):
* - `gbrain search "<q>"` prints TEXT lines `[score] slug -- snippet`, NOT JSON
* (so we parse the text surface; execGbrainJson would always null here).
* - The curated-memory source is the one whose local_path is the gstack brain
* worktree (`~/.gstack-brain-worktree`), id `default` by convention — NOT a
* `gstack-brain-<user>` id. Scoping search to it keeps code/doc corpora out.
*/
import { spawnGbrain } from "./gbrain-exec";
import { parseSourcesList } from "./gbrain-sources";
const TIMEOUT_MS = 10_000;
const BRAIN_WORKTREE_SUFFIX = ".gstack-brain-worktree";
export interface SemanticHit {
score: number;
slug: string;
snippet: string;
}
/**
* Resolve the curated-memory source id (the gstack brain worktree). Returns null
* when gbrain is down/unparseable OR no worktree-backed source is registered — the
* caller then searches unscoped (best-effort) rather than failing.
*/
export function resolveMemorySourceId(env?: NodeJS.ProcessEnv): string | null {
const r = spawnGbrain(["sources", "list", "--json"], { baseEnv: env, timeout: TIMEOUT_MS });
if (r.status !== 0) return null;
let rows;
try {
rows = parseSourcesList(JSON.parse(r.stdout || "null"));
} catch {
return null;
}
const atWorktree = rows.filter(
(s) => typeof s.local_path === "string" && s.local_path.endsWith(BRAIN_WORKTREE_SUFFIX),
);
const pick = atWorktree.find((s) => s.id === "default") ?? atWorktree[0];
return pick?.id ?? null;
}
/**
* Parse gbrain search's text output into scored hits. Lines look like:
* `[0.4361] slug -- snippet text...`
* Non-matching lines (banners, blanks) are skipped. Exported for deterministic
* unit testing of the parser without a live gbrain.
*/
export function parseSearchHits(stdout: string, minScore: number, limit: number): SemanticHit[] {
const hits: SemanticHit[] = [];
for (const line of stdout.split("\n")) {
const m = line.match(/^\[([\d.]+)\]\s+(\S+)\s+--\s+(.*)$/);
if (!m) continue;
const score = parseFloat(m[1]);
if (!Number.isFinite(score) || score < minScore) continue;
hits.push({ score, slug: m[2], snippet: m[3].trim() });
}
return hits.slice(0, limit);
}
/**
* Semantic recall over the curated-memory source. Returns parsed hits, or `null`
* when gbrain is unavailable / errors (caller MUST degrade to the reliable file
* results on null). An empty array means gbrain ran but found nothing relevant
* (e.g. memory not synced yet) — also honest, distinct from null. Never throws,
* never hangs.
*/
export function semanticRecall(
query: string,
env?: NodeJS.ProcessEnv,
minScore = 0.1,
limit = 3,
): SemanticHit[] | null {
if (!query.trim()) return null;
// Require the curated-memory source. If it's absent (gbrain down OR no worktree-backed
// source), degrade to null rather than searching UNSCOPED — an unscoped search pulls
// code/doc corpora that would be mislabeled as "related decisions" (Codex finding).
const sourceId = resolveMemorySourceId(env);
if (!sourceId) return null;
const r = spawnGbrain(["search", query, "--source", sourceId], { baseEnv: env, timeout: TIMEOUT_MS });
if (r.status !== 0) return null; // gbrain down / not on PATH / errored → degrade
return parseSearchHits(r.stdout || "", minScore, limit);
}