mirror of https://github.com/garrytan/gstack.git
fix(gbrain-sync): cut source-id slugs on hyphen boundaries (+ #1357)
Cherry-picked from #1481 by drummerms and extended with the explicit HTTPS-remote regression case for #1357 (decision D2=A). `constrainSourceId` truncated the slug with `slug.slice(-tailBudget)`, which cut mid-word when the boundary fell inside a token. For a repo where the combined `prefix-org-repo-pathhash` exceeded 32 chars, this produced embarrassing artifacts like `gstack-code-kill-270c0001-c32152` (from `drummerms-av-sow-wiz-skill-270c0001`). Two changes carried from #1481, adapted for the #1468 hostpathhash: 1. `constrainSourceId` now walks hyphen-separated tokens from the right, accumulating whole tokens until adding the next would exceed `tailBudget`. When no token fits, falls through to the existing `${prefix}-${hash}` form. 2. `deriveCodeSourceId` now retries with `repo-only-hostpathhash` (dropping the org segment) when the full `org-repo-hostpathhash` triggers truncation. Keeps the repo name readable when it fits at all. Plus a new test asserting the source id is period-free for the exact HTTPS-with-.git remote shape from #1357 (`https://github.com/foo/bar.git`). canonicalizeRemote strips `.git`; the sanitizer strips any residual non-alnum. The test closes #1357 by pinning the property. Closes #1357 Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
cdd3a2f77a
commit
cfd2b5d792
|
|
@ -186,7 +186,14 @@ function deriveCodeSourceId(repoPath: string): string {
|
|||
if (remote) {
|
||||
const segs = remote.split("/").filter(Boolean);
|
||||
const slugSource = segs.slice(-2).join("-");
|
||||
return constrainSourceId("gstack-code", `${slugSource}-${hostPathHash}`);
|
||||
const fullId = constrainSourceId("gstack-code", `${slugSource}-${hostPathHash}`);
|
||||
// If the org+repo+hostpathhash fits cleanly (suffix preserved), use it.
|
||||
if (fullId.endsWith(`-${hostPathHash}`)) return fullId;
|
||||
// Otherwise drop the org prefix and retry with just repo+hostpathhash so
|
||||
// the repo name stays readable. If that still doesn't fit,
|
||||
// constrainSourceId falls back to a deterministic hash-only form.
|
||||
const repoOnly = segs[segs.length - 1] || "repo";
|
||||
return constrainSourceId("gstack-code", `${repoOnly}-${hostPathHash}`);
|
||||
}
|
||||
const base = repoPath.split("/").pop() || "repo";
|
||||
return constrainSourceId("gstack-code", `${base}-${hostPathHash}`);
|
||||
|
|
@ -383,6 +390,10 @@ export function removeOrphanedSource(oldId: string): boolean {
|
|||
* Build a gbrain-valid source id (1-32 lowercase alnum + interior hyphens). Sanitizes
|
||||
* `raw`, prefixes with `prefix`, and falls back to a hashed-tail form when total length
|
||||
* would exceed 32 chars.
|
||||
*
|
||||
* Truncation cuts on hyphen boundaries (whole-word units) from the right, never
|
||||
* mid-word. Inputs like "drummerms-av-sow-wiz-skill-270c0001" produce
|
||||
* "${prefix}-270c0001-<hash>", not "${prefix}-kill-270c0001-<hash>".
|
||||
*/
|
||||
function constrainSourceId(prefix: string, raw: string): string {
|
||||
const MAX = 32;
|
||||
|
|
@ -401,7 +412,20 @@ function constrainSourceId(prefix: string, raw: string): string {
|
|||
// Total budget: prefix + "-" + tail + "-" + hash
|
||||
const tailBudget = MAX - prefix.length - 2 - hash.length;
|
||||
if (tailBudget < 1) return `${prefix}-${hash}`;
|
||||
const tail = slug.slice(-tailBudget).replace(/^-+|-+$/g, "");
|
||||
// Cut on hyphen boundaries instead of mid-word. Walk tokens from the right,
|
||||
// accumulating until adding the next token would exceed tailBudget. This
|
||||
// preserves readable suffixes (pathhash, repo name) and avoids embarrassing
|
||||
// mid-word artifacts like "skill" → "kill".
|
||||
const tokens = slug.split("-").filter(Boolean);
|
||||
const kept: string[] = [];
|
||||
let len = 0;
|
||||
for (let i = tokens.length - 1; i >= 0; i--) {
|
||||
const add = kept.length === 0 ? tokens[i].length : tokens[i].length + 1;
|
||||
if (len + add > tailBudget) break;
|
||||
kept.unshift(tokens[i]);
|
||||
len += add;
|
||||
}
|
||||
const tail = kept.join("-");
|
||||
return tail ? `${prefix}-${tail}-${hash}` : `${prefix}-${hash}`;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -728,6 +728,74 @@ describe("planHostnameFoldMigration", () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe("constrainSourceId truncation (hyphen-boundary cut)", () => {
|
||||
// PR #1481 (Drummerms): the old slug.slice(-tailBudget) cut mid-word when
|
||||
// the boundary fell inside a token. For a long repo like
|
||||
// `drummerms-av-sow-wiz-skill-270c0001` the truncated tail used to end in
|
||||
// `kill-270c0001` (from `skill`). The new tokenized cut walks hyphen
|
||||
// boundaries from the right and only keeps whole tokens.
|
||||
//
|
||||
// Exercised via the dry-run preview (`gbrain sources add gstack-code-…`),
|
||||
// since constrainSourceId is module-private.
|
||||
it("never produces mid-word truncation artifacts like `kill` (from `skill`)", () => {
|
||||
const home = makeTestHome();
|
||||
const gstackHome = join(home, ".gstack");
|
||||
mkdirSync(gstackHome, { recursive: true });
|
||||
const repo = mkdtempSync(join(tmpdir(), "gstack-hyphen-cut-"));
|
||||
spawnSync("git", ["init", "--quiet", "-b", "main"], { cwd: repo });
|
||||
// Remote chosen to be long enough that constrainSourceId truncates and
|
||||
// the boundary lands inside the word `skill`.
|
||||
spawnSync("git", ["remote", "add", "origin", "https://github.com/drummerms-av-sow-wiz/skill-270c0001.git"], { cwd: repo });
|
||||
|
||||
const r = spawnSync("bun", [SCRIPT, "--dry-run", "--code-only", "--quiet"], {
|
||||
encoding: "utf-8",
|
||||
timeout: 60000,
|
||||
cwd: repo,
|
||||
env: { ...process.env, HOME: home, GSTACK_HOME: gstackHome },
|
||||
});
|
||||
expect(r.status).toBe(0);
|
||||
const id = (r.stdout || "").match(/gbrain sources add (\S+)/)?.[1];
|
||||
expect(id).toBeTruthy();
|
||||
// The id must not contain the mid-word fragment `kill` (left over from
|
||||
// slicing inside `skill`). Tokens that survive truncation must be whole.
|
||||
expect(id).not.toMatch(/(^|-)kill(-|$)/);
|
||||
// Still gbrain-valid.
|
||||
expect(id!.length).toBeLessThanOrEqual(32);
|
||||
expect(id!).toMatch(/^[a-z0-9](?:[a-z0-9-]{0,30}[a-z0-9])?$/);
|
||||
|
||||
rmSync(repo, { recursive: true, force: true });
|
||||
rmSync(home, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
// Closes #1357: HTTPS remotes ending in `.git` used to pass periods through
|
||||
// to the source id. canonicalizeRemote strips the `.git` suffix; the
|
||||
// sanitizer also strips any residual non-alnum. Test asserts the source id
|
||||
// is period-free for the exact case from the issue.
|
||||
it("produces a period-free source id for HTTPS remotes ending in .git (#1357)", () => {
|
||||
const home = makeTestHome();
|
||||
const gstackHome = join(home, ".gstack");
|
||||
mkdirSync(gstackHome, { recursive: true });
|
||||
const repo = mkdtempSync(join(tmpdir(), "gstack-https-period-"));
|
||||
spawnSync("git", ["init", "--quiet", "-b", "main"], { cwd: repo });
|
||||
spawnSync("git", ["remote", "add", "origin", "https://github.com/foo/bar.git"], { cwd: repo });
|
||||
|
||||
const r = spawnSync("bun", [SCRIPT, "--dry-run", "--code-only", "--quiet"], {
|
||||
encoding: "utf-8",
|
||||
timeout: 60000,
|
||||
cwd: repo,
|
||||
env: { ...process.env, HOME: home, GSTACK_HOME: gstackHome },
|
||||
});
|
||||
expect(r.status).toBe(0);
|
||||
const id = (r.stdout || "").match(/gbrain sources add (\S+)/)?.[1];
|
||||
expect(id).toBeTruthy();
|
||||
expect(id).not.toContain(".");
|
||||
expect(id!).toMatch(/^[a-z0-9](?:[a-z0-9-]{0,30}[a-z0-9])?$/);
|
||||
|
||||
rmSync(repo, { recursive: true, force: true });
|
||||
rmSync(home, { recursive: true, force: true });
|
||||
});
|
||||
});
|
||||
|
||||
describe("sourceLocalPath", () => {
|
||||
let bindir: string;
|
||||
beforeEach(() => {
|
||||
|
|
|
|||
Loading…
Reference in New Issue