From 73fa0be2f5b6e9b466fe6f08da0418958e44e83f Mon Sep 17 00:00:00 2001 From: Jayesh Betala Date: Thu, 28 May 2026 12:07:21 +0530 Subject: [PATCH] fix(jsonl-merge): make equal-ts resolution converge across machines The JSONL append merge driver sorted timestamped entries by (0, ts) with no further tiebreaker. Equal-ts entries then fell back to stable-sort insertion order (base, ours, theirs), but git assigns the local side to "ours", so two machines resolving the same conflict emitted equal-ts lines in opposite order. The merged files diverged and never converged. gstack-telemetry-log uses second-granularity timestamps, so same-ts collisions are routine. Add the line content as the final sort tiebreaker so the order is total and side-independent. Add a regression test that runs the driver with the two sides swapped and asserts identical output. --- bin/gstack-jsonl-merge | 13 ++++-- test/jsonl-merge.test.ts | 96 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 3 deletions(-) create mode 100644 test/jsonl-merge.test.ts diff --git a/bin/gstack-jsonl-merge b/bin/gstack-jsonl-merge index c777612ac..d2fa5744c 100755 --- a/bin/gstack-jsonl-merge +++ b/bin/gstack-jsonl-merge @@ -53,18 +53,25 @@ for path in paths: continue if line in seen: continue - # Prefer ISO ts field for sort; fall back to SHA-256. + # Prefer ISO ts field for sort; fall back to SHA-256. The line + # content is the final tiebreaker so the order is total: two + # entries sharing a ts must resolve identically regardless of + # which side they arrive on. Without it, equal-ts entries fall + # back to insertion order (base, ours, theirs), and since ours + # and theirs are swapped depending on which machine runs the + # merge, the two sides produce divergent files that never + # converge. 
sort_key = None try: obj = json.loads(line) ts = obj.get('ts') or obj.get('timestamp') if isinstance(ts, str): - sort_key = (0, ts) + sort_key = (0, ts, line) except (json.JSONDecodeError, ValueError, TypeError): pass if sort_key is None: h = hashlib.sha256(line.encode('utf-8')).hexdigest() - sort_key = (1, h) + sort_key = (1, h, line) seen[line] = sort_key except FileNotFoundError: # Absent base / absent ours / absent theirs are all valid. diff --git a/test/jsonl-merge.test.ts b/test/jsonl-merge.test.ts new file mode 100644 index 000000000..20bb7d877 --- /dev/null +++ b/test/jsonl-merge.test.ts @@ -0,0 +1,96 @@ +import { describe, test, expect, beforeEach, afterEach } from 'bun:test'; +import { execFileSync } from 'child_process'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; + +const ROOT = path.resolve(import.meta.dir, '..'); +const DRIVER = path.join(ROOT, 'bin', 'gstack-jsonl-merge'); + +let tmpDir: string; + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-jsonl-merge-')); +}); + +afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +/** + * Run the merge driver the way git does: `driver %O %A %B` (base, ours, theirs). + * The driver writes the merged result back to the `ours` file. Returns that + * file's content. `base`/`ours`/`theirs` are arrays of JSONL lines (the file + * is created from them); pass `null` to omit a file entirely (git passes an + * absent path for an added file, which the driver must tolerate). + */ +function runMerge( + base: string[] | null, + ours: string[] | null, + theirs: string[] | null, +): string { + const write = (name: string, lines: string[] | null): string => { + const p = path.join(tmpDir, name); + if (lines === null) return path.join(tmpDir, `${name}.absent`); + fs.writeFileSync(p, lines.length ? 
lines.join('\n') + '\n' : ''); + return p; + }; + const basePath = write('base', base); + const oursPath = write('ours', ours); + const theirsPath = write('theirs', theirs); + execFileSync(DRIVER, [basePath, oursPath, theirsPath], { + encoding: 'utf-8', + timeout: 15000, + }); + return fs.readFileSync(oursPath, 'utf-8'); +} + +describe('gstack-jsonl-merge', () => { + test('equal-ts entries resolve identically regardless of side (convergence)', () => { + // Two machines append a different event in the same second, then each + // merges the other's push. Machine A sees its own line as "ours"; machine + // B sees the same line as "theirs". The merge must produce the same file + // on both, or the repos diverge and never reconcile. + const a = '{"ts":"2026-05-28T10:00:00Z","event":"a"}'; + const b = '{"ts":"2026-05-28T10:00:00Z","event":"b"}'; + + const machineA = runMerge([], [a], [b]); // a = ours, b = theirs + const machineB = runMerge([], [b], [a]); // b = ours, a = theirs + + expect(machineA).toBe(machineB); + // Both lines survive. 
+ expect(machineA).toContain('"event":"a"'); + expect(machineA).toContain('"event":"b"'); + }); + + test('non-timestamped lines also resolve identically regardless of side', () => { + const a = '{"event":"a"}'; // no ts -> hash-ordered + const b = '{"event":"b"}'; + expect(runMerge([], [a], [b])).toBe(runMerge([], [b], [a])); + }); + + test('plain (non-JSON) lines resolve identically regardless of side', () => { + expect(runMerge([], ['zebra'], ['apple'])).toBe( + runMerge([], ['apple'], ['zebra']), + ); + }); + + test('exact-duplicate lines are deduped', () => { + const line = '{"ts":"2026-05-28T10:00:00Z","event":"a"}'; + const out = runMerge([line], [line], [line]); + expect(out.trimEnd().split('\n')).toEqual([line]); + }); + + test('timestamped entries sort ascending by ts', () => { + const early = '{"ts":"2026-05-28T09:00:00Z","event":"early"}'; + const late = '{"ts":"2026-05-28T11:00:00Z","event":"late"}'; + const out = runMerge([], [late], [early]).trimEnd().split('\n'); + expect(out).toEqual([early, late]); + }); + + test('absent ours/theirs files are tolerated (added-file merge)', () => { + const a = '{"ts":"2026-05-28T10:00:00Z","event":"a"}'; + const out = runMerge(null, [a], null); + expect(out.trimEnd()).toBe(a); + }); +});