fix(jsonl-merge): make equal-ts resolution converge across machines

The JSONL append merge driver sorted timestamped entries by (0, ts) with no
further tiebreaker. Equal-ts entries then fell back to stable-sort insertion
order (base, ours, theirs), but git assigns the local side to "ours", so two
machines resolving the same conflict emitted equal-ts lines in opposite order.
The merged files diverged and never converged. gstack-telemetry-log uses
second-granularity timestamps, so same-ts collisions are routine.

Add the line content as the final sort tiebreaker so the order is total and
side-independent. Add a regression test that runs the driver with the two
sides swapped and asserts identical output.
This commit is contained in:
Jayesh Betala 2026-05-28 12:07:21 +05:30 committed by Garry Tan
parent 62024d114c
commit 73fa0be2f5
No known key found for this signature in database
GPG Key ID: C1F69E85C74EFE1D
2 changed files with 106 additions and 3 deletions

View File

@ -53,18 +53,25 @@ for path in paths:
continue
if line in seen:
continue
# Prefer ISO ts field for sort; fall back to SHA-256.
# Prefer ISO ts field for sort; fall back to SHA-256. The line
# content is the final tiebreaker so the order is total: two
# entries sharing a ts must resolve identically regardless of
# which side they arrive on. Without it, equal-ts entries fall
# back to insertion order (base, ours, theirs), and since ours
# and theirs are swapped depending on which machine runs the
# merge, the two sides produce divergent files that never
# converge.
sort_key = None
try:
obj = json.loads(line)
ts = obj.get('ts') or obj.get('timestamp')
if isinstance(ts, str):
sort_key = (0, ts)
sort_key = (0, ts, line)
except (json.JSONDecodeError, ValueError, TypeError):
pass
if sort_key is None:
h = hashlib.sha256(line.encode('utf-8')).hexdigest()
sort_key = (1, h)
sort_key = (1, h, line)
seen[line] = sort_key
except FileNotFoundError:
# Absent base / absent ours / absent theirs are all valid.

96
test/jsonl-merge.test.ts Normal file
View File

@ -0,0 +1,96 @@
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import { execFileSync } from 'child_process';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
// Parent of this test file's directory; the driver is looked up under its `bin/`.
const ROOT = path.resolve(import.meta.dir, '..');
// Merge driver executable that every test in this file runs.
const DRIVER = path.join(ROOT, 'bin', 'gstack-jsonl-merge');

// Scratch directory for the test currently running; reassigned before each test.
let tmpDir: string;

beforeEach(() => {
  const prefix = path.join(os.tmpdir(), 'gstack-jsonl-merge-');
  tmpDir = fs.mkdtempSync(prefix);
});

afterEach(() => {
  // `force` keeps cleanup from throwing when the directory is already gone.
  fs.rmSync(tmpDir, { force: true, recursive: true });
});
/**
 * Run the merge driver the way git does: `driver <base> <ours> <theirs>`.
 * The driver writes the merged result back to the <ours> file; this helper
 * returns that file's content after the driver exits.
 *
 * All three inputs are materialised inside the per-test `tmpDir`, so repeated
 * calls within one test reuse (and overwrite) the same file names.
 *
 * @param base   JSONL lines for the common ancestor, or `null` to omit the file.
 * @param ours   JSONL lines for the local side, or `null` to omit the file.
 * @param theirs JSONL lines for the remote side, or `null` to omit the file.
 * @returns The content of the <ours> path once the driver has run.
 * @throws If the driver exits non-zero or exceeds the 15 s timeout
 *         (propagated from `execFileSync`).
 *
 * Passing `null` hands the driver a path that does not exist (git passes an
 * absent path for an added file, which the driver must tolerate).
 */
function runMerge(
  base: string[] | null,
  ours: string[] | null,
  theirs: string[] | null,
): string {
  const materialize = (name: string, lines: string[] | null): string => {
    if (lines === null) {
      const absent = path.join(tmpDir, `${name}.absent`);
      // An earlier call in the same test may have left a file here (the
      // driver writes its result to the <ours> path). Remove it so "absent"
      // really means absent on every call.
      fs.rmSync(absent, { force: true });
      return absent;
    }
    const target = path.join(tmpDir, name);
    // Every line is newline-terminated; an empty array yields an empty file.
    fs.writeFileSync(target, lines.map((line) => `${line}\n`).join(''));
    return target;
  };

  const basePath = materialize('base', base);
  const oursPath = materialize('ours', ours);
  const theirsPath = materialize('theirs', theirs);

  execFileSync(DRIVER, [basePath, oursPath, theirsPath], {
    encoding: 'utf-8',
    timeout: 15000,
  });

  return fs.readFileSync(oursPath, 'utf-8');
}
// Behavioural tests for the JSONL merge driver: output must not depend on
// which side a line arrives on, duplicates collapse, and timestamps order
// the result.
describe('gstack-jsonl-merge', () => {
  test('equal-ts entries resolve identically regardless of side (convergence)', () => {
    // Two machines append a different event in the same second, then each
    // merges the other's push. Machine A sees its own line as "ours"; machine
    // B sees the same line as "theirs". The merge must produce the same file
    // on both, or the repos diverge and never reconcile.
    const a = '{"ts":"2026-05-28T10:00:00Z","event":"a"}';
    const b = '{"ts":"2026-05-28T10:00:00Z","event":"b"}';
    const machineA = runMerge([], [a], [b]); // a = ours, b = theirs
    const machineB = runMerge([], [b], [a]); // b = ours, a = theirs
    expect(machineA).toBe(machineB);
    // Both lines survive.
    expect(machineA).toContain('"event":"a"');
    expect(machineA).toContain('"event":"b"');
  });

  test('non-timestamped lines also resolve identically regardless of side', () => {
    const a = '{"event":"a"}'; // no ts -> hash-ordered
    const b = '{"event":"b"}';
    const machineA = runMerge([], [a], [b]);
    const machineB = runMerge([], [b], [a]);
    expect(machineA).toBe(machineB);
    // Equality alone would also hold if both runs dropped every line.
    expect(machineA).toContain(a);
    expect(machineA).toContain(b);
  });

  test('plain (non-JSON) lines resolve identically regardless of side', () => {
    const machineA = runMerge([], ['zebra'], ['apple']);
    const machineB = runMerge([], ['apple'], ['zebra']);
    expect(machineA).toBe(machineB);
    // Equality alone would also hold if both runs dropped every line.
    expect(machineA).toContain('zebra');
    expect(machineA).toContain('apple');
  });

  test('exact-duplicate lines are deduped', () => {
    const line = '{"ts":"2026-05-28T10:00:00Z","event":"a"}';
    const out = runMerge([line], [line], [line]);
    expect(out.trimEnd().split('\n')).toEqual([line]);
  });

  test('timestamped entries sort ascending by ts', () => {
    const early = '{"ts":"2026-05-28T09:00:00Z","event":"early"}';
    const late = '{"ts":"2026-05-28T11:00:00Z","event":"late"}';
    // Feed the later entry first so the expected order cannot come from
    // input order.
    const out = runMerge([], [late], [early]).trimEnd().split('\n');
    expect(out).toEqual([early, late]);
  });

  // Title corrected: the call below omits <base> and <theirs>; <ours> is present.
  test('absent base/theirs files are tolerated (added-file merge)', () => {
    const a = '{"ts":"2026-05-28T10:00:00Z","event":"a"}';
    const out = runMerge(null, [a], null);
    expect(out.trimEnd()).toBe(a);
  });
});