MicroFish/scripts/ci/tests/test_i18n_cjk_guard.py

359 lines
13 KiB
Python

"""Unit and integration tests for ``scripts/ci/i18n_cjk_guard.py``.

Stdlib-only tests using ``unittest``. Run from the repository root with::

    python -m unittest scripts/ci/tests/test_i18n_cjk_guard.py

or as a script::

    python scripts/ci/tests/test_i18n_cjk_guard.py
"""
from __future__ import annotations
import json
import os
import subprocess
import sys
import tempfile
import unittest
from pathlib import Path
# Directory that contains this test module.
_HERE = Path(__file__).resolve().parent
# Parent of the tests directory; the tests below expect ``i18n_cjk_guard.py``
# to live here.
_GUARD_DIR = _HERE.parent
# Put that directory first on ``sys.path`` so the guard script can be imported
# by its bare module name on the next line.
sys.path.insert(0, str(_GUARD_DIR))
import i18n_cjk_guard as guard # noqa: E402
def _git(repo: Path, *args: str) -> subprocess.CompletedProcess[str]:
    """Invoke ``git`` with ``args`` using ``repo`` as the working directory.

    Both output streams are captured and decoded as text. A non-zero exit
    status raises ``subprocess.CalledProcessError``.

    Returns:
        The ``CompletedProcess`` describing the finished git invocation.
    """
    command = ["git"]
    command.extend(args)
    # ``capture_output=True`` is shorthand for piping both stdout and stderr.
    return subprocess.run(
        command,
        cwd=repo,
        capture_output=True,
        text=True,
        check=True,
    )
def _make_repo(tmp: Path) -> Path:
    """Initialize an isolated git repository at ``tmp`` and return the path.

    The repository starts on a ``main`` branch and receives a repo-local
    identity so commits succeed without any global git configuration.
    Commit signing is also disabled locally: a developer's global
    ``commit.gpgsign=true`` would otherwise make every test commit require a
    signing key (or prompt for one) and fail for reasons unrelated to the
    code under test.

    Args:
        tmp: Existing directory in which to create the repository.

    Returns:
        ``tmp``, unchanged, for convenient chaining.
    """
    _git(tmp, "init", "-q", "-b", "main")
    local_config = (
        ("user.email", "test@example.com"),
        ("user.name", "Test"),
        ("commit.gpgsign", "false"),
    )
    for key, value in local_config:
        _git(tmp, "config", key, value)
    return tmp
def _commit_file(repo: Path, rel: str, content: str | bytes) -> None:
    """Create ``rel`` inside ``repo`` with ``content`` and commit it.

    Missing parent directories are created first. ``str`` content is written
    as UTF-8 text; anything else is written as raw bytes. The file is then
    staged and committed with the message ``add <rel>``.
    """
    destination = repo / rel
    destination.parent.mkdir(parents=True, exist_ok=True)
    if not isinstance(content, str):
        destination.write_bytes(content)
    else:
        destination.write_text(content, encoding="utf-8")
    # ``--`` keeps git from interpreting an unusual path as an option.
    _git(repo, "add", "--", rel)
    _git(repo, "commit", "-q", "-m", f"add {rel}")
class ScanLocaleCjkTests(unittest.TestCase):
    """``scan_locale_cjk`` returns one ``LocaleFinding`` per CJK leaf string."""

    @staticmethod
    def _write_catalogue(directory: str, payload: dict, **dump_kwargs) -> Path:
        """Serialize ``payload`` to ``en.json`` in ``directory``; return its path.

        ``dump_kwargs`` are forwarded to ``json.dumps`` so each test controls
        indentation and ASCII escaping of the file it scans.
        """
        catalogue_path = Path(directory) / "en.json"
        serialized = json.dumps(payload, **dump_kwargs)
        catalogue_path.write_text(serialized, encoding="utf-8")
        return catalogue_path

    def test_clean_catalogue_returns_empty_list(self) -> None:
        # A catalogue holding only ASCII strings must yield no findings.
        catalogue = {"common": {"confirm": "Confirm", "cancel": "Cancel"}}
        with tempfile.TemporaryDirectory() as workdir:
            en_path = self._write_catalogue(workdir, catalogue, indent=2)
            self.assertEqual(guard.scan_locale_cjk(en_path), [])

    def test_planted_cjk_returns_one_finding(self) -> None:
        # Exactly one leaf ("common.cancel") carries CJK text.
        catalogue = {"common": {"confirm": "Confirm", "cancel": "取消"}}
        with tempfile.TemporaryDirectory() as workdir:
            en_path = self._write_catalogue(
                workdir, catalogue, indent=2, ensure_ascii=False
            )
            findings = guard.scan_locale_cjk(en_path)
            self.assertEqual(len(findings), 1)
            first = findings[0]
            key, line_no, snippet = first
            self.assertEqual(key, "common.cancel")
            self.assertGreaterEqual(line_no, 1)
            self.assertIn("取消", snippet)

    def test_long_value_is_truncated(self) -> None:
        # A CJK prefix followed by 200 filler characters; the reported snippet
        # must not exceed the guard's configured maximum length.
        oversized = f"前置{'x' * 200}"
        with tempfile.TemporaryDirectory() as workdir:
            en_path = self._write_catalogue(
                workdir, {"k": oversized}, ensure_ascii=False
            )
            findings = guard.scan_locale_cjk(en_path)
            self.assertEqual(len(findings), 1)
            snippet = findings[0][2]
            self.assertLessEqual(len(snippet), guard.SNIPPET_MAX_LEN)
class CountPathCjkTests(unittest.TestCase):
    """``count_path_cjk`` shells out to ``git grep -nIP``."""

    def test_returns_zero_for_empty_match(self) -> None:
        # A tracked file without any CJK text contributes nothing.
        with tempfile.TemporaryDirectory() as tmp:
            repo = _make_repo(Path(tmp))
            _commit_file(repo, "src/a.txt", "hello world\n")
            self.assertEqual(guard.count_path_cjk(repo, "src"), 0)

    def test_counts_planted_cjk_lines(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            repo = _make_repo(Path(tmp))
            _commit_file(
                repo,
                "src/a.py",
                "# 一\nprint('hi')\n# 二三\nx = '四'\n",
            )
            # Three of the four lines contain CJK: ``# 一``, ``# 二三`` and
            # ``x = '四'``. ``print('hi')`` does not. The count is per line,
            # so the two characters on the third line still count once.
            self.assertEqual(guard.count_path_cjk(repo, "src"), 3)

    def test_skips_binary_files(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            repo = _make_repo(Path(tmp))
            # A "binary" blob containing CJK bytes; -I should exclude it.
            # ``\xe4\xb8\x80`` is the UTF-8 encoding of "一"; the NUL bytes
            # are what make git treat the file as binary.
            _commit_file(
                repo,
                "src/blob.bin",
                b"\x00\x01\x02\xe4\xb8\x80\x00\xff",
            )
            self.assertEqual(guard.count_path_cjk(repo, "src"), 0)

    def test_skips_untracked_files(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            repo = _make_repo(Path(tmp))
            _commit_file(repo, "src/.gitkeep", "")
            # The untracked file must actually contain CJK; otherwise a zero
            # count would not prove that untracked files are ignored.
            (repo / "src" / "untracked.py").write_text(
                "x = '四'\n", encoding="utf-8"
            )
            self.assertEqual(guard.count_path_cjk(repo, "src"), 0)
class BaselineRoundTripTests(unittest.TestCase):
    """``read_baseline`` and ``write_baseline`` round-trip cleanly."""

    def test_round_trip(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            path = Path(tmp) / "baseline.txt"
            counts = {"backend/app": 2792, "frontend/src": 902}
            guard.write_baseline(path, counts)
            # Decode explicitly as UTF-8, like every other read in this
            # module, so the check does not depend on the platform's default
            # encoding.
            self.assertTrue(path.read_text(encoding="utf-8").endswith("\n"))
            self.assertEqual(guard.read_baseline(path), counts)

    def test_sorted_lexicographically_and_single_trailing_newline(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            path = Path(tmp) / "baseline.txt"
            # Keys are passed in reverse order on purpose; the file must
            # still list them sorted.
            guard.write_baseline(path, {"frontend/src": 1, "backend/app": 2})
            text = path.read_text(encoding="utf-8")
            # Lines starting with "#" are header comments, not data.
            data_lines = [
                line for line in text.splitlines() if not line.startswith("#")
            ]
            self.assertEqual(
                data_lines,
                ["backend/app\t2", "frontend/src\t1"],
            )
            # Exactly one trailing newline: present, but not doubled.
            self.assertTrue(text.endswith("\n"))
            self.assertFalse(text.endswith("\n\n"))

    def test_missing_file_raises_baseline_error(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            path = Path(tmp) / "missing.txt"
            with self.assertRaises(guard.BaselineError):
                guard.read_baseline(path)

    def test_malformed_line_raises_baseline_error(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            path = Path(tmp) / "baseline.txt"
            # The data line separates key and count with a space instead of
            # the tab that ``write_baseline`` emits.
            path.write_text(
                "# header\nbackend/app 100\n", encoding="utf-8"
            )
            with self.assertRaises(guard.BaselineError):
                guard.read_baseline(path)
class RunCheckEndToEndTests(unittest.TestCase):
    """End-to-end test of ``run_check`` against a synthetic repo."""

    def _make_full_repo(
        self,
        tmp: Path,
        *,
        en_json: dict,
        backend_lines: int,
        frontend_lines: int,
    ) -> tuple[Path, Path]:
        """Build a scratch repository with a locale file and both source trees.

        Args:
            tmp: Directory in which the repository is created.
            en_json: Catalogue committed as ``locales/en.json``.
            backend_lines: Number of CJK comment lines committed under
                ``backend/app``; ``0`` commits an empty ``.gitkeep`` instead.
            frontend_lines: Same, for ``frontend/src``.

        Returns:
            ``(repo, baseline_path)``. The baseline file itself is NOT
            created; each test decides whether and what to write.
        """
        repo = _make_repo(tmp)
        _commit_file(
            repo,
            "locales/en.json",
            json.dumps(en_json, indent=2, ensure_ascii=False),
        )
        # (directory, file name, comment marker, number of CJK lines)
        trees = (
            ("backend/app", "x.py", "#", backend_lines),
            ("frontend/src", "x.js", "//", frontend_lines),
        )
        for directory, filename, marker, line_count in trees:
            if line_count:
                content = (
                    "\n".join(f"{marker} 中{i}" for i in range(line_count))
                    + "\n"
                )
                _commit_file(repo, f"{directory}/{filename}", content)
            else:
                # Keep the directory tracked even when it has no CJK lines.
                _commit_file(repo, f"{directory}/.gitkeep", "")
        baseline_path = repo / "baseline.txt"
        return repo, baseline_path

    def test_pass_within_baseline(self) -> None:
        # Current counts (3 and 2) are below the recorded baseline (5 and 5).
        with tempfile.TemporaryDirectory() as tmp:
            repo, baseline_path = self._make_full_repo(
                Path(tmp),
                en_json={"k": "Confirm"},
                backend_lines=3,
                frontend_lines=2,
            )
            guard.write_baseline(
                baseline_path,
                {"backend/app": 5, "frontend/src": 5},
            )
            rc = guard.run_check(repo, baseline_path)
            self.assertEqual(rc, 0)

    def test_fail_on_locale_cjk(self) -> None:
        # CJK inside the English catalogue fails the check even though both
        # source trees match their baseline.
        with tempfile.TemporaryDirectory() as tmp:
            repo, baseline_path = self._make_full_repo(
                Path(tmp),
                en_json={"k": "中文"},
                backend_lines=0,
                frontend_lines=0,
            )
            guard.write_baseline(
                baseline_path,
                {"backend/app": 0, "frontend/src": 0},
            )
            rc = guard.run_check(repo, baseline_path)
            self.assertEqual(rc, 1)

    def test_fail_on_regression_with_refresh_hint(self) -> None:
        # Local imports keep this block self-contained, as the original did
        # for ``StringIO``.
        import contextlib
        import io

        with tempfile.TemporaryDirectory() as tmp:
            repo, baseline_path = self._make_full_repo(
                Path(tmp),
                en_json={"k": "Confirm"},
                backend_lines=10,
                frontend_lines=0,
            )
            # Ten CJK lines against a baseline of five is a regression.
            guard.write_baseline(
                baseline_path,
                {"backend/app": 5, "frontend/src": 0},
            )
            # Capture stderr; the context manager restores ``sys.stderr``
            # even if ``run_check`` raises.
            captured_err = io.StringIO()
            with contextlib.redirect_stderr(captured_err):
                rc = guard.run_check(repo, baseline_path)
            self.assertEqual(rc, 1)
            err_text = captured_err.getvalue()
            self.assertIn("cjk-regression", err_text)
            # The failure message must tell the user how to refresh.
            self.assertIn(
                "python scripts/ci/i18n_cjk_guard.py --update-baseline",
                err_text,
            )

    def test_missing_en_json_fails(self) -> None:
        # Built by hand instead of via ``_make_full_repo`` so that no
        # ``locales/en.json`` is ever committed.
        with tempfile.TemporaryDirectory() as tmp:
            repo = _make_repo(Path(tmp))
            _commit_file(repo, "backend/app/.gitkeep", "")
            _commit_file(repo, "frontend/src/.gitkeep", "")
            baseline_path = repo / "baseline.txt"
            guard.write_baseline(
                baseline_path,
                {"backend/app": 0, "frontend/src": 0},
            )
            rc = guard.run_check(repo, baseline_path)
            self.assertEqual(rc, 1)

    def test_missing_baseline_fails(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            repo, baseline_path = self._make_full_repo(
                Path(tmp),
                en_json={"k": "Confirm"},
                backend_lines=0,
                frontend_lines=0,
            )
            # Do not write the baseline.
            self.assertFalse(baseline_path.exists())
            rc = guard.run_check(repo, baseline_path)
            self.assertEqual(rc, 1)
class UpdateBaselineTests(unittest.TestCase):
    """``update_baseline`` writes current counts and exits 0."""

    def test_update_then_check_passes(self) -> None:
        # Files committed, in order, before the baseline is generated.
        fixtures = (
            ("locales/en.json", json.dumps({"k": "Confirm"}, indent=2)),
            ("backend/app/x.py", "# 一\n# 二\n"),
            ("frontend/src/.gitkeep", ""),
        )
        expected_counts = (("backend/app", 2), ("frontend/src", 0))
        with tempfile.TemporaryDirectory() as workdir:
            repo = _make_repo(Path(workdir))
            for rel, content in fixtures:
                _commit_file(repo, rel, content)
            baseline_path = repo / "baseline.txt"
            status = guard.update_baseline(repo, baseline_path)
            self.assertEqual(status, 0)
            counts = guard.read_baseline(baseline_path)
            for tree, expected in expected_counts:
                self.assertEqual(counts[tree], expected)
            # A freshly generated baseline must satisfy the check right away.
            self.assertEqual(guard.run_check(repo, baseline_path), 0)
class CliSmokeTests(unittest.TestCase):
    """``main`` exposes the documented CLI surface."""

    @staticmethod
    def _run_guard(*cli_args: str) -> subprocess.CompletedProcess[str]:
        """Run the guard script with ``cli_args`` in a child interpreter.

        Both output streams are captured as text. The exit status is not
        checked here, so each test can assert on it.
        """
        script = _GUARD_DIR / "i18n_cjk_guard.py"
        argv = [sys.executable, str(script)]
        argv.extend(cli_args)
        return subprocess.run(argv, capture_output=True, text=True)

    def test_help_flag_exits_zero(self) -> None:
        result = self._run_guard("--help")
        self.assertEqual(result.returncode, 0)
        # Every documented option must be listed in the help text.
        documented_flags = ("--update-baseline", "--baseline", "--repo-root")
        for flag in documented_flags:
            self.assertIn(flag, result.stdout)

    def test_unknown_flag_exits_nonzero(self) -> None:
        result = self._run_guard("--no-such-flag")
        self.assertNotEqual(result.returncode, 0)
# Allow running this file directly as a script, in addition to
# ``python -m unittest``.
if __name__ == "__main__":
    unittest.main()