359 lines
13 KiB
Python
359 lines
13 KiB
Python
"""Unit and integration tests for ``scripts/ci/i18n_cjk_guard.py``.

Stdlib-only tests using ``unittest``. Run from the repository root with::

    python -m unittest scripts/ci/tests/test_i18n_cjk_guard.py

or as a script::

    python scripts/ci/tests/test_i18n_cjk_guard.py
"""
|
|
from __future__ import annotations

import contextlib
import io
import json
import os
import subprocess
import sys
import tempfile
import unittest
from pathlib import Path
|
|
|
|
# Directory that contains this test module.
_HERE = Path(__file__).resolve().parent
# Parent of the tests directory; the CLI tests below expect to find
# ``i18n_cjk_guard.py`` directly inside it.
_GUARD_DIR = _HERE.parent
# Put that directory first on ``sys.path`` so the bare-name import below
# resolves. This must run before the import, hence the E402 suppression.
sys.path.insert(0, str(_GUARD_DIR))

import i18n_cjk_guard as guard  # noqa: E402
|
|
|
|
|
|
def _git(repo: Path, *args: str) -> subprocess.CompletedProcess[str]:
    """Invoke ``git`` with ``args`` using ``repo`` as the working directory.

    Standard output and standard error are captured and decoded as text.
    Because the command runs with ``check=True``, a non-zero exit status
    raises ``subprocess.CalledProcessError`` instead of returning.
    """
    command = ["git"]
    command.extend(args)
    completed = subprocess.run(
        command,
        cwd=repo,
        check=True,
        capture_output=True,
        text=True,
    )
    return completed
|
|
|
|
|
|
def _make_repo(tmp: Path) -> Path:
    """Initialize an isolated git repository at ``tmp`` and return the path.

    The repository is created on a ``main`` branch and given a local commit
    identity so that commits do not depend on the identity configured on the
    machine running the tests.

    Commit signing is also switched off locally: a developer's global
    ``commit.gpgsign=true`` would otherwise make every test commit try to
    sign as the throwaway identity and fail.
    """
    _git(tmp, "init", "-q", "-b", "main")
    local_config = (
        ("user.email", "test@example.com"),
        ("user.name", "Test"),
        # Keep commits hermetic with respect to the user's global git config.
        ("commit.gpgsign", "false"),
    )
    for key, value in local_config:
        _git(tmp, "config", key, value)
    return tmp
|
|
|
|
|
|
def _commit_file(repo: Path, rel: str, content: str | bytes) -> None:
    """Create ``rel`` inside ``repo`` holding ``content`` and commit it.

    Missing parent directories are created first. ``str`` content is written
    as UTF-8 text; any other content is written verbatim as bytes. The file
    is then staged and committed with the message ``add <rel>``.
    """
    destination = repo / rel
    destination.parent.mkdir(parents=True, exist_ok=True)
    if not isinstance(content, str):
        destination.write_bytes(content)
    else:
        destination.write_text(content, encoding="utf-8")
    git_steps = (
        ("add", "--", rel),
        ("commit", "-q", "-m", f"add {rel}"),
    )
    for step in git_steps:
        _git(repo, *step)
|
|
|
|
|
|
class ScanLocaleCjkTests(unittest.TestCase):
    """``scan_locale_cjk`` yields one finding per catalogue string with CJK."""

    @staticmethod
    def _write_catalogue(directory: str, payload: dict, **dump_options) -> Path:
        """Serialize ``payload`` to ``en.json`` in ``directory``; return its path.

        ``dump_options`` are forwarded to ``json.dumps`` so each test keeps
        control over indentation and ASCII escaping.
        """
        catalogue = Path(directory) / "en.json"
        serialized = json.dumps(payload, **dump_options)
        catalogue.write_text(serialized, encoding="utf-8")
        return catalogue

    def test_clean_catalogue_returns_empty_list(self) -> None:
        with tempfile.TemporaryDirectory() as workdir:
            catalogue = self._write_catalogue(
                workdir,
                {"common": {"confirm": "Confirm", "cancel": "Cancel"}},
                indent=2,
            )
            self.assertEqual(guard.scan_locale_cjk(catalogue), [])

    def test_planted_cjk_returns_one_finding(self) -> None:
        payload = {
            "common": {
                "confirm": "Confirm",
                "cancel": "取消",
            }
        }
        with tempfile.TemporaryDirectory() as workdir:
            catalogue = self._write_catalogue(
                workdir, payload, indent=2, ensure_ascii=False
            )
            findings = guard.scan_locale_cjk(catalogue)
            self.assertEqual(len(findings), 1)
            # Each finding unpacks as (dotted key, line number, snippet).
            dotted_key, line_number, excerpt = findings[0]
            self.assertEqual(dotted_key, "common.cancel")
            self.assertGreaterEqual(line_number, 1)
            self.assertIn("取消", excerpt)

    def test_long_value_is_truncated(self) -> None:
        oversized = "前置" + "x" * 200
        with tempfile.TemporaryDirectory() as workdir:
            catalogue = self._write_catalogue(
                workdir, {"k": oversized}, ensure_ascii=False
            )
            findings = guard.scan_locale_cjk(catalogue)
            self.assertEqual(len(findings), 1)
            excerpt = findings[0][2]
            self.assertLessEqual(len(excerpt), guard.SNIPPET_MAX_LEN)
|
|
|
|
|
|
class CountPathCjkTests(unittest.TestCase):
    """``count_path_cjk`` shells out to ``git grep -nIP``."""

    def _fresh_repo(self) -> Path:
        """Return a new git repo in a temp dir that is removed after the test."""
        scratch = tempfile.TemporaryDirectory()
        self.addCleanup(scratch.cleanup)
        return _make_repo(Path(scratch.name))

    def test_returns_zero_for_empty_match(self) -> None:
        repo = self._fresh_repo()
        _commit_file(repo, "src/a.txt", "hello world\n")
        self.assertEqual(guard.count_path_cjk(repo, "src"), 0)

    def test_counts_planted_cjk_lines(self) -> None:
        repo = self._fresh_repo()
        source = "# 一\nprint('hi')\n# 二三\nx = '四'\n"
        _commit_file(repo, "src/a.py", source)
        # Three of the four lines contain CJK: "# 一", "# 二三" and "x = '四'".
        self.assertEqual(guard.count_path_cjk(repo, "src"), 3)

    def test_skips_binary_files(self) -> None:
        repo = self._fresh_repo()
        # NUL bytes make git treat the blob as binary; the CJK bytes inside
        # it should therefore be excluded by -I.
        blob = b"\x00\x01\x02\xe4\xb8\x80\x00\xff"
        _commit_file(repo, "src/blob.bin", blob)
        self.assertEqual(guard.count_path_cjk(repo, "src"), 0)

    def test_skips_untracked_files(self) -> None:
        repo = self._fresh_repo()
        _commit_file(repo, "src/.gitkeep", "")
        # Written to the work tree but never added, so it stays untracked.
        stray = repo / "src" / "untracked.py"
        stray.write_text("x = '中'\n", encoding="utf-8")
        self.assertEqual(guard.count_path_cjk(repo, "src"), 0)
|
|
|
|
|
|
class BaselineRoundTripTests(unittest.TestCase):
    """``read_baseline`` and ``write_baseline`` round-trip cleanly."""

    def test_round_trip(self) -> None:
        """Counts written by ``write_baseline`` are read back unchanged."""
        with tempfile.TemporaryDirectory() as tmp:
            path = Path(tmp) / "baseline.txt"
            counts = {"backend/app": 2792, "frontend/src": 902}
            guard.write_baseline(path, counts)
            # Decode explicitly as UTF-8, matching the sibling test, so the
            # check does not depend on the platform's default encoding.
            self.assertTrue(path.read_text(encoding="utf-8").endswith("\n"))
            self.assertEqual(guard.read_baseline(path), counts)

    def test_sorted_lexicographically_and_single_trailing_newline(self) -> None:
        """Data lines are key-sorted, tab-separated, and end with one newline."""
        with tempfile.TemporaryDirectory() as tmp:
            path = Path(tmp) / "baseline.txt"
            guard.write_baseline(path, {"frontend/src": 1, "backend/app": 2})
            text = path.read_text(encoding="utf-8")
            # Lines starting with "#" are skipped; only data rows are compared.
            data_lines = [
                line for line in text.splitlines() if not line.startswith("#")
            ]
            self.assertEqual(
                data_lines,
                ["backend/app\t2", "frontend/src\t1"],
            )
            self.assertTrue(text.endswith("\n"))
            self.assertFalse(text.endswith("\n\n"))

    def test_missing_file_raises_baseline_error(self) -> None:
        """Reading a path that does not exist raises ``BaselineError``."""
        with tempfile.TemporaryDirectory() as tmp:
            path = Path(tmp) / "missing.txt"
            with self.assertRaises(guard.BaselineError):
                guard.read_baseline(path)

    def test_malformed_line_raises_baseline_error(self) -> None:
        """A space-separated data line (no tab) raises ``BaselineError``."""
        with tempfile.TemporaryDirectory() as tmp:
            path = Path(tmp) / "baseline.txt"
            path.write_text(
                "# header\nbackend/app 100\n", encoding="utf-8"
            )
            with self.assertRaises(guard.BaselineError):
                guard.read_baseline(path)
|
|
|
|
|
|
class RunCheckEndToEndTests(unittest.TestCase):
    """End-to-end test of ``run_check`` against a synthetic repo."""

    def _make_full_repo(
        self,
        tmp: Path,
        *,
        en_json: dict,
        backend_lines: int,
        frontend_lines: int,
    ) -> tuple[Path, Path]:
        """Build a synthetic repo at ``tmp`` and return ``(repo, baseline_path)``.

        ``en_json`` is committed as ``locales/en.json``. ``backend_lines`` and
        ``frontend_lines`` give the number of CJK-bearing comment lines
        committed under ``backend/app`` and ``frontend/src``; a value of zero
        commits an empty ``.gitkeep`` so the directory is still tracked.

        The returned baseline path points inside the repo, but no baseline
        file is written here; each test decides whether to create it.
        """
        repo = _make_repo(tmp)
        _commit_file(
            repo,
            "locales/en.json",
            json.dumps(en_json, indent=2, ensure_ascii=False),
        )
        # (directory, file name, comment leader, number of CJK lines)
        trees = (
            ("backend/app", "x.py", "#", backend_lines),
            ("frontend/src", "x.js", "//", frontend_lines),
        )
        for directory, filename, leader, count in trees:
            if count:
                content = (
                    "\n".join(f"{leader} 中{i}" for i in range(count)) + "\n"
                )
                _commit_file(repo, f"{directory}/{filename}", content)
            else:
                _commit_file(repo, f"{directory}/.gitkeep", "")
        baseline_path = repo / "baseline.txt"
        return repo, baseline_path

    def test_pass_within_baseline(self) -> None:
        """Counts at or below the baseline make ``run_check`` return 0."""
        with tempfile.TemporaryDirectory() as tmp:
            repo, baseline_path = self._make_full_repo(
                Path(tmp),
                en_json={"k": "Confirm"},
                backend_lines=3,
                frontend_lines=2,
            )
            guard.write_baseline(
                baseline_path,
                {"backend/app": 5, "frontend/src": 5},
            )
            rc = guard.run_check(repo, baseline_path)
            self.assertEqual(rc, 0)

    def test_fail_on_locale_cjk(self) -> None:
        """CJK text inside ``en.json`` makes ``run_check`` return 1."""
        with tempfile.TemporaryDirectory() as tmp:
            repo, baseline_path = self._make_full_repo(
                Path(tmp),
                en_json={"k": "中文"},
                backend_lines=0,
                frontend_lines=0,
            )
            guard.write_baseline(
                baseline_path,
                {"backend/app": 0, "frontend/src": 0},
            )
            rc = guard.run_check(repo, baseline_path)
            self.assertEqual(rc, 1)

    def test_fail_on_regression_with_refresh_hint(self) -> None:
        """Exceeding the baseline fails and prints the refresh command."""
        with tempfile.TemporaryDirectory() as tmp:
            repo, baseline_path = self._make_full_repo(
                Path(tmp),
                en_json={"k": "Confirm"},
                backend_lines=10,
                frontend_lines=0,
            )
            guard.write_baseline(
                baseline_path,
                {"backend/app": 5, "frontend/src": 0},
            )
            # redirect_stderr restores sys.stderr on exit, even if run_check
            # raises, so no manual save/restore is needed.
            captured_err = io.StringIO()
            with contextlib.redirect_stderr(captured_err):
                rc = guard.run_check(repo, baseline_path)
            self.assertEqual(rc, 1)
            err_text = captured_err.getvalue()
            self.assertIn("cjk-regression", err_text)
            self.assertIn(
                "python scripts/ci/i18n_cjk_guard.py --update-baseline",
                err_text,
            )

    def test_missing_en_json_fails(self) -> None:
        """A repo without ``locales/en.json`` makes ``run_check`` return 1."""
        with tempfile.TemporaryDirectory() as tmp:
            repo = _make_repo(Path(tmp))
            _commit_file(repo, "backend/app/.gitkeep", "")
            _commit_file(repo, "frontend/src/.gitkeep", "")
            baseline_path = repo / "baseline.txt"
            guard.write_baseline(
                baseline_path,
                {"backend/app": 0, "frontend/src": 0},
            )
            rc = guard.run_check(repo, baseline_path)
            self.assertEqual(rc, 1)

    def test_missing_baseline_fails(self) -> None:
        """An absent baseline file makes ``run_check`` return 1."""
        with tempfile.TemporaryDirectory() as tmp:
            repo, baseline_path = self._make_full_repo(
                Path(tmp),
                en_json={"k": "Confirm"},
                backend_lines=0,
                frontend_lines=0,
            )
            # Do not write the baseline.
            self.assertFalse(baseline_path.exists())
            rc = guard.run_check(repo, baseline_path)
            self.assertEqual(rc, 1)
|
|
|
|
|
|
class UpdateBaselineTests(unittest.TestCase):
    """``update_baseline`` records the current counts and returns 0."""

    def test_update_then_check_passes(self) -> None:
        seed_files = (
            ("locales/en.json", json.dumps({"k": "Confirm"}, indent=2)),
            ("backend/app/x.py", "# 一\n# 二\n"),
            ("frontend/src/.gitkeep", ""),
        )
        with tempfile.TemporaryDirectory() as workdir:
            repo = _make_repo(Path(workdir))
            for rel_path, body in seed_files:
                _commit_file(repo, rel_path, body)
            baseline_file = repo / "baseline.txt"

            exit_code = guard.update_baseline(repo, baseline_file)
            self.assertEqual(exit_code, 0)

            # The freshly written baseline reflects what was just committed.
            recorded = guard.read_baseline(baseline_file)
            self.assertEqual(recorded["backend/app"], 2)
            self.assertEqual(recorded["frontend/src"], 0)

            # A check against that baseline must therefore pass.
            self.assertEqual(guard.run_check(repo, baseline_file), 0)
|
|
|
|
|
|
class CliSmokeTests(unittest.TestCase):
    """Smoke-test the guard script's command line in a child interpreter."""

    @staticmethod
    def _invoke(*cli_args: str) -> subprocess.CompletedProcess[str]:
        """Run the guard script with ``cli_args`` and capture its text output.

        The exit status is not checked here; each test inspects
        ``returncode`` itself.
        """
        script = _GUARD_DIR / "i18n_cjk_guard.py"
        return subprocess.run(
            [sys.executable, str(script), *cli_args],
            capture_output=True,
            text=True,
            check=False,
        )

    def test_help_flag_exits_zero(self) -> None:
        result = self._invoke("--help")
        self.assertEqual(result.returncode, 0)
        documented_flags = ("--update-baseline", "--baseline", "--repo-root")
        for flag in documented_flags:
            self.assertIn(flag, result.stdout)

    def test_unknown_flag_exits_nonzero(self) -> None:
        result = self._invoke("--no-such-flag")
        self.assertNotEqual(result.returncode, 0)
|
|
|
|
|
|
if __name__ == "__main__":
    # Supports running this file directly as a script, as the module
    # docstring describes.
    unittest.main()
|