240 lines
8.4 KiB
Python
240 lines
8.4 KiB
Python
#!/usr/bin/env python3
|
||
"""Verify backend i18n externalization for ticket #6.
|
||
|
||
Two checks (both run by default):
|
||
|
||
* ``--logs``: scan the in-scope backend modules and report any Chinese
|
||
character (U+4E00-U+9FFF) that still appears inside the string-literal
|
||
arguments of ``logger.{debug,info,warning,error,exception,critical}(...)`` calls or
|
||
inside the ``error`` / ``message`` field values of ``jsonify({...})`` calls.
|
||
|
||
* ``--parity``: load every ``*.json`` in ``locales/`` (excluding
|
||
``languages.json``) and verify that the recursive set of key paths is
|
||
identical across every file.
|
||
|
||
Exit code is 0 when both checks pass and 1 otherwise. Each finding is printed
|
||
on its own line as ``<file>:<line>: <reason>: <snippet>``. Final line is
|
||
``OK`` or ``N issues``.
|
||
|
||
The script depends only on the Python standard library so it can be invoked
|
||
from a clean checkout: ``python scripts/check_i18n_logs.py``.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import ast
|
||
import json
|
||
import re
|
||
import sys
|
||
from pathlib import Path
|
||
from typing import Iterable
|
||
|
||
# Repository root, taken as the directory two levels above this script file.
REPO_ROOT = Path(__file__).resolve().parent.parent

# In-scope backend modules per .kiro/specs/i18n-externalize-backend-logs/design.md.
# ``backend/app/__init__.py`` is also covered to satisfy the ticket's
# repo-wide grep guard, even though it lives outside the listed module set.
# Every entry is a POSIX-style path relative to ``REPO_ROOT``.
SOURCE_FILES = [
    "backend/app/__init__.py",
    "backend/app/services/report_agent.py",
    "backend/app/services/zep_tools.py",
    "backend/app/services/simulation_runner.py",
    "backend/app/services/oasis_profile_generator.py",
    "backend/app/services/simulation_config_generator.py",
    "backend/app/services/zep_graph_memory_updater.py",
    "backend/app/services/ontology_generator.py",
    "backend/app/services/simulation_manager.py",
    "backend/app/services/zep_entity_reader.py",
    "backend/app/services/simulation_ipc.py",
    "backend/app/services/graph_builder.py",
    "backend/app/api/simulation.py",
    "backend/app/api/report.py",
    "backend/app/api/graph.py",
]

# Directory holding the per-language ``*.json`` files used by the parity check.
LOCALES_DIR = REPO_ROOT.joinpath("locales")
|
||
|
||
# Attribute names that make ``logger.<name>(...)`` count as a logging call.
LOGGER_METHODS = {"debug", "info", "warning", "error", "exception", "critical"}

# Keys of a ``jsonify`` payload whose values are user-facing text.
JSONIFY_TRANSLATED_FIELDS = {"error", "message"}

# Matches one character in U+4E00-U+9FFF (the CJK Unified Ideographs block).
# The endpoints are spelled as escapes rather than raw characters so the range
# stays readable and survives editors or pipelines that mangle non-ASCII text.
CHINESE_RE = re.compile(r"[\u4e00-\u9fff]")
|
||
|
||
|
||
def _has_chinese(text: str) -> bool:
    """Report whether ``text`` contains at least one match of ``CHINESE_RE``."""
    return CHINESE_RE.search(text) is not None
|
||
|
||
|
||
def _string_literal_value(node: ast.AST) -> str | None:
|
||
"""Return the string value of a literal ``Constant``/``JoinedStr``, else None."""
|
||
if isinstance(node, ast.Constant) and isinstance(node.value, str):
|
||
return node.value
|
||
if isinstance(node, ast.JoinedStr):
|
||
parts: list[str] = []
|
||
for value in node.values:
|
||
if isinstance(value, ast.Constant) and isinstance(value.value, str):
|
||
parts.append(value.value)
|
||
else:
|
||
# Conservatively render dynamic interpolation segments as a
|
||
# placeholder so that surrounding Chinese text in the static
|
||
# parts is still detected.
|
||
parts.append("<EFBFBD>")
|
||
return "".join(parts)
|
||
return None
|
||
|
||
|
||
def _is_logger_call(node: ast.Call) -> bool:
    """Report whether ``node`` has the shape ``logger.<method>(...)``.

    The callee must be an attribute access whose attribute name is listed in
    ``LOGGER_METHODS`` and whose receiver is the bare name ``logger``.
    """
    callee = node.func
    if not isinstance(callee, ast.Attribute):
        return False
    if callee.attr not in LOGGER_METHODS:
        return False
    receiver = callee.value
    return isinstance(receiver, ast.Name) and receiver.id == "logger"
|
||
|
||
|
||
def _is_jsonify_call(node: ast.Call) -> bool:
    """Report whether ``node`` calls something named ``jsonify``.

    Both the bare form ``jsonify(...)`` and any attribute form
    ``<expr>.jsonify(...)`` are accepted.
    """
    callee = node.func
    if isinstance(callee, ast.Name):
        return callee.id == "jsonify"
    if isinstance(callee, ast.Attribute):
        return callee.attr == "jsonify"
    return False
|
||
|
||
|
||
def _scan_call_for_chinese(
    node: ast.Call, source_lines: list[str]
) -> Iterable[tuple[int, str, str]]:
    """Yield ``(line, reason, snippet)`` for Chinese text in this call's arguments.

    Two call shapes are inspected:

    * Logger calls (see ``_is_logger_call``): every positional and keyword
      argument that is a string literal or f-string.
    * ``jsonify`` calls (see ``_is_jsonify_call``): the ``error`` / ``message``
      values, whether supplied through a positional dict literal, through a
      keyword argument (``jsonify(error="...")``) or through an unpacked dict
      literal (``jsonify(**{"error": "..."})``).

    Args:
        node: The call expression to inspect.
        source_lines: Lines of the file being scanned, used to build snippets.

    Yields:
        The line number of the offending argument, a short reason, and the
        source line at that number.
    """
    if _is_logger_call(node):
        for arg in node.args:
            text = _string_literal_value(arg)
            if text and _has_chinese(text):
                yield (
                    arg.lineno,
                    "chinese inside logger call argument",
                    _snippet(source_lines, arg.lineno),
                )
        for kw in node.keywords:
            text = _string_literal_value(kw.value)
            if text and _has_chinese(text):
                yield (
                    kw.value.lineno,
                    "chinese inside logger call keyword argument",
                    _snippet(source_lines, kw.value.lineno),
                )
        return

    if not _is_jsonify_call(node):
        return

    for arg in node.args:
        yield from _scan_jsonify_arg(arg, source_lines)

    # Flask's jsonify also builds its payload from keyword arguments, so the
    # translated fields must be checked there as well as in dict literals.
    for kw in node.keywords:
        if kw.arg is None:
            # ``**mapping`` unpacking: only a dict literal can be inspected.
            yield from _scan_jsonify_arg(kw.value, source_lines)
            continue
        if kw.arg not in JSONIFY_TRANSLATED_FIELDS:
            continue
        text = _string_literal_value(kw.value)
        if text and _has_chinese(text):
            yield (
                kw.value.lineno,
                f"chinese inside jsonify {kw.arg} field",
                _snippet(source_lines, kw.value.lineno),
            )
|
||
|
||
|
||
def _scan_jsonify_arg(arg: ast.AST, source_lines: list[str]) -> Iterable[tuple[int, str, str]]:
    """Yield findings for Chinese text in the ``error`` / ``message`` values of a dict literal.

    Nothing is yielded unless ``arg`` is a dict literal. Only entries whose
    key is a string constant listed in ``JSONIFY_TRANSLATED_FIELDS`` and whose
    value is a string literal or f-string are examined.
    """
    if not isinstance(arg, ast.Dict):
        return
    for key_node, value_node in zip(arg.keys, arg.values):
        is_translated_field = (
            isinstance(key_node, ast.Constant)
            and isinstance(key_node.value, str)
            and key_node.value in JSONIFY_TRANSLATED_FIELDS
        )
        if not is_translated_field:
            continue
        literal = _string_literal_value(value_node)
        if not literal or not _has_chinese(literal):
            continue
        yield (
            value_node.lineno,
            f"chinese inside jsonify {key_node.value} field",
            _snippet(source_lines, value_node.lineno),
        )
|
||
|
||
|
||
def _snippet(source_lines: list[str], lineno: int) -> str:
    """Return source line ``lineno`` (1-based) without trailing whitespace.

    An out-of-range line number produces an empty string.
    """
    if lineno < 1 or lineno > len(source_lines):
        return ""
    return source_lines[lineno - 1].rstrip()
|
||
|
||
|
||
def check_logs() -> list[str]:
    """Scan every file in ``SOURCE_FILES`` and return the findings.

    Each finding is a ``<file>:<line>: <reason>: <detail>`` string. A file
    that is missing, cannot be read or decoded as UTF-8, or fails to parse
    contributes one finding of its own instead of aborting the run. Findings
    for a single file are listed in ascending line order.

    Returns:
        The list of findings; empty when every file is clean.
    """
    findings: list[str] = []
    for rel_path in SOURCE_FILES:
        abs_path = REPO_ROOT / rel_path
        try:
            source = abs_path.read_text(encoding="utf-8")
        except (FileNotFoundError, NotADirectoryError):
            findings.append(f"{rel_path}:0: missing in-scope file: not found")
            continue
        except (OSError, UnicodeDecodeError) as exc:
            # Report instead of crashing so the remaining files still get checked.
            findings.append(f"{rel_path}:0: unreadable file: {exc}")
            continue

        # ``read_text`` has already normalized line endings to "\n". Splitting
        # on "\n" only (rather than ``str.splitlines``, which also breaks on
        # form feeds, U+2028, etc.) keeps the list aligned with AST line numbers.
        source_lines = source.split("\n")

        try:
            tree = ast.parse(source, filename=str(abs_path))
        except SyntaxError as exc:
            findings.append(f"{rel_path}:{exc.lineno or 0}: syntax error: {exc.msg}")
            continue

        hits = [
            hit
            for node in ast.walk(tree)
            if isinstance(node, ast.Call)
            for hit in _scan_call_for_chinese(node, source_lines)
        ]
        # ``ast.walk`` does not visit nodes in source order; a stable sort by
        # line number makes the report read top-to-bottom.
        hits.sort(key=lambda hit: hit[0])
        findings.extend(
            f"{rel_path}:{line}: {reason}: {snippet.strip()}"
            for line, reason, snippet in hits
        )
    return findings
|
||
|
||
|
||
def _collect_key_paths(obj, prefix: str = "") -> set[str]:
    """Return the dotted paths of every non-dict value nested inside ``obj``.

    Nested dicts are descended into and contribute only their leaves, so an
    empty dict contributes nothing. A non-dict ``obj`` yields an empty set.
    When ``prefix`` is non-empty it is prepended (with a dot) to every path.
    """
    found: set[str] = set()
    pending = [(obj, prefix)]
    while pending:
        current, base = pending.pop()
        if not isinstance(current, dict):
            continue
        for name, child in current.items():
            dotted = f"{base}.{name}" if base else name
            if isinstance(child, dict):
                pending.append((child, dotted))
            else:
                found.add(dotted)
    return found
|
||
|
||
|
||
def check_parity() -> list[str]:
    """Verify that every locale file exposes the same set of key paths.

    All ``*.json`` files directly inside ``LOCALES_DIR`` except
    ``languages.json`` are loaded. A file that cannot be read, decoded or
    parsed produces one finding and is left out of the comparison. Every
    remaining file is then compared against the union of all key paths, and
    each path it lacks becomes a ``missing key path`` finding.

    Returns:
        The list of findings. No comparison is made (and no ``missing key
        path`` finding is produced) when fewer than two files are available.
    """
    findings: list[str] = []
    locale_files = sorted(p for p in LOCALES_DIR.glob("*.json") if p.name != "languages.json")
    if len(locale_files) < 2:
        return findings

    key_sets: dict[str, set[str]] = {}
    for path in locale_files:
        # One label format for every finding about this file.
        label = f"locales/{path.name}"
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except json.JSONDecodeError as exc:
            # Point at the offending line, like the syntax-error findings do.
            findings.append(f"{label}:{exc.lineno}: invalid JSON: {exc.msg}")
            continue
        except (OSError, UnicodeDecodeError) as exc:
            # Report instead of crashing so the other locale files still get checked.
            findings.append(f"{label}:0: unreadable file: {exc}")
            continue
        key_sets[path.name] = _collect_key_paths(data)

    if len(key_sets) < 2:
        return findings

    union = set().union(*key_sets.values())
    for path_name, keys in key_sets.items():
        findings.extend(
            f"locales/{path_name}:0: missing key path: {key_path}"
            for key_path in sorted(union - keys)
        )
    return findings
|
||
|
||
|
||
def main(argv: list[str] | None = None) -> int:
    """Run the selected checks, print the report and return the exit status.

    Args:
        argv: Command-line arguments to parse; ``None`` lets ``argparse``
            read them from the process arguments.

    Returns:
        ``0`` when no finding was produced, ``1`` otherwise.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--logs", action="store_true", help="run the source-scan check only")
    parser.add_argument("--parity", action="store_true", help="run the locale-parity check only")
    options = parser.parse_args(argv)

    # Giving neither flag selects both checks; giving both does the same.
    nothing_selected = not (options.logs or options.parity)
    selected_checks = []
    if options.logs or nothing_selected:
        selected_checks.append(check_logs)
    if options.parity or nothing_selected:
        selected_checks.append(check_parity)

    findings: list[str] = []
    for run_check in selected_checks:
        findings.extend(run_check())

    for finding in findings:
        print(finding)

    if not findings:
        print("OK")
        return 0
    print(f"{len(findings)} issues")
    return 1
|
||
|
||
|
||
# Script entry point: hand the status returned by ``main`` to the interpreter.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
|