From 74997fd08869296cf3cc315306db9584963ad958 Mon Sep 17 00:00:00 2001 From: Dominik Seemann Date: Thu, 7 May 2026 13:52:22 +0000 Subject: [PATCH] feat(i18n): externalize chinese log and api response strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract every Chinese string inside backend logger.{info,warning,error, debug,exception} calls and inside user-facing jsonify({"error|message": ...}) responses across the listed in-scope modules into locales/{en,zh}.json under nested namespaces (log..*, api.{error,message}..*). Locale dictionaries stay structurally identical; the existing flat frontend-facing keys at log.* / api.* are left untouched. The locale helper (backend/app/utils/locale.py) now emits a single deduplicated mirofish.locale warning per (locale, key) pair when a translation is missing instead of silently returning the raw key, so unknown keys are visible without crashing requests or background tasks. A repo-root scripts/check_i18n_logs.py verifier performs an AST-aware source scan for residual Chinese inside the in-scope logger/jsonify calls and a recursive parity diff between en.json and zh.json — both modes pass. Why: backend logs and API errors previously emitted Chinese-only strings, leaving English-speaking operators with unreadable log aggregator output and API consumers with locale-mismatched error messages. The t() helper and per-thread set_locale propagation already existed; this change makes every backend caller route through them. 
Closes #6 --- .../i18n-externalize-backend-logs/design.md | 432 ++++++++++++++++++ .../requirements.md | 87 ++++ .../i18n-externalize-backend-logs/research.md | 111 +++++ .../i18n-externalize-backend-logs/spec.json | 23 + .../i18n-externalize-backend-logs/tasks.md | 129 ++++++ backend/app/__init__.py | 13 +- backend/app/api/graph.py | 61 +-- backend/app/api/report.py | 38 +- backend/app/api/simulation.py | 217 ++++----- .../app/services/oasis_profile_generator.py | 46 +- backend/app/services/report_agent.py | 2 +- .../services/simulation_config_generator.py | 28 +- backend/app/services/simulation_ipc.py | 11 +- backend/app/services/simulation_manager.py | 7 +- backend/app/services/simulation_runner.py | 82 ++-- backend/app/services/zep_entity_reader.py | 23 +- .../app/services/zep_graph_memory_updater.py | 35 +- backend/app/services/zep_tools.py | 105 +++-- backend/app/utils/locale.py | 57 ++- backend/tests/__init__.py | 0 backend/tests/conftest.py | 27 ++ backend/tests/test_locale.py | 104 +++++ .../tests/test_locale_request_resolution.py | 56 +++ locales/en.json | 372 ++++++++++++++- locales/zh.json | 372 ++++++++++++++- scripts/_apply_translations.py | 49 ++ scripts/_codemod_i18n.py | 303 ++++++++++++ scripts/_merge_locale_keys.py | 104 +++++ scripts/check_i18n_logs.py | 239 ++++++++++ 29 files changed, 2776 insertions(+), 357 deletions(-) create mode 100644 .kiro/specs/i18n-externalize-backend-logs/design.md create mode 100644 .kiro/specs/i18n-externalize-backend-logs/requirements.md create mode 100644 .kiro/specs/i18n-externalize-backend-logs/research.md create mode 100644 .kiro/specs/i18n-externalize-backend-logs/spec.json create mode 100644 .kiro/specs/i18n-externalize-backend-logs/tasks.md create mode 100644 backend/tests/__init__.py create mode 100644 backend/tests/conftest.py create mode 100644 backend/tests/test_locale.py create mode 100644 backend/tests/test_locale_request_resolution.py create mode 100644 scripts/_apply_translations.py create mode 
100644 scripts/_codemod_i18n.py create mode 100644 scripts/_merge_locale_keys.py create mode 100644 scripts/check_i18n_logs.py diff --git a/.kiro/specs/i18n-externalize-backend-logs/design.md b/.kiro/specs/i18n-externalize-backend-logs/design.md new file mode 100644 index 00000000..a4f1c86e --- /dev/null +++ b/.kiro/specs/i18n-externalize-backend-logs/design.md @@ -0,0 +1,432 @@ +# Design — i18n-externalize-backend-logs + +## Overview + +**Purpose**: Externalize the ~250 Chinese strings inside backend `logger.{info,warning,error,debug,exception}(...)` calls and the ~79 Chinese strings inside user-facing `jsonify({"error|message": ...})` responses across 14 backend modules into the existing `locales/{en,zh}.json` dictionaries, so logs and API responses honor the active locale. + +**Users**: Backend operators monitoring Flask logs in English; API clients sending `Accept-Language: en` (frontend, integration tests, future ops dashboards). + +**Impact**: Switches the backend from emitting Chinese-only strings to locale-aware lookups via the existing `t()` helper. Adds ~330 new key/value pairs to `locales/en.json` and `locales/zh.json` under nested namespaces (`log..`, `api.error.`, `api.message.`). Adds a deduplicated missing-key warning inside `t()` so unknown keys are visible without crashing requests. No public API contract or HTTP behavior changes. + +### Goals +- Every `logger.*` call in the in-scope modules (R1) emits via `t("log.…", **fmt)`; every `jsonify({"error|message": ...})` (R2) emits via `t("api.error|message.…", **fmt)`. +- `locales/en.json` and `locales/zh.json` remain structurally identical (R3): same key tree, same nesting, same ordering. +- `t()` warns on missing keys but never raises (R4). +- A re-runnable verifier (`scripts/check_i18n_logs.py`) makes R5 mechanically checkable from CI/dev. + +### Non-Goals +- Prompt strings (handled by sibling specs `i18n-report-agent-prompts` already on this branch + #2/#3/#4/#5). 
+- Chinese docstrings/comments (#7). +- Re-architecting `t()` (no ICU, no pluralization, no new framework). +- Frontend `vue-i18n` changes beyond the new keys it does not consume (the frontend continues to read its existing flat `log.` and `api.` entries unchanged). +- Changing log levels, log structure, HTTP status codes, response field shapes. + +## Boundary Commitments + +### This Spec Owns +- Translation of every Chinese string literal that appears as a string argument of `logger.{info,warning,error,debug,exception}(...)` in the 14 in-scope modules. +- Translation of the `error` and `message` field values of every `jsonify({...})` (and equivalent `make_response(jsonify(...))` / `Response(json.dumps(...))`) call in `backend/app/api/{simulation,report,graph}.py`. +- All new keys placed under `log..*`, `api.error..*`, `api.message..*` in both `locales/en.json` and `locales/zh.json`. +- Missing-key warning behavior of `backend/app/utils/locale.py::t`. +- The verification script `scripts/check_i18n_logs.py`. + +### Out of Boundary +- Prompt template strings inside the same files — owned by `i18n-report-agent-prompts` and tickets #2/#3/#4/#5. +- Chinese docstrings, function-name docstrings, and inline `#` comments — owned by ticket #7. +- The existing flat frontend keys in `locales/{en,zh}.json` (e.g. `log.preparingGoBack`, `api.projectNotFound`) — these are consumed by Vue components and must remain untouched at their current paths. +- New locale languages, language detection rules, or `Accept-Language` parsing changes. +- The `success`, `traceback`, `data`, `progress`, `status` fields in API responses (only `error` / `message` are translated). + +### Allowed Dependencies +- `backend/app/utils/locale.py` (`t`, `set_locale`, `get_locale`). +- `backend/app/utils/logger.py` (`get_logger`). +- Standard library only for the verifier (`json`, `re`, `pathlib`, `sys`). +- Existing Flask request context for `Accept-Language`-driven locale resolution. 
+ +### Revalidation Triggers +- Adding a new in-scope file (e.g. a new service module that emits Chinese log strings) → re-run verifier; extend script's file list if needed. +- Renaming the existing top-level `log` / `api` namespaces in the locale dictionaries → frontend code coupled to those keys breaks; coordinate with frontend specs. +- Changing `t()` placeholder syntax (`{name}`) or fallback behavior → all call sites and the verifier need re-checking. +- Adding a new locale file (e.g. `de.json`) → the parity check must be extended to every `*.json` in `/locales/`, not just `en` and `zh`. + +## Architecture + +### Existing Architecture Analysis +- `backend/app/utils/locale.py` is the single source for translation. It exposes `set_locale(locale)`, `get_locale()`, `t(key, **kwargs)`, and `get_language_instruction()`. Translations are loaded once at process start from every `*.json` in `/locales/` (excluding `languages.json`). +- `t()` resolves a dotted key, falls back to the `zh` dictionary if missing in the active locale, then returns the raw key string if both are missing. Today it is **silent** on miss. +- Locale is request-scoped via `Accept-Language` and background-thread-scoped via `set_locale(...)` / `_thread_local.locale`. Background threads (`SimulationRunner`, `OasisProfileGenerator`, `ZepGraphMemoryUpdater`, `GraphBuilder`, `report.py` task threads) already call `set_locale(...)` at entry — current coverage is sufficient and not extended by this spec. +- `report.py` is a precedent: it already imports `from ..utils.locale import t, get_locale, set_locale` and uses `t("api.…")` in 27 jsonify call sites. The work in this spec mirrors that pattern across the remaining files. +- Existing keys at `log.*` and `api.*` (depth 2) are consumed by the Vue frontend. New backend keys live one level deeper (`log..`, `api.error..`) and therefore do not shadow or conflict. 
+ +### Architecture Pattern & Boundary Map + +```mermaid +graph TB + subgraph backend + Api[Flask API blueprints] + Services[Service layer] + Logger[get_logger factory] + Locale[utils.locale t set_locale get_locale] + end + + subgraph repo_root + EnJson[locales en.json] + ZhJson[locales zh.json] + end + + subgraph verification + Verifier[scripts check_i18n_logs.py] + end + + Api -->|t key fmt| Locale + Services -->|t key fmt| Locale + Locale -->|reads at startup| EnJson + Locale -->|reads at startup| ZhJson + Logger -->|emits records| LogSink[Log sink stdout aggregator] + Api -->|jsonify| HttpClient[HTTP client] + Verifier -->|scans source| Api + Verifier -->|scans source| Services + Verifier -->|parses| EnJson + Verifier -->|parses| ZhJson +``` + +**Architecture Integration**: +- **Selected pattern**: Centralized translation registry (single helper, file-backed dictionaries) — already in place. This spec extends the registry's contents, not its shape. +- **Domain/feature boundaries**: Locale dict is the only shared resource. Each in-scope module owns its own keys (`log..*`); collisions are prevented by per-module sub-namespaces. +- **Existing patterns preserved**: `from ..utils.locale import t` import shape; `logger = get_logger("mirofish.")` factory; per-thread `set_locale(...)` propagation; `report.py`-style `jsonify({"error": t("api.…", id=...)})`. +- **New components rationale**: The verifier (`scripts/check_i18n_logs.py`) is the only new file. It exists because R5 demands a re-runnable mechanical check, and lives outside `backend/app` so it doesn't ship with the runtime. +- **Steering compliance**: 4-space indentation, snake_case, double quotes (Python steering); no new dependencies; no new lint/format tooling; structure preserved. 
+ +### Technology Stack + +| Layer | Choice / Version | Role in Feature | Notes | +|-------|------------------|-----------------|-------| +| Backend / Services | Python ≥3.11 + Flask 3.0 | Hosts `t()` calls and translated `jsonify` responses | No new deps | +| Backend / i18n | `backend/app/utils/locale.py` (in-tree, ~70 LoC) | Resolves keys against per-thread / per-request locale | Extended with deduped missing-key warning | +| Data / Storage | `locales/en.json`, `locales/zh.json` (file-backed, JSON, loaded at process start) | Holds new `log..*` and `api.{error,message}..*` entries | Both files must stay structurally identical | +| Tooling / Verification | Python stdlib (`json`, `re`, `pathlib`, `argparse`) | Implements the R5 verifier | Runs from repo root: `python scripts/check_i18n_logs.py` | + +## File Structure Plan + +### Modified Files + +Backend service modules — replace Chinese-bearing `logger.*` calls with `t("log..", **fmt)`: +- `backend/app/services/zep_tools.py` (~51 sites) +- `backend/app/services/simulation_runner.py` (~40 sites) +- `backend/app/services/oasis_profile_generator.py` (~23 sites) +- `backend/app/services/simulation_config_generator.py` (~14 sites) +- `backend/app/services/zep_graph_memory_updater.py` (~14 sites) +- `backend/app/services/zep_entity_reader.py` (~10 sites) +- `backend/app/services/simulation_ipc.py` (~5 sites) +- `backend/app/services/simulation_manager.py` (~3 sites) +- `backend/app/services/report_agent.py` (~1 site) +- `backend/app/services/ontology_generator.py` — already clean (no rewrites; verify only) +- `backend/app/services/graph_builder.py` — already clean (no rewrites; verify only) + +Backend API modules — rewrite both `logger.*` and `jsonify({"error|message": ...})` strings: +- `backend/app/api/simulation.py` (~55 logger + ~59 jsonify sites) +- `backend/app/api/report.py` (~19 logger sites; jsonify already i18n-ized) +- `backend/app/api/graph.py` (~15 logger + ~20 jsonify sites) + +Locale dictionaries: +- 
`locales/en.json` — add new nested entries; keep file structurally identical to `zh.json`. +- `locales/zh.json` — add new nested entries with the original Chinese verbatim; keep file structurally identical to `en.json`. + +Locale helper: +- `backend/app/utils/locale.py` — extend `t()` with a deduplicated `logger.warning(...)` on missing-key fallback. + +### New Files +- `scripts/check_i18n_logs.py` — R5 verifier. Two modes: `--logs` (regex-scan in-scope files) and `--parity` (compare key trees of `en.json` and `zh.json`). Default runs both. + +> No new directories. No new packages. The script lives under the existing top-level `scripts/` path (alongside conventions used elsewhere in the repo). + +## Requirements Traceability + +| Req | Summary | Components | Interfaces | Flows | +|-----|---------|------------|------------|-------| +| 1.1 | logger.* uses t() | All in-scope service + api modules | `t("log..", **fmt)` | n/a | +| 1.2 | en locale → English log line | locale.py, en.json | `t()` resolves `_translations["en"]` | request → t() → log | +| 1.3 | zh locale → original Chinese log line | locale.py, zh.json | `t()` resolves `_translations["zh"]` (default) | request → t() → log | +| 1.4 | Interpolation via kwargs | locale.py, all rewritten call sites | `t(key, name=value)` with `{name}` placeholder | n/a | +| 1.5 | Zero ZH literals in logger calls in-scope | All in-scope modules | regex `logger\.[a-z]+\([\"'][^\"']*[一-鿿]` returns 0 | verifier flow | +| 2.1 | jsonify error/message via t() | api.simulation, api.report, api.graph | `jsonify({"error": t("api.error.…", **fmt)})` | n/a | +| 2.2 | en locale → English error/message | locale.py, en.json | `t()` resolves `en` for `api.*` keys | request → t() → jsonify | +| 2.3 | zh locale → original Chinese | locale.py, zh.json | `t()` resolves `zh` fallback | request → t() → jsonify | +| 2.4 | Zero ZH literals in jsonify error/message in-scope | api.simulation, api.report, api.graph | verifier mode `--logs` extended to 
jsonify regex | verifier flow | +| 2.5 | HTTP status / response shape unchanged | All in-scope api modules | unchanged tuple/jsonify return signatures | request → handler | +| 3.1 | Every new key present in en.json | locales/en.json | nested JSON tree under `log.`, `api.error/message.` | n/a | +| 3.2 | Every new key present in zh.json verbatim | locales/zh.json | mirrored nested JSON tree | n/a | +| 3.3 | Namespace organization | locales/{en,zh}.json | top-level `log` and `api` extended with sub-namespaces | n/a | +| 3.4 | Structural parity en vs zh | locales/{en,zh}.json | verifier mode `--parity` walks both trees | verifier flow | +| 3.5 | No collision with existing flat frontend keys | locales/{en,zh}.json | new keys live at depth ≥3 under `log` / `api` | n/a | +| 4.1 | Missing key returns non-empty string | locale.py | `t()` returns the raw key string | request → t() (miss) | +| 4.2 | Missing key emits warning | locale.py | `logger.warning(...)` with `(key, locale)` | request → t() (miss) → log | +| 4.3 | t() never raises | locale.py | guarded `dict.get()` chain; no unguarded indexing | n/a | +| 4.4 | Background thread locale honored | locale.py, all background entrypoints | existing `set_locale(...)` calls | thread start → set_locale → t() | +| 5.1 | Logger regex returns zero matches in scope | scripts/check_i18n_logs.py | `--logs` mode | verifier flow | +| 5.2 | jsonify error/message regex returns zero matches in scope | scripts/check_i18n_logs.py | `--logs` mode (jsonify branch) | verifier flow | +| 5.3 | Locale parity check returns zero diffs | scripts/check_i18n_logs.py | `--parity` mode | verifier flow | +| 5.4 | No new dependencies | scripts/check_i18n_logs.py | stdlib only | n/a | +| 5.5 | pytest stays green | All in-scope modules | regression check | `uv run python -m pytest` | + +## Components and Interfaces + +| Component | Domain/Layer | Intent | Req Coverage | Key Dependencies (P0/P1) | Contracts | 
+|-----------|--------------|--------|--------------|--------------------------|-----------| +| `LocaleHelper` (`backend/app/utils/locale.py`) | shared utility | Resolves dotted keys to translated strings; warns on miss | 1.2, 1.3, 1.4, 4.1, 4.2, 4.3, 4.4 | `_translations` dict (P0), `logging` (P0) | Service | +| `BackendLogTranslations` (in-scope service + api modules, logger.* sites) | service + api | Emit translated log records via `t("log.…")` | 1.1, 1.5 | LocaleHelper (P0), `get_logger` (P0) | Service | +| `BackendApiResponseTranslations` (`backend/app/api/{simulation,report,graph}.py`, jsonify sites) | api | Emit translated `error`/`message` JSON fields | 2.1, 2.2, 2.3, 2.4, 2.5 | LocaleHelper (P0), Flask `jsonify` (P0) | API | +| `LocaleDictionary` (`locales/en.json`, `locales/zh.json`) | data | Source of truth for translation keys/values | 1.2, 1.3, 2.2, 2.3, 3.1, 3.2, 3.3, 3.4, 3.5 | filesystem at process start (P0) | State | +| `I18nLogVerifier` (`scripts/check_i18n_logs.py`) | tooling | Re-runnable check that R1/R2/R3 still hold | 1.5, 2.4, 3.4, 5.1, 5.2, 5.3, 5.4 | Python stdlib (P0) | Batch | + +### Shared Utility + +#### LocaleHelper + +| Field | Detail | +|-------|--------| +| Intent | Resolve dotted translation keys via `t(key, **kwargs)` against the active locale; warn-once on missing keys | +| Requirements | 1.2, 1.3, 1.4, 4.1, 4.2, 4.3, 4.4 | + +**Responsibilities & Constraints** +- Owns the in-process translation cache (`_translations`) and the per-thread locale (`_thread_local.locale`). +- Active-locale lookup order: thread-local override → request `Accept-Language` header → default `zh`. +- Substitutes `{name}` placeholders with stringified `kwargs[name]`. Other placeholder syntaxes are not supported. 
+- On a missing key (no value in active locale **and** no value in `zh` fallback): returns the raw key string (existing behavior) **and** emits `logger.warning("missing translation key: %s (locale=%s)", key, locale)` exactly once per `(locale, key)` pair using a process-lifetime memoization set. +- Never raises for any string `key` value — invalid path segments resolve to "missing" and trigger the warning path. + +**Dependencies** +- Inbound: every backend caller of `t()`. (Criticality: P0) +- Outbound: `logging.getLogger("mirofish.locale")` for missing-key warnings. (P0) +- External: Python stdlib only. (P2) + +**Contracts**: Service [x] + +##### Service Interface +```python +def set_locale(locale: str) -> None: ... +def get_locale() -> str: ... +def t(key: str, **kwargs: object) -> str: ... +def get_language_instruction() -> str: ... +``` +- Preconditions: `key` is a non-empty `str`; `kwargs` values are stringifiable. +- Postconditions: returns a non-empty `str`. If the key is unresolved, the return value equals `key` and exactly one warning is emitted per `(locale, key)`. +- Invariants: thread-safe for read-only translation lookups (the `_translations` dict is built once at import and never mutated). The dedup memoization set is mutated under the GIL only — adequate for the Flask + threaded-task usage pattern. + +**Implementation Notes** +- Integration: extend the existing function in `backend/app/utils/locale.py`; no new file. Keep `set_locale` / `get_locale` / `get_language_instruction` signatures unchanged. +- Validation: a tiny inline assertion (or unit test, see Testing Strategy) confirming `t("nonexistent.key.path")` returns `"nonexistent.key.path"` and emits a warning record. +- Risks: duplicate-warning storm if dedup set is forgotten — mitigated by the per-process memoization set; risk that the dedup set grows unbounded (bounded by total distinct missing keys = small). 
+ +### Service & API Layer + +#### BackendLogTranslations (covers Req 1) + +| Field | Detail | +|-------|--------| +| Intent | Replace every Chinese string in `logger.*` calls with `t("log.<module>.<key>", **fmt)` across the in-scope modules | +| Requirements | 1.1, 1.5 | + +**Responsibilities & Constraints** +- Per-module sub-namespace under `log` chosen from this fixed list so reviewers can predict the key path: + - `log.zep_tools.*` + - `log.simulation_runner.*` + - `log.simulation_manager.*` + - `log.simulation_ipc.*` + - `log.simulation_config.*` (for `simulation_config_generator.py`) + - `log.profile_generator.*` (for `oasis_profile_generator.py`) + - `log.zep_entity_reader.*` + - `log.zep_graph_memory_updater.*` + - `log.report_agent.*` + - `log.report_api.*` (for `backend/app/api/report.py` logger calls — the `report` namespace is reserved for the existing flat `report.*` UI keys, so backend-API logs use a `report_api` sibling) + - `log.simulation_api.*` (for `backend/app/api/simulation.py`) + - `log.graph_api.*` (for `backend/app/api/graph.py`) +- Key naming: `<snake_case_summary>` of the message intent, ≤6 words, no message-ID style. Example: `log.zep_tools.entity_count_loaded` for `logger.info("加载了 5 个实体")`. +- Interpolation rule: every dynamic value moves to a `{name}` placeholder and a matching kwarg. f-strings around the `t()` call are not allowed; values are passed through `t()`'s formatter. + - Before: `logger.info(f"加载了 {n} 个实体")` → After: `logger.info(t("log.zep_tools.entity_count_loaded", n=n))`. +- For exception messages where `str(e)` is appended, use an `{error}` placeholder: `logger.error(t("log.zep_tools.entity_fetch_failed", error=str(e)))`. + +**Dependencies** +- Inbound: existing service callers; no signature changes. (P2) +- Outbound: `LocaleHelper.t` (P0); module-level `logger` from `get_logger` (P0). + +**Contracts**: Service [x] + +**Implementation Notes** +- Integration: add `from ..utils.locale import t` at top of each modified file (already present in some). 
Avoid wildcard imports. +- Validation: I18nLogVerifier `--logs` mode catches any missed Chinese literal. +- Risks: missing a `logger.exception(...)` call (these are sometimes formatted differently) — mitigated by including `exception` in the verifier regex. +- Risks: shadowing of `t` by loop/comprehension variables (e.g. `[t.strip() for t in ...]`). A comprehension variable is scoped to the comprehension itself in Python 3, so the module-level `t()` is unaffected and those names can stay as-is. A plain `for t in ...:` statement, however, binds `t` in the enclosing function (or module) scope and would shadow the helper for every `t(...)` call in that scope — rename such loop variables before adding `t()` calls there. + +#### BackendApiResponseTranslations (covers Req 2) + +| Field | Detail | +|-------|--------| +| Intent | Replace every Chinese string assigned to the `error` or `message` field in `jsonify({...})` calls in the API blueprints with `t("api.error|message.<domain>.<key>", **fmt)` | +| Requirements | 2.1, 2.2, 2.3, 2.4, 2.5 | + +**Responsibilities & Constraints** +- Sub-namespaces under existing `api`: + - `api.error.simulation.*` + - `api.error.graph.*` + - `api.error.report.*` (only for *new* report-api keys; existing flat `api.requireSimulationId`-style keys stay where they are since `report.py` already uses them) + - `api.message.simulation.*` + - `api.message.graph.*` + - `api.message.report.*` +- Translated fields are limited to `error` and `message`. Other fields (`success`, `traceback`, `data`, `progress`, `status`) are not localized — this preserves the current contract for clients that key off them. +- HTTP status codes are preserved verbatim (the second tuple element of the return statement is left untouched). +- For dynamic content like `f"模拟不存在: {sid}"`, parameterize via `id=sid`: `jsonify({"error": t("api.error.simulation.not_found", id=sid)})`. +- Where `report.py` already uses a flat key (e.g. `t("api.simulationNotFound", id=...)`) — leave those alone and do not duplicate them under `api.error.report.*`. Only **new** translations introduced by this spec adopt the new sub-namespacing. + +**Dependencies** +- Inbound: HTTP clients (frontend, integration tests, external consumers). 
(P0 — must not break response shape.) +- Outbound: `LocaleHelper.t` (P0), Flask `jsonify` (P0). + +**Contracts**: API [x] + +##### API Contract (illustrative) +| Method | Endpoint | Before (Chinese) | After (i18n key) | Status | +|--------|----------|------------------|------------------|--------| +| GET | `/api/simulation/entities/` | `{"error": "NEO4J未配置"}` | `{"error": t("api.error.simulation.neo4j_not_configured")}` | 500 | +| GET | `/api/simulation/entities//` | `{"error": f"实体不存在: {entity_uuid}"}` | `{"error": t("api.error.simulation.entity_not_found", id=entity_uuid)}` | 404 | +| POST | `/api/graph/...` | `{"error": "..."}` | `{"error": t("api.error.graph.")}` | unchanged | + +(The full call-site list is the rewrite work; the table illustrates the pattern.) + +**Implementation Notes** +- Integration: where `success` lives alongside `error`, only `error`'s value changes. Where `message` is the only payload (e.g. `{"message": "..."}`), only `message`'s value changes. +- Validation: I18nLogVerifier `--logs` mode also scans `jsonify(...)` for Chinese characters inside `"error"` / `"message"` value strings. +- Risks: rewriting an `error` value that was being string-built across multiple lines — must use `{name}` placeholders; revisit if any call site assembles the message from a list comprehension. + +### Data Layer + +#### LocaleDictionary + +| Field | Detail | +|-------|--------| +| Intent | Source of truth for backend log/API translations | +| Requirements | 1.2, 1.3, 2.2, 2.3, 3.1, 3.2, 3.3, 3.4, 3.5 | + +**Responsibilities & Constraints** +- `locales/en.json` and `locales/zh.json` retain their existing top-level keys (`common`, `meta`, `nav`, `home`, `main`, `step1-5`, `graph`, `history`, `api`, `progress`, `log`, `report`, `console`). +- New backend keys appear under the existing `log` and `api` namespaces, but always at depth ≥3: + - `log..` for logger calls. + - `api.error..` and `api.message..` for jsonify responses. 
+- Within each new sub-namespace, keys are sorted alphabetically to keep diffs reviewable. +- `en.json` carries the English translation; `zh.json` carries the original Chinese verbatim (no rewriting). +- Both files end with a single trailing newline (project convention) and use 2-space JSON indentation (matching the existing files). + +**Contracts**: State [x] + +**Implementation Notes** +- Integration: edits must add only the new sub-namespaces; touching existing flat keys is forbidden (regression risk for the Vue frontend). +- Validation: I18nLogVerifier `--parity` mode confirms key paths match between `en.json` and `zh.json`. +- Risks: drift between en/zh shapes — mitigated by parity check and by adding both files in the same edit operation. + +### Tooling Layer + +#### I18nLogVerifier (`scripts/check_i18n_logs.py`) + +| Field | Detail | +|-------|--------| +| Intent | Re-runnable mechanical check for R5 | +| Requirements | 1.5, 2.4, 3.4, 5.1, 5.2, 5.3, 5.4 | + +**Responsibilities & Constraints** +- Two checks, both run by default; either can be selected via flag: + 1. **Source scan** (`--logs`): for every in-scope file (constant list embedded in the script), ensure no Chinese character (`U+4E00`–`U+9FFF`) appears inside the string-literal argument of any `logger.{info,warning,error,debug,exception}(...)` call OR inside the value of any `error` / `message` field of a `jsonify(...)` call. Reports each offending file:line:snippet. + 2. **Parity** (`--parity`): walk every `*.json` file in `/locales/` (excluding `languages.json`), pairwise-diff the recursive key set (path strings only, ignoring values), and report any key path that exists in one file but not the other. +- Exit code: 0 if both checks pass, non-zero (1) otherwise. Suitable for CI invocation. +- Implementation: pure stdlib (`json`, `re`, `pathlib`, `argparse`). No new packages, no project imports — runs from a clean checkout. 
+ +**Contracts**: Batch [x] + +##### Batch / Job Contract +- Trigger: `python scripts/check_i18n_logs.py [--logs|--parity]` (default both) from repo root. +- Input / validation: scans the embedded file list and `<repo_root>/locales/*.json`. Stops with a clear error if a listed file is missing. +- Output / destination: stdout. Each finding line: `<file>:<line>: <kind>: <snippet>`. Final summary: `OK` or `N issues`. +- Idempotency & recovery: read-only; safe to re-run. + +**Implementation Notes** +- Integration: not wired into CI by this spec (steering doesn't have CI configured). Documented in the spec's HANDOFF if needed; otherwise just available as a one-liner. +- Validation: developer runs the script before committing. +- Risks: regex on raw source can match Chinese inside docstrings or comments adjacent to `logger.*` lines; mitigated by anchoring the regex to the call expression `logger\.[a-z]+\(...[一-鿿]...\)` and limiting the match to the line itself. + +## System Flows + +```mermaid +sequenceDiagram + participant Caller as Service or API code + participant Locale as locale.t + participant Dict as locales en zh + participant Logger as logger + participant Resp as Flask response + + Caller->>Locale: t key fmt + Locale->>Dict: lookup active locale + alt key found in active locale + Dict-->>Locale: translated string + else key found in zh fallback only + Dict-->>Locale: zh string + else key missing in both + Locale->>Logger: warn missing key once + Locale-->>Caller: raw key string + end + Locale-->>Caller: resolved string + alt log call + Caller->>Logger: emit record with resolved string + else api response + Caller->>Resp: jsonify error or message resolved string + end +``` + +```mermaid +flowchart LR + Source[Source files in scope] -->|regex scan| Verifier + EnJson[en.json] -->|parse keys| Verifier + ZhJson[zh.json] -->|parse keys| Verifier + Verifier -->|0 = OK| ExitOk[exit 0] + Verifier -->|N issues| ExitFail[exit 1] +``` + +## Error Handling + +### Error Strategy +- The translation lookup itself 
never raises. A missing key triggers a single deduplicated `logger.warning` and falls back to the raw key string. Callers see no exception. +- Existing API error paths (`try/except` returning `jsonify({"error": str(e)}), 500`) continue to use `str(e)` for the dynamic exception part — only the static surrounding text moves into a translation key. Where appropriate, callers can wrap the dynamic part: `t("api.error..", error=str(e))`. + +### Error Categories and Responses +- **Translation miss (warning, not error)**: `logger.warning("missing translation key: %s (locale=%s)", key, locale)` — emitted once per `(locale, key)` pair. +- **No new HTTP status codes** are introduced. Existing `404`/`500`/`400` paths return the same status with translated `error` field. +- **Verifier failure**: exits non-zero with a list of offending lines. The author re-runs after fixing. + +### Monitoring +- Missing translation warnings appear in the standard backend log stream. No new metrics or alerting are introduced. + +## Testing Strategy + +### Unit Tests +- `t()` returns the active-locale value for a known key (`set_locale("en")` then `t("log.zep_tools.entity_count_loaded", n=5)` matches the en.json template with `5` substituted). +- `t()` falls back to `zh` when the active locale lacks a key. +- `t()` returns the raw key and emits exactly one `logger.warning` for an unknown key, even on multiple invocations (`caplog`-style assertion). +- `t()` does not raise for invalid nesting (e.g. `t("log.zep_tools.entity_count_loaded.deeper")`). + +### Integration Tests +- A representative API endpoint that previously returned a Chinese error (e.g. `GET /api/simulation/entities/`) now returns the translated string when called with `Accept-Language: en`. +- The same endpoint returns the Chinese string when called with `Accept-Language: zh` (regression check that no behavior changed for existing zh consumers). 
+- A representative service-layer log call emits the en string when the background thread set `set_locale("en")`. + +> Pytest coverage is currently small (`scripts/test_profile_format.py` only). Add the four-or-five new tests to a single test module under `backend/tests/` (created as part of this spec) to keep the scope contained. + +### Mechanical Verification (R5) +- `python scripts/check_i18n_logs.py` succeeds with exit 0. +- `grep -rEn "logger\.[a-z]+\([\"'][^\"']*[一-鿿]" backend/app/` returns no matches. +- `python -c "import json; e=json.load(open('locales/en.json')); z=json.load(open('locales/zh.json')); ..."` parity check returns empty diff. + +## Optional Sections + +### Migration Strategy +None required — the change is non-breaking for both the frontend and external API consumers: +- Existing flat `log.*` and `api.*` keys remain at their current paths and values. +- New keys live at deeper paths and only the backend reads them. +- Default locale (`zh`) returns the same strings as before (preserved verbatim in `zh.json`). diff --git a/.kiro/specs/i18n-externalize-backend-logs/requirements.md b/.kiro/specs/i18n-externalize-backend-logs/requirements.md new file mode 100644 index 00000000..c806165c --- /dev/null +++ b/.kiro/specs/i18n-externalize-backend-logs/requirements.md @@ -0,0 +1,87 @@ +# Requirements Document + +## Introduction +The MiroFish backend currently emits Chinese strings directly from `logger.{info,warning,error,debug,exception}` calls and from a number of `jsonify({"error|message": ...})` API responses. These hardcoded strings bypass the existing `t()` localization helper in `backend/app/utils/locale.py`, so log aggregators receive unreadable messages for English-speaking operators and API responses ignore the active locale. 
This spec defines the work required to externalize every Chinese log message and user-facing API error/message string in the listed backend modules into the locale dictionaries (`locales/en.json` and `locales/zh.json`), so logs and responses honor the request locale and English operators get a fully readable pipeline. + +## Boundary Context +- **In scope**: + - Replace Chinese string literals inside `logger.{info,warning,error,debug,exception}` calls in: + - `backend/app/services/report_agent.py` + - `backend/app/services/zep_tools.py` + - `backend/app/services/simulation_runner.py` + - `backend/app/services/oasis_profile_generator.py` + - `backend/app/services/simulation_config_generator.py` + - `backend/app/services/zep_graph_memory_updater.py` + - `backend/app/services/ontology_generator.py` + - `backend/app/services/simulation_manager.py` + - `backend/app/services/zep_entity_reader.py` + - `backend/app/services/simulation_ipc.py` + - `backend/app/services/graph_builder.py` + - `backend/app/api/simulation.py` + - `backend/app/api/report.py` + - `backend/app/api/graph.py` + - Replace Chinese string literals inside user-facing `jsonify({"error": ...})` and `jsonify({"message": ...})` (or equivalent response builders) in those API modules. + - Add the corresponding keys to both `locales/en.json` (English translation) and `locales/zh.json` (preserve original Chinese verbatim) under a domain-grouped namespace (`log..`, `api.error.`, `api.message.`). + - Preserve existing interpolation by passing values through `t(key, **kwargs)` (using the helper's `{name}` placeholder syntax) instead of f-strings or `%`-formatting around the call. + - Ensure `t()` returns a safe fallback (and emits a warning, not a crash) when a key is missing. +- **Out of scope**: + - Prompt template strings (handled by tickets #2/#3/#4/#5; the report-agent prompts work is already on the current branch). + - Chinese docstrings and inline comments (handled by ticket #7). 
+ - Re-architecting the `t()` helper, switching i18n libraries, or introducing pluralization/ICU formatting. + - Changing log levels, log structure, or response status codes beyond the string content. + - Frontend `zh.json` parity beyond the new keys this work introduces. +- **Adjacent expectations**: + - The `t()` helper at `backend/app/utils/locale.py` already exposes `set_locale`, `get_locale`, and `t` and is wired up at request time and at background-thread entry; new code must reuse the existing helper. + - Locale files (`locales/en.json`, `locales/zh.json`) currently coexist with frontend `vue-i18n` consumption; new keys must not collide with existing top-level frontend keys (`menu`, `process`, `step1`, etc.). All new backend keys live under the new top-level namespaces `log` and `api` (or extend them if already present). + - Sibling spec `i18n-report-agent-prompts` covered the *prompt* portion of `report_agent.py`; this spec must not regress those translations. + +## Requirements + +### Requirement 1: Externalize Chinese Logger Messages +**Objective:** As a backend operator viewing logs in an English log aggregator, I want every Chinese log message in the listed backend modules to be emitted in the active locale, so that I can read and triage logs without translation tooling. + +#### Acceptance Criteria +1. The Backend Logging Layer shall emit log records whose message text is produced by `t("log..", **fmt)` for every `logger.{info,warning,error,debug,exception}` call in the listed in-scope modules that previously contained Chinese characters. +2. When the active locale is `en`, the Backend Logging Layer shall emit the English translation defined in `locales/en.json` for each externalized log key. +3. When the active locale is `zh`, the Backend Logging Layer shall emit the original Chinese text as preserved in `locales/zh.json` for each externalized log key. +4. 
The Backend Logging Layer shall preserve all interpolated values (entity counts, identifiers, exception text) by passing them as keyword arguments to `t()` rather than concatenating or formatting them around the `t()` call. +5. The Backend Logging Layer shall not contain any Chinese character (`U+4E00`–`U+9FFF`) inside the string-literal argument of any `logger.{info,warning,error,debug,exception}` call within the listed in-scope modules. + +### Requirement 2: Externalize Chinese API Response Strings +**Objective:** As a frontend client (or external API consumer) reading the `Accept-Language` header, I want backend error and message responses in the listed API modules to be returned in the active locale, so that user-facing error surfaces match the rest of the localized UI. + +#### Acceptance Criteria +1. The Backend API Layer shall produce the `error` and `message` field values of `jsonify({...})` responses in the listed in-scope API modules (`backend/app/api/{simulation,report,graph}.py`) by calling `t("api.error.", **fmt)` or `t("api.message.", **fmt)`. +2. When the request `Accept-Language` header is `en`, the Backend API Layer shall return the English translation for the corresponding response key. +3. When the request `Accept-Language` header is `zh` or absent, the Backend API Layer shall return the original Chinese string as preserved in `locales/zh.json`. +4. The Backend API Layer shall not contain any Chinese character inside the string value of an `error` or `message` field in any `jsonify(...)` (or equivalent response builder) call within the listed in-scope API modules. +5. The Backend API Layer shall keep the HTTP status code, response key set, and (for non-i18n keys) value structure of every modified response unchanged. 
+ +### Requirement 3: Locale Dictionary Parity and Structure +**Objective:** As a translator or developer adding a new locale, I want every backend log/API key to exist in both `en.json` and `zh.json` with identical nested structure, so that the locale files can be diffed and validated mechanically. + +#### Acceptance Criteria +1. The Locale Dictionary shall contain, in `locales/en.json`, every key introduced by Requirements 1 and 2 with an English translation. +2. The Locale Dictionary shall contain, in `locales/zh.json`, every key introduced by Requirements 1 and 2 with the original Chinese text preserved verbatim from the previous source code. +3. The Locale Dictionary shall organize new backend keys under the top-level namespaces `log` (grouped by domain: `graph`, `simulation`, `report`, `agent`, `pipeline`, etc.) and `api` (grouped as `api.error.` / `api.message.`). +4. The Locale Dictionary shall expose a structurally identical key tree across `en.json` and `zh.json`, such that recursively diffing the key paths (ignoring values) of the two files produces an empty difference. +5. The Locale Dictionary shall not collide with or overwrite any pre-existing top-level frontend i18n key when the new namespaces are added. + +### Requirement 4: Safe Fallback for Missing Keys +**Objective:** As a backend service author who may ship code ahead of a translation update, I want missing translation keys to produce a visible warning without crashing the request or background task, so that incomplete locale dictionaries degrade gracefully. + +#### Acceptance Criteria +1. If a `t(key, ...)` call references a key that exists in neither the active locale nor the `zh` fallback, the Locale Helper shall return a non-empty string (the key itself or an explicit placeholder) rather than `None` or raising. +2. 
If a `t(key, ...)` call references a missing key, the Locale Helper shall emit a single warning-level log record identifying the missing key, the active locale, and (when available) the call site context. +3. The Locale Helper shall not raise `KeyError`, `AttributeError`, or `TypeError` for any key lookup, irrespective of nesting depth or invalid path segments. +4. When `t()` is invoked from a background thread that called `set_locale(...)` at entry, the Locale Helper shall resolve the locale set on that thread for the entire call chain. + +### Requirement 5: Verification and Regression Guards +**Objective:** As a reviewer of this PR, I want repeatable mechanical checks that prove the in-scope files are clean of stray Chinese log/response strings, so that the acceptance criteria can be re-validated on every future change. + +#### Acceptance Criteria +1. The Verification Script shall, when run against the repository, report zero matches for the regular expression `logger\.[a-z]+\(["'][^"']*[一-鿿]` across the listed in-scope modules. +2. The Verification Script shall, when run against the repository, report zero matches for any `jsonify({"error": "<Chinese text>"})` or `jsonify({"message": "<Chinese text>"})` literal in the listed in-scope API modules. +3. The Verification Script shall, when run against `locales/en.json` and `locales/zh.json`, confirm that every newly introduced key path exists in both files (structural-key parity) and exit non-zero if a key is present in only one file. +4. The Verification Script shall be runnable from the repository root using only tools already available in the dev environment (`grep`, `python`, or `jq` — no new dependencies introduced). +5. The Backend Test Suite shall continue to pass (`uv run python -m pytest`) after the externalization changes, with no new failures introduced by the rename of message strings. 
diff --git a/.kiro/specs/i18n-externalize-backend-logs/research.md b/.kiro/specs/i18n-externalize-backend-logs/research.md new file mode 100644 index 00000000..471ee492 --- /dev/null +++ b/.kiro/specs/i18n-externalize-backend-logs/research.md @@ -0,0 +1,111 @@ +# Gap Analysis — i18n-externalize-backend-logs + +## 1. Current State Investigation + +### Locale infrastructure already in place +- `backend/app/utils/locale.py` exposes `set_locale(locale)`, `get_locale()`, `t(key, **kwargs)`, and `get_language_instruction()`. Translations are loaded once at import time from every `*.json` in `/locales/` (excluding `languages.json`). +- `t()` resolves a dotted key, falls back to the `zh` dictionary if the active locale lacks the key, then returns the raw key string if both are missing. **No warning is emitted on miss.** +- Interpolation uses `{name}` placeholders applied via `str.replace`. There is no support for `%s`/`%d`/`{}` (numeric) — call sites must use named placeholders. +- Locale is request-scoped via the `Accept-Language` header, and background-thread-scoped via `set_locale(...)` / `_thread_local.locale`. A few entry points already call `set_locale(...)` (e.g. `report.py`, `graph_builder.py`, `simulation_runner.py`, `oasis_profile_generator.py`, `zep_graph_memory_updater.py`). + +### Locale dictionaries +- `locales/en.json` and `locales/zh.json` already share top-level namespaces `log` and `api` — but every existing `log.*` / `api.*` key currently lives **at depth 2** (e.g. `log.preparingGoBack`, `api.projectNotFound`). Existing `log.*` keys are exclusively consumed by the **frontend** (`frontend/src/views/*.vue`, `frontend/src/components/Step*.vue`). +- Existing `api.*` keys are already used by the backend (`backend/app/api/report.py` uses 27 of them — `api.requireSimulationId`, `api.simulationNotFound`, etc.). So `api.*` is a shared backend/frontend namespace. 
+- Both files are 665 lines, structurally identical (same line count and JSON shape), so adding new sub-namespaces (`log.graph.*`, `log.simulation.*`, `api.error.*`) will not collide with the existing flat keys. + +### In-scope file inventory (Chinese-character occurrences) + +Counted by regex over `logger.{info,warning,error,debug,exception}(...)` and `jsonify(...)` call expressions: + +| File | logger w/ ZH | jsonify w/ ZH | Notes | +| --- | ---: | ---: | --- | +| `backend/app/services/zep_tools.py` | 51 | 0 | Largest single contributor. Many `f"..."` interpolations. | +| `backend/app/services/simulation_runner.py` | 40 | 0 | Background runner; `set_locale` already wired. | +| `backend/app/services/oasis_profile_generator.py` | 23 | 0 | `set_locale` already wired. | +| `backend/app/services/simulation_config_generator.py` | 14 | 0 | | +| `backend/app/services/zep_graph_memory_updater.py` | 14 | 0 | `set_locale` already wired. | +| `backend/app/services/zep_entity_reader.py` | 10 | 0 | | +| `backend/app/services/simulation_ipc.py` | 5 | 0 | | +| `backend/app/services/simulation_manager.py` | 3 | 0 | `t()` already imported. | +| `backend/app/services/report_agent.py` | 1 | 0 | Sibling spec already covered prompts. | +| `backend/app/services/ontology_generator.py` | 0 | 0 | Already clean. | +| `backend/app/services/graph_builder.py` | 0 | 0 | Already clean. | +| `backend/app/api/simulation.py` | 55 | 59 | Largest API surface; **many** error responses still in Chinese. | +| `backend/app/api/report.py` | 19 | 0 | jsonify side already i18n-ized; logger calls remain. | +| `backend/app/api/graph.py` | 15 | 20 | | +| **Totals** | **250** | **79** | | + +### Conventions observed +- Loggers are obtained via `from ..utils.logger import get_logger; logger = get_logger('mirofish.')`. +- Many existing log lines use f-strings: `logger.info(f"加载了 {n} 个agent")`. These need to become `t("log.<…>", n=n)` with `{n}` placeholder syntax (not `{0}` or `%s`). 
+- A few occurrences shadow `t` as a loop/comprehension variable (`[t.strip() for t in ...]`, `for t, examples in ...`). In Python 3 the comprehension form has its own scope and cannot collide with the module-level `t()` import — safe to leave alone. A plain `for t, ... in ...` statement, however, rebinds `t` for the whole enclosing function (or for the module, at top level), so any such loop whose scope also calls `t(...)` must have its loop variable renamed. +- Existing `report.py` already imports `from ..utils.locale import t, get_locale, set_locale` — this is the canonical import shape for API modules. +- `models/task.py` and `services/simulation_manager.py` already use `t()` in places — extend, don't reinvent. + +### Out-of-scope traffic on the same files +- The sibling spec `i18n-report-agent-prompts` (already merged into the current branch's history) externalized **prompts** in `report_agent.py`. This spec must keep its hands off prompt strings and only touch the residual `logger.*` / `jsonify({"error|message": …})` literals. +- `#7` covers Chinese docstrings/comments — leave alone. +- `#2/#3/#4/#5` cover ontology/profile/config/report **prompt** text — leave alone. + +## 2. Requirements Feasibility Map + +| Requirement | Existing Asset | Gap | Tag | +| --- | --- | --- | --- | +| **R1** Externalize logger ZH messages | `t()` helper, `logger` factory | ~250 call sites to rewrite + ~250 new keys | Missing translations | +| **R2** Externalize API jsonify ZH messages | `t()` helper, partial `report.py` precedent | ~79 call sites in `simulation.py` / `graph.py` + ~80 new keys | Missing translations | +| **R3** Locale dict parity (en/zh same shape) | `en.json` and `zh.json` already structurally identical | New nested namespaces `log..`, `api.error.`, `api.message.` to add to both | Missing namespace + needs verifier | +| **R4** Safe missing-key fallback (warns, doesn't crash) | `t()` returns the raw key on miss | **Missing**: a `logger.warning(...)` on miss path; verify thread-local locale propagation | Missing capability (small) | +| **R5** Verification guards | None today | Need `grep`/`python` script(s) that report 0 ZH in scope and assert key parity | Missing tooling | + +## 
3. Implementation Approach Options + +### Option A — Pure file-by-file inline rewrite (recommended) +- For each in-scope file: import `t` from `..utils.locale`, walk every Chinese `logger.*` and `jsonify(...)` call, replace with `t("log..", **fmt)` / `t("api.error.", **fmt)`, and add the matching key to both locale JSONs. +- Group keys under the existing `log` and `api` top-level namespaces but **one level deeper** (`log.zep_tools.*`, `log.simulation.*`, `log.runner.*`, `api.error.simulation.*`, `api.error.graph.*`) to avoid colliding with the flat frontend keys already in `en.json`/`zh.json`. +- Implement R4 inside `t()` itself (single function — minimal blast radius): emit a `logging.getLogger(...).warning("missing translation key: %s (locale=%s)", key, locale)` on miss, **memoized per (locale, key)** so warnings don't spam. +- Add verification: a small `scripts/check_i18n_logs.py` (or just a docs snippet using `grep` + `jq`) per R5. + +**Trade-offs** +- ✅ Smallest delta, fits the project's "no new framework" constraint, mirrors existing `report.py` precedent. +- ✅ Easy to PR-split per area if PR grows. +- ❌ ~330 mechanical edits across 12 files. Tedious, easy to leave a stray ZH literal — mitigated by R5 verification. + +### Option B — AST-driven codemod +- Write a one-shot `libcst`/`ast` pass that walks each file, extracts every Chinese string literal under a `logger.*` / `jsonify({"error|message": ...})` Call node, generates a key, rewrites in place, and emits the locale JSON entries. +- Run once, commit the result. + +**Trade-offs** +- ✅ Mechanical correctness — no missed call sites. +- ❌ Adds a one-shot dep (`libcst`) the project doesn't currently use; conflicts with the "no new dep without justification" rule. +- ❌ Generated keys tend to be ugly (`log.zep_tools.line_142`); we'd post-process anyway. 
+- ❌ Existing f-strings (`f"加载了 {n} 个agent"`) need manual conversion to `t("…", n=n)` because the AST has to understand the f-string AST and reverse-engineer placeholder names — non-trivial. + +### Option C — Hybrid (manual rewrites + small verifier) +- Manual rewrites per Option A, but use a tiny disposable script during the work (`scripts/scan_zh.py`) to enumerate every remaining ZH-bearing logger/jsonify line so the human (or me) doesn't miss any. The script becomes the verifier guard required by R5. + +**Trade-offs** +- ✅ Same outcome as Option A but with continuous progress tracking and a re-runnable guard at the end. +- ✅ The verifier doubles as the R5 deliverable. +- ❌ Slightly more upfront work (writing the scanner) — but the script is also a CI-friendly artefact. + +## 4. Effort & Risk + +- **Effort: M (3–7 days for a human; ~1 session at this scale for an autonomous run)** — ~330 mechanical edits + 330 locale entries + small `t()` enhancement + verifier. No architectural changes. +- **Risk: Low/Medium** — + - Low for the locale-helper edit (small, well-isolated). + - Medium for the bulk rewrite: easy to leave stray ZH literals, easy to break interpolation by passing positional args. Mitigated by the R5 verifier and a final regex sweep. + - Watch: `t` shadowing in comprehensions (cosmetic, no functional issue thanks to comprehension scope), preserving HTTP status codes on jsonify rewrites, keeping `success`/`traceback`/etc. fields intact. + +## 5. Recommendations for Design Phase + +- **Adopt Option C.** A small `scripts/check_i18n_logs.py` doubles as both the R5 acceptance check and a working aid during the rewrite. No new runtime deps. +- **Key namespace decision** to lock in during design: + - `log..` for logger calls (e.g. `log.zep_tools.entity_count_loaded`, `log.simulation_runner.platform_completed`). + - `api.error..` for `jsonify({"error": …})`. + - `api.message..` for `jsonify({"message": …})`. 
+ - Keep the existing flat `api.*` keys (used heavily by `report.py`) untouched. +- **`t()` helper extension**: emit a single deduplicated warning per missing `(locale, key)` pair. Use `logging.getLogger("mirofish.locale")`. Add a unit test (or a smoke check inside the verifier) that exercises a known-missing key and asserts the warning fires without raising. +- **Locale dictionary mechanics**: maintain alphabetical ordering inside each new sub-namespace and re-sort on update so diffs stay reviewable. +- **Research carried into design**: + - Confirm every background-task entry point that may emit logs from the in-scope modules calls `set_locale(...)` at thread start (current coverage looks complete — worth a quick re-scan). + - Decide whether to include the verifier in `package.json`/`Makefile` invocation or leave it as a documented one-liner. The ticket only asks that it be runnable from existing tools, so the lighter touch is fine. diff --git a/.kiro/specs/i18n-externalize-backend-logs/spec.json b/.kiro/specs/i18n-externalize-backend-logs/spec.json new file mode 100644 index 00000000..e66e9ac8 --- /dev/null +++ b/.kiro/specs/i18n-externalize-backend-logs/spec.json @@ -0,0 +1,23 @@ +{ + "feature_name": "i18n-externalize-backend-logs", + "created_at": "2026-05-07T13:24:45Z", + "updated_at": "2026-05-07T13:35:00Z", + "language": "en", + "phase": "tasks-generated", + "ticket": "salestech-group/MiroFish#6", + "approvals": { + "requirements": { + "generated": true, + "approved": true + }, + "design": { + "generated": true, + "approved": true + }, + "tasks": { + "generated": true, + "approved": true + } + }, + "ready_for_implementation": true +} diff --git a/.kiro/specs/i18n-externalize-backend-logs/tasks.md b/.kiro/specs/i18n-externalize-backend-logs/tasks.md new file mode 100644 index 00000000..cd71a3f1 --- /dev/null +++ b/.kiro/specs/i18n-externalize-backend-logs/tasks.md @@ -0,0 +1,129 @@ +# Implementation Plan — i18n-externalize-backend-logs + +## 1. 
Foundation: extend the locale helper and the verifier tooling + +- [x] 1.1 Add deduplicated missing-key warning and test-reset hook to the locale helper + - Extend the existing translation lookup so that, when a key is unresolved in both the active locale and the `zh` fallback, a single `logger.warning(...)` is emitted per `(locale, key)` pair (deduplicated for the lifetime of the process). + - Use the existing logger factory under a `mirofish.locale` logger name; the warning record must include the missing key string and the active locale. + - Preserve the existing return contract: a missing key still resolves to the raw key string, never raises. + - Expose a private reset entry point so unit tests can clear the dedup memoization between cases. + - Observable completion: invoking the helper with a known-missing key returns the key string, emits exactly one warning record, and a second invocation of the same key emits no additional warning until the reset hook is called. + - _Requirements: 4.1, 4.2, 4.3, 4.4_ + +- [x] 1.2 Build the i18n verification script with AST-aware Chinese-literal scanning and locale parity check + - Implement a single Python script that runs from the repo root using only the standard library (`json`, `re`, `pathlib`, `argparse`, `ast`). + - Mode A (`--logs`): walk the embedded list of in-scope backend modules and report every Chinese character (`U+4E00`–`U+9FFF`) found inside the string-literal arguments of `logger.{info,warning,error,debug,exception}(...)` calls and inside the `error` / `message` field values of `jsonify({...})` calls. Use the AST so that multi-line `jsonify(...)` calls are detected reliably. + - Mode B (`--parity`): load every `*.json` in `/locales/` (excluding `languages.json`), recursively diff the key paths pairwise, and report any path that exists in some files but not others. + - Default invocation runs both modes; CLI flags select either alone. Exit status: `0` when both pass, `1` otherwise. 
Each finding line is `<file>:<line>: <kind>: <snippet>`; final line is `OK` or `N issues`. + - Observable completion: running the script against the unmodified repo prints the current findings list and exits non-zero; running it after the rewrite tasks below prints `OK` and exits `0`. + - _Requirements: 1.5, 2.4, 3.4, 5.1, 5.2, 5.3, 5.4_ + - _Boundary: I18nLogVerifier_ + +## 2. Core: rewrite Chinese log strings in the backend service modules + +> Each sub-task here is mechanically isolated to one file and only touches `logger.{info,warning,error,debug,exception}(...)` lines plus the matching `log..*` namespace in both locale files. Sub-tasks 2.1–2.9 are parallel-safe: they operate on disjoint file boundaries and only append (never overwrite) keys to the locale dictionaries. Locale-file edits are append-only sub-namespaces, so concurrent edits do not collide as long as the namespace per task is unique. + +- [x] 2.1 (P) Externalize Chinese logger messages in the Zep tools service + - Replace every Chinese string literal inside `logger.*` calls in the Zep tools service with translation lookups under the `log.zep_tools.*` sub-namespace. + - Move every dynamic value into a `{name}` placeholder kwarg passed through the translation helper (no f-strings or string concatenation around the helper call). + - Add the matching keys to `locales/en.json` (English translation) and `locales/zh.json` (original Chinese verbatim) in alphabetical order inside the new sub-namespace. + - Observable completion: the verifier `--logs` mode reports zero Chinese matches inside the Zep tools service file. + - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 3.1, 3.2, 3.3, 3.5_ + - _Boundary: BackendLogTranslations (zep_tools), LocaleDictionary_ + +- [x] 2.2 (P) Externalize Chinese logger messages in the simulation runner service + - Same rewrite/locale pattern under the `log.simulation_runner.*` sub-namespace. 
+ - Confirm the runner's existing background-thread `set_locale(...)` call still happens at thread entry so the helper resolves the right locale for these messages. + - Observable completion: verifier `--logs` mode reports zero Chinese matches inside the simulation runner service file. + - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 3.1, 3.2, 3.3, 3.5_ + - _Boundary: BackendLogTranslations (simulation_runner), LocaleDictionary_ + +- [x] 2.3 (P) Externalize Chinese logger messages in the OASIS profile generator service + - Same rewrite/locale pattern under the `log.profile_generator.*` sub-namespace. + - Observable completion: verifier `--logs` mode reports zero Chinese matches inside the OASIS profile generator service file. + - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 3.1, 3.2, 3.3, 3.5_ + - _Boundary: BackendLogTranslations (oasis_profile_generator), LocaleDictionary_ + +- [x] 2.4 (P) Externalize Chinese logger messages in the simulation config generator service + - Same rewrite/locale pattern under the `log.simulation_config.*` sub-namespace. + - Observable completion: verifier `--logs` mode reports zero Chinese matches inside the simulation config generator service file. + - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 3.1, 3.2, 3.3, 3.5_ + - _Boundary: BackendLogTranslations (simulation_config_generator), LocaleDictionary_ + +- [x] 2.5 (P) Externalize Chinese logger messages in the Zep graph memory updater service + - Same rewrite/locale pattern under the `log.zep_graph_memory_updater.*` sub-namespace. + - Observable completion: verifier `--logs` mode reports zero Chinese matches inside the Zep graph memory updater service file. + - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 3.1, 3.2, 3.3, 3.5_ + - _Boundary: BackendLogTranslations (zep_graph_memory_updater), LocaleDictionary_ + +- [x] 2.6 (P) Externalize Chinese logger messages in the Zep entity reader service + - Same rewrite/locale pattern under the `log.zep_entity_reader.*` sub-namespace. 
+ - Observable completion: verifier `--logs` mode reports zero Chinese matches inside the Zep entity reader service file. + - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 3.1, 3.2, 3.3, 3.5_ + - _Boundary: BackendLogTranslations (zep_entity_reader), LocaleDictionary_ + +- [x] 2.7 (P) Externalize Chinese logger messages in the simulation IPC service + - Same rewrite/locale pattern under the `log.simulation_ipc.*` sub-namespace. + - Observable completion: verifier `--logs` mode reports zero Chinese matches inside the simulation IPC service file. + - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 3.1, 3.2, 3.3, 3.5_ + - _Boundary: BackendLogTranslations (simulation_ipc), LocaleDictionary_ + +- [x] 2.8 (P) Externalize Chinese logger messages in the simulation manager service + - Same rewrite/locale pattern under the `log.simulation_manager.*` sub-namespace. + - Observable completion: verifier `--logs` mode reports zero Chinese matches inside the simulation manager service file. + - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 3.1, 3.2, 3.3, 3.5_ + - _Boundary: BackendLogTranslations (simulation_manager), LocaleDictionary_ + +- [x] 2.9 (P) Externalize the residual Chinese logger message in the report agent service + - Replace the single residual Chinese `logger.*` call in the report agent service with a translation lookup under the `log.report_agent.*` sub-namespace. + - Do not touch prompt strings — those remain owned by the sibling spec already merged on this branch. + - Observable completion: verifier `--logs` mode reports zero Chinese matches inside the report agent service file. + - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 3.1, 3.2, 3.3, 3.5_ + - _Boundary: BackendLogTranslations (report_agent), LocaleDictionary_ + +## 3. Core: rewrite Chinese strings in the backend API blueprints + +> The API sub-tasks rewrite both `logger.*` calls and the `error` / `message` field values of `jsonify(...)` responses in the same file. 
Each blueprint owns disjoint `log..*` and `api.{error,message}..*` sub-namespaces, so they remain parallel-safe. + +- [x] 3.1 (P) Externalize Chinese strings in the simulation API blueprint + - Rewrite Chinese `logger.*` strings under the `log.simulation_api.*` sub-namespace. + - Rewrite Chinese `error` / `message` field values inside `jsonify({...})` responses under the `api.error.simulation.*` / `api.message.simulation.*` sub-namespaces. Preserve every other field (`success`, `data`, `traceback`, `progress`, `status`) and the HTTP status code unchanged. + - Move dynamic values into `{name}` placeholder kwargs (e.g. `id=`); never embed Chinese in the surrounding f-string. + - Add the matching keys to `locales/en.json` and `locales/zh.json` in alphabetical order under the new sub-namespaces. + - Observable completion: verifier `--logs` mode reports zero Chinese matches inside the simulation API blueprint and the blueprint's existing endpoints continue to return the same HTTP status codes and response field shape. + - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 2.1, 2.2, 2.3, 2.4, 2.5, 3.1, 3.2, 3.3, 3.5_ + - _Boundary: BackendLogTranslations (simulation_api), BackendApiResponseTranslations (simulation), LocaleDictionary_ + +- [x] 3.2 (P) Externalize Chinese strings in the report API blueprint + - Rewrite Chinese `logger.*` strings under the `log.report_api.*` sub-namespace. + - Leave the existing flat `api.` keys already in use by the blueprint untouched (they are part of the existing contract and shared with the frontend). + - For any *new* `error` / `message` translations introduced by this rewrite, place them under `api.error.report.*` / `api.message.report.*`. + - Observable completion: verifier `--logs` mode reports zero Chinese matches inside the report API blueprint and the blueprint's existing endpoints continue to return the same HTTP status codes and response field shape. 
+ - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 2.1, 2.2, 2.3, 2.4, 2.5, 3.1, 3.2, 3.3, 3.5_ + - _Boundary: BackendLogTranslations (report_api), BackendApiResponseTranslations (report), LocaleDictionary_ + +- [x] 3.3 (P) Externalize Chinese strings in the graph API blueprint + - Rewrite Chinese `logger.*` strings under the `log.graph_api.*` sub-namespace. + - Rewrite Chinese `error` / `message` field values inside `jsonify({...})` responses under `api.error.graph.*` / `api.message.graph.*`. + - Observable completion: verifier `--logs` mode reports zero Chinese matches inside the graph API blueprint and the blueprint's existing endpoints continue to return the same HTTP status codes and response field shape. + - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 2.1, 2.2, 2.3, 2.4, 2.5, 3.1, 3.2, 3.3, 3.5_ + - _Boundary: BackendLogTranslations (graph_api), BackendApiResponseTranslations (graph), LocaleDictionary_ + +## 4. Validation: end-to-end checks and regression coverage + +- [x] 4.1 Add focused locale-helper tests for the missing-key warning path + - Add unit tests that exercise the locale helper's missing-key behavior: a missing key returns the raw key string, emits exactly one warning record per `(locale, key)` pair, and never raises for any input string (including invalid nested paths). + - Tests use the private reset hook from task 1.1 to clear the dedup memoization between cases. + - Add a single integration-style test asserting that an API endpoint rendering a translated `error` field returns the English string when the request carries `Accept-Language: en` and the original Chinese when the header is `zh` or absent. + - Observable completion: `uv run python -m pytest` runs the new tests green alongside the existing test in the repository. 
+ - _Depends: 1.1, 3.1_ + - _Requirements: 4.1, 4.2, 4.3, 4.4, 5.5_ + +- [x] 4.2 Run the verifier and the full pytest sweep against the rewritten codebase + - Execute `python scripts/check_i18n_logs.py` from the repo root and confirm both the source scan and the parity check pass with exit `0`. + - Re-run the regex acceptance check from the ticket (`grep -rEn "logger\.[a-z]+\([\"'][^\"']*[一-鿿]" backend/app/`) and confirm zero matches. + - Re-run `uv run python -m pytest` and confirm the suite is green (no new failures introduced by the rewrite). + - Spot-check one log line per modified file by setting the locale to `en` and tailing the formatted message — confirm the `{placeholder}` substitution works for messages with dynamic values. + - Observable completion: all three commands above exit `0` and the spot-checked log lines render in English under the `en` locale. + - _Depends: 1.2, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.1, 3.2, 3.3_ + - _Requirements: 1.5, 2.4, 3.4, 5.1, 5.2, 5.3, 5.5_ diff --git a/backend/app/__init__.py b/backend/app/__init__.py index 6f3345cd..11857ef0 100644 --- a/backend/app/__init__.py +++ b/backend/app/__init__.py @@ -14,6 +14,7 @@ from flask_cors import CORS from .config import Config from .utils.logger import setup_logger, get_logger +from .utils.locale import t def create_app(config_class=Config): @@ -36,7 +37,7 @@ def create_app(config_class=Config): if should_log_startup: logger.info("=" * 50) - logger.info("MiroFish Backend 启动中...") + logger.info(t("log.bootstrap.m001")) logger.info("=" * 50) # 启用CORS @@ -46,20 +47,20 @@ def create_app(config_class=Config): from .services.simulation_runner import SimulationRunner SimulationRunner.register_cleanup() if should_log_startup: - logger.info("已注册模拟进程清理函数") + logger.info(t("log.bootstrap.m002")) # 请求日志中间件 @app.before_request def log_request(): logger = get_logger('mirofish.request') - logger.debug(f"请求: {request.method} {request.path}") + logger.debug(t("log.bootstrap.m003", 
request=request.method, request_2=request.path)) if request.content_type and 'json' in request.content_type: - logger.debug(f"请求体: {request.get_json(silent=True)}") + logger.debug(t("log.bootstrap.m004", request=request.get_json(silent=True))) @app.after_request def log_response(response): logger = get_logger('mirofish.request') - logger.debug(f"响应: {response.status_code}") + logger.debug(t("log.bootstrap.m005", response=response.status_code)) return response # 注册蓝图 @@ -78,7 +79,7 @@ def create_app(config_class=Config): _recover_stuck_projects() if should_log_startup: - logger.info("MiroFish Backend 启动完成") + logger.info(t("log.bootstrap.m006")) return app diff --git a/backend/app/api/graph.py b/backend/app/api/graph.py index f432269d..d4cafa12 100644 --- a/backend/app/api/graph.py +++ b/backend/app/api/graph.py @@ -18,6 +18,7 @@ from ..utils.file_parser import FileParser from ..utils.logger import get_logger from ..models.task import TaskManager, TaskStatus from ..models.project import ProjectManager, ProjectStatus +from ..utils.locale import t # In-memory cache for graph data to avoid hammering Zep's rate-limited API. # Stale cache is served instantly on 429; a background thread refreshes it. 
@@ -49,7 +50,7 @@ def get_project(project_id: str): if not project: return jsonify({ "success": False, - "error": f"项目不存在: {project_id}" + "error": t("api.error.graph.m001", project_id=project_id) }), 404 return jsonify({ @@ -83,12 +84,12 @@ def delete_project(project_id: str): if not success: return jsonify({ "success": False, - "error": f"项目不存在或删除失败: {project_id}" + "error": t("api.error.graph.m002", project_id=project_id) }), 404 return jsonify({ "success": True, - "message": f"项目已删除: {project_id}" + "message": t("api.message.graph.m003", project_id=project_id) }) @@ -102,7 +103,7 @@ def reset_project(project_id: str): if not project: return jsonify({ "success": False, - "error": f"项目不存在: {project_id}" + "error": t("api.error.graph.m004", project_id=project_id) }), 404 # 重置到本体已生成状态 @@ -118,7 +119,7 @@ def reset_project(project_id: str): return jsonify({ "success": True, - "message": f"项目已重置: {project_id}", + "message": t("api.message.graph.m005", project_id=project_id), "data": project.to_dict() }) @@ -154,20 +155,20 @@ def generate_ontology(): } """ try: - logger.info("=== 开始生成本体定义 ===") + logger.info(t("log.graph_api.m006")) # 获取参数 simulation_requirement = request.form.get('simulation_requirement', '') project_name = request.form.get('project_name', 'Unnamed Project') additional_context = request.form.get('additional_context', '') - logger.debug(f"项目名称: {project_name}") - logger.debug(f"模拟需求: {simulation_requirement[:100]}...") + logger.debug(t("log.graph_api.m007", project_name=project_name)) + logger.debug(t("log.graph_api.m008", simulation_requirement=simulation_requirement[:100])) if not simulation_requirement: return jsonify({ "success": False, - "error": "请提供模拟需求描述 (simulation_requirement)" + "error": t("api.error.graph.m009") }), 400 # 获取上传的文件 @@ -175,13 +176,13 @@ def generate_ontology(): if not uploaded_files or all(not f.filename for f in uploaded_files): return jsonify({ "success": False, - "error": "请至少上传一个文档文件" + "error": t("api.error.graph.m010") 
}), 400 # 创建项目 project = ProjectManager.create_project(name=project_name) project.simulation_requirement = simulation_requirement - logger.info(f"创建项目: {project.project_id}") + logger.info(t("log.graph_api.m011", project=project.project_id)) # 保存文件并提取文本 document_texts = [] @@ -210,16 +211,16 @@ def generate_ontology(): ProjectManager.delete_project(project.project_id) return jsonify({ "success": False, - "error": "没有成功处理任何文档,请检查文件格式" + "error": t("api.error.graph.m012") }), 400 # 保存提取的文本 project.total_text_length = len(all_text) ProjectManager.save_extracted_text(project.project_id, all_text) - logger.info(f"文本提取完成,共 {len(all_text)} 字符") + logger.info(t("log.graph_api.m013", len=len(all_text))) # 生成本体 - logger.info("调用 LLM 生成本体定义...") + logger.info(t("log.graph_api.m014")) generator = OntologyGenerator() ontology = generator.generate( document_texts=document_texts, @@ -230,7 +231,7 @@ def generate_ontology(): # 保存本体到项目 entity_count = len(ontology.get("entity_types", [])) edge_count = len(ontology.get("edge_types", [])) - logger.info(f"本体生成完成: {entity_count} 个实体类型, {edge_count} 个关系类型") + logger.info(t("log.graph_api.m015", entity_count=entity_count, edge_count=edge_count)) project.ontology = { "entity_types": ontology.get("entity_types", []), @@ -239,7 +240,7 @@ def generate_ontology(): project.analysis_summary = ontology.get("analysis_summary", "") project.status = ProjectStatus.ONTOLOGY_GENERATED ProjectManager.save_project(project) - logger.info(f"=== 本体生成完成 === 项目ID: {project.project_id}") + logger.info(t("log.graph_api.m016", project=project.project_id)) return jsonify({ "success": True, @@ -287,14 +288,14 @@ def build_graph(): } """ try: - logger.info("=== 开始构建图谱 ===") + logger.info(t("log.graph_api.m017")) # 检查配置 errors = [] if not Config.NEO4J_PASSWORD: errors.append("NEO4J未配置") if errors: - logger.error(f"配置错误: {errors}") + logger.error(t("log.graph_api.m018", errors=errors)) return jsonify({ "success": False, "error": "配置错误: " + "; ".join(errors) @@ 
-303,12 +304,12 @@ def build_graph(): # 解析请求 data = request.get_json() or {} project_id = data.get('project_id') - logger.debug(f"请求参数: project_id={project_id}") + logger.debug(t("log.graph_api.m019", project_id=project_id)) if not project_id: return jsonify({ "success": False, - "error": "请提供 project_id" + "error": t("api.error.graph.m020") }), 400 # 获取项目 @@ -316,7 +317,7 @@ def build_graph(): if not project: return jsonify({ "success": False, - "error": f"项目不存在: {project_id}" + "error": t("api.error.graph.m021", project_id=project_id) }), 404 # 检查项目状态 @@ -325,13 +326,13 @@ def build_graph(): if project.status == ProjectStatus.CREATED: return jsonify({ "success": False, - "error": "项目尚未生成本体,请先调用 /ontology/generate" + "error": t("api.error.graph.m022") }), 400 if project.status == ProjectStatus.GRAPH_BUILDING and not force: return jsonify({ "success": False, - "error": "图谱正在构建中,请勿重复提交。如需强制重建,请添加 force: true", + "error": t("api.error.graph.m023"), "task_id": project.graph_build_task_id }), 400 @@ -356,7 +357,7 @@ def build_graph(): if not text: return jsonify({ "success": False, - "error": "未找到提取的文本内容" + "error": t("api.error.graph.m024") }), 400 # 获取本体 @@ -364,13 +365,13 @@ def build_graph(): if not ontology: return jsonify({ "success": False, - "error": "未找到本体定义" + "error": t("api.error.graph.m025") }), 400 # 创建异步任务 task_manager = TaskManager() task_id = task_manager.create_task(f"构建图谱: {graph_name}") - logger.info(f"创建图谱构建任务: task_id={task_id}, project_id={project_id}") + logger.info(t("log.graph_api.m026", task_id=task_id, project_id=project_id)) # 更新项目状态 project.status = ProjectStatus.GRAPH_BUILDING @@ -556,7 +557,7 @@ def get_task(task_id: str): if not task: return jsonify({ "success": False, - "error": f"任务不存在: {task_id}" + "error": t("api.error.graph.m027", task_id=task_id) }), 404 return jsonify({ @@ -613,7 +614,7 @@ def get_graph_data(graph_id: str): - 无缓存:后台线程拉取,返回 202 让前端稍后重试 """ if not Config.NEO4J_PASSWORD: - return jsonify({"success": False, "error": 
"NEO4J未配置"}), 500 + return jsonify({"success": False, "error": t("api.error.graph.m028")}), 500 cached = _graph_data_cache.get(graph_id) age = time.time() - cached["ts"] if cached else None @@ -645,7 +646,7 @@ def delete_graph(graph_id: str): if not Config.NEO4J_PASSWORD: return jsonify({ "success": False, - "error": "NEO4J未配置" + "error": t("api.error.graph.m029") }), 500 builder = GraphBuilderService() @@ -653,7 +654,7 @@ def delete_graph(graph_id: str): return jsonify({ "success": True, - "message": f"图谱已删除: {graph_id}" + "message": t("api.message.graph.m030", graph_id=graph_id) }) except Exception as e: diff --git a/backend/app/api/report.py b/backend/app/api/report.py index d7f2a4d0..92f47df2 100644 --- a/backend/app/api/report.py +++ b/backend/app/api/report.py @@ -172,7 +172,7 @@ def generate_report(): task_manager.fail_task(task_id, report.error or t('api.reportGenerateFailed')) except Exception as e: - logger.error(f"报告生成失败: {str(e)}") + logger.error(t("log.report_api.m001", str=str(e))) task_manager.fail_task(task_id, str(e)) # 启动后台线程 @@ -192,7 +192,7 @@ def generate_report(): }) except Exception as e: - logger.error(f"启动报告生成任务失败: {str(e)}") + logger.error(t("log.report_api.m002", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -265,7 +265,7 @@ def get_generate_status(): }) except Exception as e: - logger.error(f"查询任务状态失败: {str(e)}") + logger.error(t("log.report_api.m003", str=str(e))) return jsonify({ "success": False, "error": str(e) @@ -308,7 +308,7 @@ def get_report(report_id: str): }) except Exception as e: - logger.error(f"获取报告失败: {str(e)}") + logger.error(t("log.report_api.m004", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -347,7 +347,7 @@ def get_report_by_simulation(simulation_id: str): }) except Exception as e: - logger.error(f"获取报告失败: {str(e)}") + logger.error(t("log.report_api.m005", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -387,7 +387,7 @@ def list_reports(): }) except 
Exception as e: - logger.error(f"列出报告失败: {str(e)}") + logger.error(t("log.report_api.m006", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -433,7 +433,7 @@ def download_report(report_id: str): ) except Exception as e: - logger.error(f"下载报告失败: {str(e)}") + logger.error(t("log.report_api.m007", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -459,7 +459,7 @@ def delete_report(report_id: str): }) except Exception as e: - logger.error(f"删除报告失败: {str(e)}") + logger.error(t("log.report_api.m008", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -556,7 +556,7 @@ def chat_with_report_agent(): }) except Exception as e: - logger.error(f"对话失败: {str(e)}") + logger.error(t("log.report_api.m009", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -599,7 +599,7 @@ def get_report_progress(report_id: str): }) except Exception as e: - logger.error(f"获取报告进度失败: {str(e)}") + logger.error(t("log.report_api.m010", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -650,7 +650,7 @@ def get_report_sections(report_id: str): }) except Exception as e: - logger.error(f"获取章节列表失败: {str(e)}") + logger.error(t("log.report_api.m011", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -694,7 +694,7 @@ def get_single_section(report_id: str, section_index: int): }) except Exception as e: - logger.error(f"获取章节内容失败: {str(e)}") + logger.error(t("log.report_api.m012", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -745,7 +745,7 @@ def check_report_status(simulation_id: str): }) except Exception as e: - logger.error(f"检查报告状态失败: {str(e)}") + logger.error(t("log.report_api.m013", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -806,7 +806,7 @@ def get_agent_log(report_id: str): }) except Exception as e: - logger.error(f"获取Agent日志失败: {str(e)}") + logger.error(t("log.report_api.m014", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -840,7 
+840,7 @@ def stream_agent_log(report_id: str): }) except Exception as e: - logger.error(f"获取Agent日志失败: {str(e)}") + logger.error(t("log.report_api.m015", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -888,7 +888,7 @@ def get_console_log(report_id: str): }) except Exception as e: - logger.error(f"获取控制台日志失败: {str(e)}") + logger.error(t("log.report_api.m016", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -922,7 +922,7 @@ def stream_console_log(report_id: str): }) except Exception as e: - logger.error(f"获取控制台日志失败: {str(e)}") + logger.error(t("log.report_api.m017", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -972,7 +972,7 @@ def search_graph_tool(): }) except Exception as e: - logger.error(f"图谱搜索失败: {str(e)}") + logger.error(t("log.report_api.m018", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -1012,7 +1012,7 @@ def get_graph_statistics_tool(): }) except Exception as e: - logger.error(f"获取图谱统计失败: {str(e)}") + logger.error(t("log.report_api.m019", str=str(e))) return jsonify({ "success": False, "error": str(e), diff --git a/backend/app/api/simulation.py b/backend/app/api/simulation.py index 77acc1a9..4cc3018e 100644 --- a/backend/app/api/simulation.py +++ b/backend/app/api/simulation.py @@ -15,6 +15,7 @@ from ..services.simulation_manager import SimulationManager, SimulationStatus from ..services.simulation_runner import SimulationRunner, RunnerStatus from ..utils.logger import get_logger from ..models.project import ProjectManager +from ..utils.locale import t logger = get_logger('mirofish.api.simulation') @@ -59,14 +60,14 @@ def get_graph_entities(graph_id: str): if not Config.NEO4J_PASSWORD: return jsonify({ "success": False, - "error": "NEO4J未配置" + "error": t("api.error.simulation.m001") }), 500 entity_types_str = request.args.get('entity_types', '') entity_types = [t.strip() for t in entity_types_str.split(',') if t.strip()] if entity_types_str else None enrich = 
request.args.get('enrich', 'true').lower() == 'true' - logger.info(f"获取图谱实体: graph_id={graph_id}, entity_types={entity_types}, enrich={enrich}") + logger.info(t("log.simulation_api.m002", graph_id=graph_id, entity_types=entity_types, enrich=enrich)) reader = ZepEntityReader() result = reader.filter_defined_entities( @@ -81,7 +82,7 @@ def get_graph_entities(graph_id: str): }) except Exception as e: - logger.error(f"获取图谱实体失败: {str(e)}") + logger.error(t("log.simulation_api.m003", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -96,7 +97,7 @@ def get_entity_detail(graph_id: str, entity_uuid: str): if not Config.NEO4J_PASSWORD: return jsonify({ "success": False, - "error": "NEO4J未配置" + "error": t("api.error.simulation.m004") }), 500 reader = ZepEntityReader() @@ -105,7 +106,7 @@ def get_entity_detail(graph_id: str, entity_uuid: str): if not entity: return jsonify({ "success": False, - "error": f"实体不存在: {entity_uuid}" + "error": t("api.error.simulation.m005", entity_uuid=entity_uuid) }), 404 return jsonify({ @@ -114,7 +115,7 @@ def get_entity_detail(graph_id: str, entity_uuid: str): }) except Exception as e: - logger.error(f"获取实体详情失败: {str(e)}") + logger.error(t("log.simulation_api.m006", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -129,7 +130,7 @@ def get_entities_by_type(graph_id: str, entity_type: str): if not Config.NEO4J_PASSWORD: return jsonify({ "success": False, - "error": "NEO4J未配置" + "error": t("api.error.simulation.m007") }), 500 enrich = request.args.get('enrich', 'true').lower() == 'true' @@ -151,7 +152,7 @@ def get_entities_by_type(graph_id: str, entity_type: str): }) except Exception as e: - logger.error(f"获取实体失败: {str(e)}") + logger.error(t("log.simulation_api.m008", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -197,21 +198,21 @@ def create_simulation(): if not project_id: return jsonify({ "success": False, - "error": "请提供 project_id" + "error": t("api.error.simulation.m009") }), 400 
project = ProjectManager.get_project(project_id) if not project: return jsonify({ "success": False, - "error": f"项目不存在: {project_id}" + "error": t("api.error.simulation.m010", project_id=project_id) }), 404 graph_id = data.get('graph_id') or project.graph_id if not graph_id: return jsonify({ "success": False, - "error": "项目尚未构建图谱,请先调用 /api/graph/build" + "error": t("api.error.simulation.m011") }), 400 manager = SimulationManager() @@ -228,7 +229,7 @@ def create_simulation(): }) except Exception as e: - logger.error(f"创建模拟失败: {str(e)}") + logger.error(t("log.simulation_api.m012", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -297,7 +298,7 @@ def _check_simulation_prepared(simulation_id: str) -> tuple: config_generated = state_data.get("config_generated", False) # 详细日志 - logger.debug(f"检测模拟准备状态: {simulation_id}, status={status}, config_generated={config_generated}") + logger.debug(t("log.simulation_api.m013", simulation_id=simulation_id, status=status, config_generated=config_generated)) # 如果 config_generated=True 且文件存在,认为准备完成 # 以下状态都说明准备工作已完成: @@ -327,12 +328,12 @@ def _check_simulation_prepared(simulation_id: str) -> tuple: state_data["updated_at"] = datetime.now().isoformat() with open(state_file, 'w', encoding='utf-8') as f: json.dump(state_data, f, ensure_ascii=False, indent=2) - logger.info(f"自动更新模拟状态: {simulation_id} preparing -> ready") + logger.info(t("log.simulation_api.m014", simulation_id=simulation_id)) status = "ready" except Exception as e: - logger.warning(f"自动更新状态失败: {e}") + logger.warning(t("log.simulation_api.m015", e=e)) - logger.info(f"模拟 {simulation_id} 检测结果: 已准备完成 (status={status}, config_generated={config_generated})") + logger.info(t("log.simulation_api.m016", simulation_id=simulation_id, status=status, config_generated=config_generated)) return True, { "status": status, "entities_count": state_data.get("entities_count", 0), @@ -344,7 +345,7 @@ def _check_simulation_prepared(simulation_id: str) -> tuple: 
"existing_files": existing_files } else: - logger.warning(f"模拟 {simulation_id} 检测结果: 未准备完成 (status={status}, config_generated={config_generated})") + logger.warning(t("log.simulation_api.m017", simulation_id=simulation_id, status=status, config_generated=config_generated)) return False, { "reason": f"状态不在已准备列表中或config_generated为false: status={status}, config_generated={config_generated}", "status": status, @@ -408,7 +409,7 @@ def prepare_simulation(): if not simulation_id: return jsonify({ "success": False, - "error": "请提供 simulation_id" + "error": t("api.error.simulation.m018") }), 400 manager = SimulationManager() @@ -417,20 +418,20 @@ def prepare_simulation(): if not state: return jsonify({ "success": False, - "error": f"模拟不存在: {simulation_id}" + "error": t("api.error.simulation.m019", simulation_id=simulation_id) }), 404 # 检查是否强制重新生成 force_regenerate = data.get('force_regenerate', False) - logger.info(f"开始处理 /prepare 请求: simulation_id={simulation_id}, force_regenerate={force_regenerate}") + logger.info(t("log.simulation_api.m020", simulation_id=simulation_id, force_regenerate=force_regenerate)) # 检查是否已经准备完成(避免重复生成) if not force_regenerate: - logger.debug(f"检查模拟 {simulation_id} 是否已准备完成...") + logger.debug(t("log.simulation_api.m021", simulation_id=simulation_id)) is_prepared, prepare_info = _check_simulation_prepared(simulation_id) - logger.debug(f"检查结果: is_prepared={is_prepared}, prepare_info={prepare_info}") + logger.debug(t("log.simulation_api.m022", is_prepared=is_prepared, prepare_info=prepare_info)) if is_prepared: - logger.info(f"模拟 {simulation_id} 已准备完成,跳过重复生成") + logger.info(t("log.simulation_api.m023", simulation_id=simulation_id)) return jsonify({ "success": True, "data": { @@ -442,14 +443,14 @@ def prepare_simulation(): } }) else: - logger.info(f"模拟 {simulation_id} 未准备完成,将启动准备任务") + logger.info(t("log.simulation_api.m024", simulation_id=simulation_id)) # 从项目获取必要信息 project = ProjectManager.get_project(state.project_id) if not project: return jsonify({ 
"success": False, - "error": f"项目不存在: {state.project_id}" + "error": t("api.error.simulation.m025", state=state.project_id) }), 404 # 获取模拟需求 @@ -457,7 +458,7 @@ def prepare_simulation(): if not simulation_requirement: return jsonify({ "success": False, - "error": "项目缺少模拟需求描述 (simulation_requirement)" + "error": t("api.error.simulation.m026") }), 400 # 获取文档文本 @@ -470,7 +471,7 @@ def prepare_simulation(): # ========== 同步获取实体数量(在后台任务启动前) ========== # 这样前端在调用prepare后立即就能获取到预期Agent总数 try: - logger.info(f"同步获取实体数量: graph_id={state.graph_id}") + logger.info(t("log.simulation_api.m027", state=state.graph_id)) reader = ZepEntityReader() # 快速读取实体(不需要边信息,只统计数量) filtered_preview = reader.filter_defined_entities( @@ -481,9 +482,9 @@ def prepare_simulation(): # 保存实体数量到状态(供前端立即获取) state.entities_count = filtered_preview.filtered_count state.entity_types = list(filtered_preview.entity_types) - logger.info(f"预期实体数量: {filtered_preview.filtered_count}, 类型: {filtered_preview.entity_types}") + logger.info(t("log.simulation_api.m028", filtered_preview=filtered_preview.filtered_count, filtered_preview_2=filtered_preview.entity_types)) except Exception as e: - logger.warning(f"同步获取实体数量失败(将在后台任务中重试): {e}") + logger.warning(t("log.simulation_api.m029", e=e)) # 失败不影响后续流程,后台任务会重新获取 # 创建异步任务 @@ -592,7 +593,7 @@ def prepare_simulation(): ) except Exception as e: - logger.error(f"准备模拟失败: {str(e)}") + logger.error(t("log.simulation_api.m030", str=str(e))) task_manager.fail_task(task_id, str(e)) # 更新模拟状态为失败 @@ -626,7 +627,7 @@ def prepare_simulation(): }), 404 except Exception as e: - logger.error(f"启动准备任务失败: {str(e)}") + logger.error(t("log.simulation_api.m031", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -702,7 +703,7 @@ def get_prepare_status(): }) return jsonify({ "success": False, - "error": "请提供 task_id 或 simulation_id" + "error": t("api.error.simulation.m032") }), 400 task_manager = TaskManager() @@ -728,7 +729,7 @@ def get_prepare_status(): return jsonify({ 
"success": False, - "error": f"任务不存在: {task_id}" + "error": t("api.error.simulation.m033", task_id=task_id) }), 404 task_dict = task.to_dict() @@ -740,7 +741,7 @@ def get_prepare_status(): }) except Exception as e: - logger.error(f"查询任务状态失败: {str(e)}") + logger.error(t("log.simulation_api.m034", str=str(e))) return jsonify({ "success": False, "error": str(e) @@ -757,7 +758,7 @@ def get_simulation(simulation_id: str): if not state: return jsonify({ "success": False, - "error": f"模拟不存在: {simulation_id}" + "error": t("api.error.simulation.m035", simulation_id=simulation_id) }), 404 result = state.to_dict() @@ -772,7 +773,7 @@ def get_simulation(simulation_id: str): }) except Exception as e: - logger.error(f"获取模拟状态失败: {str(e)}") + logger.error(t("log.simulation_api.m036", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -801,7 +802,7 @@ def list_simulations(): }) except Exception as e: - logger.error(f"列出模拟失败: {str(e)}") + logger.error(t("log.simulation_api.m037", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -864,7 +865,7 @@ def _get_report_id_for_simulation(simulation_id: str) -> str: return matching_reports[0].get("report_id") except Exception as e: - logger.warning(f"查找 simulation {simulation_id} 的 report 失败: {e}") + logger.warning(t("log.simulation_api.m038", simulation_id=simulation_id, e=e)) return None @@ -974,7 +975,7 @@ def get_simulation_history(): }) except Exception as e: - logger.error(f"获取历史模拟失败: {str(e)}") + logger.error(t("log.simulation_api.m039", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -1012,7 +1013,7 @@ def get_simulation_profiles(simulation_id: str): }), 404 except Exception as e: - logger.error(f"获取Profile失败: {str(e)}") + logger.error(t("log.simulation_api.m040", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -1061,7 +1062,7 @@ def get_simulation_profiles_realtime(simulation_id: str): if not os.path.exists(sim_dir): return jsonify({ "success": False, - 
"error": f"模拟不存在: {simulation_id}" + "error": t("api.error.simulation.m041", simulation_id=simulation_id) }), 404 # 确定文件路径 @@ -1089,7 +1090,7 @@ def get_simulation_profiles_realtime(simulation_id: str): reader = csv.DictReader(f) profiles = list(reader) except (json.JSONDecodeError, Exception) as e: - logger.warning(f"读取 profiles 文件失败(可能正在写入中): {e}") + logger.warning(t("log.simulation_api.m042", e=e)) profiles = [] # 检查是否正在生成(通过 state.json 判断) @@ -1122,7 +1123,7 @@ def get_simulation_profiles_realtime(simulation_id: str): }) except Exception as e: - logger.error(f"实时获取Profile失败: {str(e)}") + logger.error(t("log.simulation_api.m043", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -1164,7 +1165,7 @@ def get_simulation_config_realtime(simulation_id: str): if not os.path.exists(sim_dir): return jsonify({ "success": False, - "error": f"模拟不存在: {simulation_id}" + "error": t("api.error.simulation.m044", simulation_id=simulation_id) }), 404 # 配置文件路径 @@ -1184,7 +1185,7 @@ def get_simulation_config_realtime(simulation_id: str): with open(config_file, 'r', encoding='utf-8') as f: config = json.load(f) except (json.JSONDecodeError, Exception) as e: - logger.warning(f"读取 config 文件失败(可能正在写入中): {e}") + logger.warning(t("log.simulation_api.m045", e=e)) config = None # 检查是否正在生成(通过 state.json 判断) @@ -1242,7 +1243,7 @@ def get_simulation_config_realtime(simulation_id: str): }) except Exception as e: - logger.error(f"实时获取Config失败: {str(e)}") + logger.error(t("log.simulation_api.m046", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -1269,7 +1270,7 @@ def get_simulation_config(simulation_id: str): if not config: return jsonify({ "success": False, - "error": f"模拟配置不存在,请先调用 /prepare 接口" + "error": t("api.error.simulation.m047") }), 404 return jsonify({ @@ -1278,7 +1279,7 @@ def get_simulation_config(simulation_id: str): }) except Exception as e: - logger.error(f"获取配置失败: {str(e)}") + logger.error(t("log.simulation_api.m048", str=str(e))) return 
jsonify({ "success": False, "error": str(e), @@ -1297,7 +1298,7 @@ def download_simulation_config(simulation_id: str): if not os.path.exists(config_path): return jsonify({ "success": False, - "error": "配置文件不存在,请先调用 /prepare 接口" + "error": t("api.error.simulation.m049") }), 404 return send_file( @@ -1307,7 +1308,7 @@ def download_simulation_config(simulation_id: str): ) except Exception as e: - logger.error(f"下载配置失败: {str(e)}") + logger.error(t("log.simulation_api.m050", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -1341,7 +1342,7 @@ def download_simulation_script(script_name: str): if script_name not in allowed_scripts: return jsonify({ "success": False, - "error": f"未知脚本: {script_name},可选: {allowed_scripts}" + "error": t("api.error.simulation.m051", script_name=script_name, allowed_scripts=allowed_scripts) }), 400 script_path = os.path.join(scripts_dir, script_name) @@ -1349,7 +1350,7 @@ def download_simulation_script(script_name: str): if not os.path.exists(script_path): return jsonify({ "success": False, - "error": f"脚本文件不存在: {script_name}" + "error": t("api.error.simulation.m052", script_name=script_name) }), 404 return send_file( @@ -1359,7 +1360,7 @@ def download_simulation_script(script_name: str): ) except Exception as e: - logger.error(f"下载脚本失败: {str(e)}") + logger.error(t("log.simulation_api.m053", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -1389,7 +1390,7 @@ def generate_profiles(): if not graph_id: return jsonify({ "success": False, - "error": "请提供 graph_id" + "error": t("api.error.simulation.m054") }), 400 entity_types = data.get('entity_types') @@ -1406,7 +1407,7 @@ def generate_profiles(): if filtered.filtered_count == 0: return jsonify({ "success": False, - "error": "没有找到符合条件的实体" + "error": t("api.error.simulation.m055") }), 400 generator = OasisProfileGenerator() @@ -1433,7 +1434,7 @@ def generate_profiles(): }) except Exception as e: - logger.error(f"生成Profile失败: {str(e)}") + 
logger.error(t("log.simulation_api.m056", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -1491,7 +1492,7 @@ def start_simulation(): if not simulation_id: return jsonify({ "success": False, - "error": "请提供 simulation_id" + "error": t("api.error.simulation.m057") }), 400 platform = data.get('platform', 'parallel') @@ -1506,18 +1507,18 @@ def start_simulation(): if max_rounds <= 0: return jsonify({ "success": False, - "error": "max_rounds 必须是正整数" + "error": t("api.error.simulation.m058") }), 400 except (ValueError, TypeError): return jsonify({ "success": False, - "error": "max_rounds 必须是有效的整数" + "error": t("api.error.simulation.m059") }), 400 if platform not in ['twitter', 'reddit', 'parallel']: return jsonify({ "success": False, - "error": f"无效的平台类型: {platform},可选: twitter/reddit/parallel" + "error": t("api.error.simulation.m060", platform=platform) }), 400 # 检查模拟是否已准备好 @@ -1527,7 +1528,7 @@ def start_simulation(): if not state: return jsonify({ "success": False, - "error": f"模拟不存在: {simulation_id}" + "error": t("api.error.simulation.m061", simulation_id=simulation_id) }), 404 force_restarted = False @@ -1546,34 +1547,34 @@ def start_simulation(): # 进程确实在运行 if force: # 强制模式:停止运行中的模拟 - logger.info(f"强制模式:停止运行中的模拟 {simulation_id}") + logger.info(t("log.simulation_api.m062", simulation_id=simulation_id)) try: SimulationRunner.stop_simulation(simulation_id) except Exception as e: - logger.warning(f"停止模拟时出现警告: {str(e)}") + logger.warning(t("log.simulation_api.m063", str=str(e))) else: return jsonify({ "success": False, - "error": f"模拟正在运行中,请先调用 /stop 接口停止,或使用 force=true 强制重新开始" + "error": t("api.error.simulation.m064") }), 400 # 如果是强制模式,清理运行日志 if force: - logger.info(f"强制模式:清理模拟日志 {simulation_id}") + logger.info(t("log.simulation_api.m065", simulation_id=simulation_id)) cleanup_result = SimulationRunner.cleanup_simulation_logs(simulation_id) if not cleanup_result.get("success"): - logger.warning(f"清理日志时出现警告: {cleanup_result.get('errors')}") + 
logger.warning(t("log.simulation_api.m066", cleanup_result=cleanup_result.get('errors'))) force_restarted = True # 进程不存在或已结束,重置状态为 ready - logger.info(f"模拟 {simulation_id} 准备工作已完成,重置状态为 ready(原状态: {state.status.value})") + logger.info(t("log.simulation_api.m067", simulation_id=simulation_id, state=state.status.value)) state.status = SimulationStatus.READY manager._save_simulation_state(state) else: # 准备工作未完成 return jsonify({ "success": False, - "error": f"模拟未准备好,当前状态: {state.status.value},请先调用 /prepare 接口" + "error": t("api.error.simulation.m068", state=state.status.value) }), 400 # 获取图谱ID(用于图谱记忆更新) @@ -1590,10 +1591,10 @@ def start_simulation(): if not graph_id: return jsonify({ "success": False, - "error": "启用图谱记忆更新需要有效的 graph_id,请确保项目已构建图谱" + "error": t("api.error.simulation.m069") }), 400 - logger.info(f"启用图谱记忆更新: simulation_id={simulation_id}, graph_id={graph_id}") + logger.info(t("log.simulation_api.m070", simulation_id=simulation_id, graph_id=graph_id)) # 启动模拟 run_state = SimulationRunner.start_simulation( @@ -1628,7 +1629,7 @@ def start_simulation(): }), 400 except Exception as e: - logger.error(f"启动模拟失败: {str(e)}") + logger.error(t("log.simulation_api.m071", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -1663,7 +1664,7 @@ def stop_simulation(): if not simulation_id: return jsonify({ "success": False, - "error": "请提供 simulation_id" + "error": t("api.error.simulation.m072") }), 400 run_state = SimulationRunner.stop_simulation(simulation_id) @@ -1687,7 +1688,7 @@ def stop_simulation(): }), 400 except Exception as e: - logger.error(f"停止模拟失败: {str(e)}") + logger.error(t("log.simulation_api.m073", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -1747,7 +1748,7 @@ def get_run_status(simulation_id: str): }) except Exception as e: - logger.error(f"获取运行状态失败: {str(e)}") + logger.error(t("log.simulation_api.m074", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -1848,7 +1849,7 @@ def 
get_run_status_detail(simulation_id: str): }) except Exception as e: - logger.error(f"获取详细状态失败: {str(e)}") + logger.error(t("log.simulation_api.m075", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -1902,7 +1903,7 @@ def get_simulation_actions(simulation_id: str): }) except Exception as e: - logger.error(f"获取动作历史失败: {str(e)}") + logger.error(t("log.simulation_api.m076", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -1942,7 +1943,7 @@ def get_simulation_timeline(simulation_id: str): }) except Exception as e: - logger.error(f"获取时间线失败: {str(e)}") + logger.error(t("log.simulation_api.m077", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -1969,7 +1970,7 @@ def get_agent_stats(simulation_id: str): }) except Exception as e: - logger.error(f"获取Agent统计失败: {str(e)}") + logger.error(t("log.simulation_api.m078", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -2049,7 +2050,7 @@ def get_simulation_posts(simulation_id: str): }) except Exception as e: - logger.error(f"获取帖子失败: {str(e)}") + logger.error(t("log.simulation_api.m079", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -2124,7 +2125,7 @@ def get_simulation_comments(simulation_id: str): }) except Exception as e: - logger.error(f"获取评论失败: {str(e)}") + logger.error(t("log.simulation_api.m080", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -2197,33 +2198,33 @@ def interview_agent(): if not simulation_id: return jsonify({ "success": False, - "error": "请提供 simulation_id" + "error": t("api.error.simulation.m081") }), 400 if agent_id is None: return jsonify({ "success": False, - "error": "请提供 agent_id" + "error": t("api.error.simulation.m082") }), 400 if not prompt: return jsonify({ "success": False, - "error": "请提供 prompt(采访问题)" + "error": t("api.error.simulation.m083") }), 400 # 验证platform参数 if platform and platform not in ("twitter", "reddit"): return jsonify({ "success": False, - "error": "platform 
参数只能是 'twitter' 或 'reddit'" + "error": t("api.error.simulation.m084") }), 400 # 检查环境状态 if not SimulationRunner.check_env_alive(simulation_id): return jsonify({ "success": False, - "error": "模拟环境未运行或已关闭。请确保模拟已完成并进入等待命令模式。" + "error": t("api.error.simulation.m085") }), 400 # 优化prompt,添加前缀避免Agent调用工具 @@ -2251,11 +2252,11 @@ def interview_agent(): except TimeoutError as e: return jsonify({ "success": False, - "error": f"等待Interview响应超时: {str(e)}" + "error": t("api.error.simulation.m086", str=str(e)) }), 504 except Exception as e: - logger.error(f"Interview失败: {str(e)}") + logger.error(t("log.simulation_api.m087", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -2318,20 +2319,20 @@ def interview_agents_batch(): if not simulation_id: return jsonify({ "success": False, - "error": "请提供 simulation_id" + "error": t("api.error.simulation.m088") }), 400 if not interviews or not isinstance(interviews, list): return jsonify({ "success": False, - "error": "请提供 interviews(采访列表)" + "error": t("api.error.simulation.m089") }), 400 # 验证platform参数 if platform and platform not in ("twitter", "reddit"): return jsonify({ "success": False, - "error": "platform 参数只能是 'twitter' 或 'reddit'" + "error": t("api.error.simulation.m090") }), 400 # 验证每个采访项 @@ -2339,26 +2340,26 @@ def interview_agents_batch(): if 'agent_id' not in interview: return jsonify({ "success": False, - "error": f"采访列表第{i+1}项缺少 agent_id" + "error": t("api.error.simulation.m091", i=i + 1) }), 400 if 'prompt' not in interview: return jsonify({ "success": False, - "error": f"采访列表第{i+1}项缺少 prompt" + "error": t("api.error.simulation.m092", i=i + 1) }), 400 # 验证每项的platform(如果有) item_platform = interview.get('platform') if item_platform and item_platform not in ("twitter", "reddit"): return jsonify({ "success": False, - "error": f"采访列表第{i+1}项的platform只能是 'twitter' 或 'reddit'" + "error": t("api.error.simulation.m093", i=i + 1) }), 400 # 检查环境状态 if not SimulationRunner.check_env_alive(simulation_id): return 
jsonify({ "success": False, - "error": "模拟环境未运行或已关闭。请确保模拟已完成并进入等待命令模式。" + "error": t("api.error.simulation.m094") }), 400 # 优化每个采访项的prompt,添加前缀避免Agent调用工具 @@ -2389,11 +2390,11 @@ def interview_agents_batch(): except TimeoutError as e: return jsonify({ "success": False, - "error": f"等待批量Interview响应超时: {str(e)}" + "error": t("api.error.simulation.m095", str=str(e)) }), 504 except Exception as e: - logger.error(f"批量Interview失败: {str(e)}") + logger.error(t("log.simulation_api.m096", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -2445,27 +2446,27 @@ def interview_all_agents(): if not simulation_id: return jsonify({ "success": False, - "error": "请提供 simulation_id" + "error": t("api.error.simulation.m097") }), 400 if not prompt: return jsonify({ "success": False, - "error": "请提供 prompt(采访问题)" + "error": t("api.error.simulation.m098") }), 400 # 验证platform参数 if platform and platform not in ("twitter", "reddit"): return jsonify({ "success": False, - "error": "platform 参数只能是 'twitter' 或 'reddit'" + "error": t("api.error.simulation.m099") }), 400 # 检查环境状态 if not SimulationRunner.check_env_alive(simulation_id): return jsonify({ "success": False, - "error": "模拟环境未运行或已关闭。请确保模拟已完成并进入等待命令模式。" + "error": t("api.error.simulation.m100") }), 400 # 优化prompt,添加前缀避免Agent调用工具 @@ -2492,11 +2493,11 @@ def interview_all_agents(): except TimeoutError as e: return jsonify({ "success": False, - "error": f"等待全局Interview响应超时: {str(e)}" + "error": t("api.error.simulation.m101", str=str(e)) }), 504 except Exception as e: - logger.error(f"全局Interview失败: {str(e)}") + logger.error(t("log.simulation_api.m102", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -2549,7 +2550,7 @@ def get_interview_history(): if not simulation_id: return jsonify({ "success": False, - "error": "请提供 simulation_id" + "error": t("api.error.simulation.m103") }), 400 history = SimulationRunner.get_interview_history( @@ -2568,7 +2569,7 @@ def get_interview_history(): }) except Exception as 
e: - logger.error(f"获取Interview历史失败: {str(e)}") + logger.error(t("log.simulation_api.m104", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -2608,7 +2609,7 @@ def get_env_status(): if not simulation_id: return jsonify({ "success": False, - "error": "请提供 simulation_id" + "error": t("api.error.simulation.m105") }), 400 env_alive = SimulationRunner.check_env_alive(simulation_id) @@ -2633,7 +2634,7 @@ def get_env_status(): }) except Exception as e: - logger.error(f"获取环境状态失败: {str(e)}") + logger.error(t("log.simulation_api.m106", str=str(e))) return jsonify({ "success": False, "error": str(e), @@ -2676,7 +2677,7 @@ def close_simulation_env(): if not simulation_id: return jsonify({ "success": False, - "error": "请提供 simulation_id" + "error": t("api.error.simulation.m107") }), 400 result = SimulationRunner.close_simulation_env( @@ -2703,7 +2704,7 @@ def close_simulation_env(): }), 400 except Exception as e: - logger.error(f"关闭环境失败: {str(e)}") + logger.error(t("log.simulation_api.m108", str=str(e))) return jsonify({ "success": False, "error": str(e), diff --git a/backend/app/services/oasis_profile_generator.py b/backend/app/services/oasis_profile_generator.py index f822be4e..1cf9158a 100644 --- a/backend/app/services/oasis_profile_generator.py +++ b/backend/app/services/oasis_profile_generator.py @@ -303,7 +303,7 @@ class OasisProfileGenerator: # 必须有graph_id才能进行搜索 if not self.graph_id: - logger.debug(f"跳过Zep检索:未设置graph_id") + logger.debug(t("log.profile_generator.m001")) return results comprehensive_query = t('progress.zepSearchQuery', name=entity_name) @@ -325,11 +325,11 @@ class OasisProfileGenerator: except Exception as e: last_exception = e if attempt < max_retries - 1: - logger.debug(f"Zep边搜索第 {attempt + 1} 次失败: {str(e)[:80]}, 重试中...") + logger.debug(t("log.profile_generator.m002", attempt=attempt + 1, str=str(e)[:80])) time.sleep(delay) delay *= 2 else: - logger.debug(f"Zep边搜索在 {max_retries} 次尝试后仍失败: {e}") + 
logger.debug(t("log.profile_generator.m003", max_retries=max_retries, e=e)) return None def search_nodes(): @@ -349,11 +349,11 @@ class OasisProfileGenerator: except Exception as e: last_exception = e if attempt < max_retries - 1: - logger.debug(f"Zep节点搜索第 {attempt + 1} 次失败: {str(e)[:80]}, 重试中...") + logger.debug(t("log.profile_generator.m004", attempt=attempt + 1, str=str(e)[:80])) time.sleep(delay) delay *= 2 else: - logger.debug(f"Zep节点搜索在 {max_retries} 次尝试后仍失败: {e}") + logger.debug(t("log.profile_generator.m005", max_retries=max_retries, e=e)) return None try: @@ -392,12 +392,12 @@ class OasisProfileGenerator: context_parts.append("相关实体:\n" + "\n".join(f"- {s}" for s in results["node_summaries"][:10])) results["context"] = "\n\n".join(context_parts) - logger.info(f"Zep混合检索完成: {entity_name}, 获取 {len(results['facts'])} 条事实, {len(results['node_summaries'])} 个相关节点") + logger.info(t("log.profile_generator.m006", entity_name=entity_name, len=len(results['facts']), len_2=len(results['node_summaries']))) except concurrent.futures.TimeoutError: - logger.warning(f"Zep检索超时 ({entity_name})") + logger.warning(t("log.profile_generator.m007", entity_name=entity_name)) except Exception as e: - logger.warning(f"Zep检索失败 ({entity_name}): {e}") + logger.warning(t("log.profile_generator.m008", entity_name=entity_name, e=e)) return results @@ -533,7 +533,7 @@ class OasisProfileGenerator: # 检查是否被截断(finish_reason不是'stop') finish_reason = response.choices[0].finish_reason if finish_reason == 'length': - logger.warning(f"LLM输出被截断 (attempt {attempt+1}), 尝试修复...") + logger.warning(t("log.profile_generator.m009", attempt=attempt + 1)) content = self._fix_truncated_json(content) # 尝试解析JSON @@ -549,7 +549,7 @@ class OasisProfileGenerator: return result except json.JSONDecodeError as je: - logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(je)[:80]}") + logger.warning(t("log.profile_generator.m010", attempt=attempt + 1, str=str(je)[:80])) # 尝试修复JSON result = self._try_fix_json(content, 
entity_name, entity_type, entity_summary) @@ -560,12 +560,12 @@ class OasisProfileGenerator: last_error = je except Exception as e: - logger.warning(f"LLM调用失败 (attempt {attempt+1}): {str(e)[:80]}") + logger.warning(t("log.profile_generator.m011", attempt=attempt + 1, str=str(e)[:80])) last_error = e import time time.sleep(1 * (attempt + 1)) # 指数退避 - logger.warning(f"LLM生成人设失败({max_attempts}次尝试): {last_error}, 使用规则生成") + logger.warning(t("log.profile_generator.m012", max_attempts=max_attempts, last_error=last_error)) return self._generate_profile_rule_based( entity_name, entity_type, entity_summary, entity_attributes ) @@ -645,7 +645,7 @@ class OasisProfileGenerator: # 如果提取到了有意义的内容,标记为已修复 if bio_match or persona_match: - logger.info(f"从损坏的JSON中提取了部分信息") + logger.info(t("log.profile_generator.m013")) return { "bio": bio, "persona": persona, @@ -653,7 +653,7 @@ class OasisProfileGenerator: } # 7. 完全失败,返回基础结构 - logger.warning(f"JSON修复失败,返回基础结构") + logger.warning(t("log.profile_generator.m014")) return { "bio": entity_summary[:200] if entity_summary else f"{entity_type}: {entity_name}", "persona": entity_summary or f"{entity_name}是一个{entity_type}。" @@ -904,7 +904,7 @@ class OasisProfileGenerator: writer.writeheader() writer.writerows(profiles_data) except Exception as e: - logger.warning(f"实时保存 profiles 失败: {e}") + logger.warning(t("log.profile_generator.m015", e=e)) # Capture locale before spawning thread pool workers current_locale = get_locale() @@ -927,7 +927,7 @@ class OasisProfileGenerator: return idx, profile, None except Exception as e: - logger.error(f"生成实体 {entity.name} 的人设失败: {str(e)}") + logger.error(t("log.profile_generator.m016", entity=entity.name, str=str(e))) # 创建一个基础profile fallback_profile = OasisAgentProfile( user_id=idx, @@ -940,7 +940,7 @@ class OasisProfileGenerator: ) return idx, fallback_profile, str(e) - logger.info(f"开始并行生成 {total} 个Agent人设(并行数: {parallel_count})...") + logger.info(t("log.profile_generator.m017", total=total, 
parallel_count=parallel_count)) print(f"\n{'='*60}") print(f"开始生成Agent人设 - 共 {total} 个实体,并行数: {parallel_count}") print(f"{'='*60}\n") @@ -977,12 +977,12 @@ class OasisProfileGenerator: ) if error: - logger.warning(f"[{current}/{total}] {entity.name} 使用备用人设: {error}") + logger.warning(t("log.profile_generator.m018", current=current, total=total, entity=entity.name, error=error)) else: - logger.info(f"[{current}/{total}] 成功生成人设: {entity.name} ({entity_type})") + logger.info(t("log.profile_generator.m019", current=current, total=total, entity=entity.name, entity_type=entity_type)) except Exception as e: - logger.error(f"处理实体 {entity.name} 时发生异常: {str(e)}") + logger.error(t("log.profile_generator.m020", entity=entity.name, str=str(e))) with lock: completed_count[0] += 1 profiles[idx] = OasisAgentProfile( @@ -1106,7 +1106,7 @@ class OasisProfileGenerator: ] writer.writerow(row) - logger.info(f"已保存 {len(profiles)} 个Twitter Profile到 {file_path} (OASIS CSV格式)") + logger.info(t("log.profile_generator.m021", len=len(profiles), file_path=file_path)) def _normalize_gender(self, gender: Optional[str]) -> str: """ @@ -1180,7 +1180,7 @@ class OasisProfileGenerator: with open(file_path, 'w', encoding='utf-8') as f: json.dump(data, f, ensure_ascii=False, indent=2) - logger.info(f"已保存 {len(profiles)} 个Reddit Profile到 {file_path} (JSON格式,包含user_id字段)") + logger.info(t("log.profile_generator.m022", len=len(profiles), file_path=file_path)) # 保留旧方法名作为别名,保持向后兼容 def save_profiles_to_json( @@ -1190,6 +1190,6 @@ class OasisProfileGenerator: platform: str = "reddit" ): """[已废弃] 请使用 save_profiles() 方法""" - logger.warning("save_profiles_to_json已废弃,请使用save_profiles方法") + logger.warning(t("log.profile_generator.m023")) self.save_profiles(profiles, file_path, platform) diff --git a/backend/app/services/report_agent.py b/backend/app/services/report_agent.py index 13b760b3..ddba4e9d 100644 --- a/backend/app/services/report_agent.py +++ b/backend/app/services/report_agent.py @@ -1319,7 +1319,7 @@ 
class ReportAgent: # 最后一次迭代也返回 None,跳出循环进入强制收尾 break - logger.debug(f"LLM响应: {response[:200]}...") + logger.debug(t("log.report_agent.m001", response=response[:200])) # 解析一次,复用结果 tool_calls = self._parse_tool_calls(response) diff --git a/backend/app/services/simulation_config_generator.py b/backend/app/services/simulation_config_generator.py index cb77f6b6..e03cd425 100644 --- a/backend/app/services/simulation_config_generator.py +++ b/backend/app/services/simulation_config_generator.py @@ -269,7 +269,7 @@ class SimulationConfigGenerator: Returns: SimulationParameters: 完整的模拟参数 """ - logger.info(f"开始智能生成模拟配置: simulation_id={simulation_id}, 实体数={len(entities)}") + logger.info(t("log.simulation_config.m001", simulation_id=simulation_id, len=len(entities))) # 计算总步骤数 num_batches = math.ceil(len(entities) / self.AGENTS_PER_BATCH) @@ -328,7 +328,7 @@ class SimulationConfigGenerator: reasoning_parts.append(t('progress.agentConfigResult', count=len(all_agent_configs))) # ========== 为初始帖子分配发布者 Agent ========== - logger.info("为初始帖子分配合适的发布者 Agent...") + logger.info(t("log.simulation_config.m002")) event_config = self._assign_initial_post_agents(event_config, all_agent_configs) assigned_count = len([p for p in event_config.initial_posts if p.get("poster_agent_id") is not None]) reasoning_parts.append(t('progress.postAssignResult', count=assigned_count)) @@ -374,7 +374,7 @@ class SimulationConfigGenerator: generation_reasoning=" | ".join(reasoning_parts) ) - logger.info(f"模拟配置生成完成: {len(params.agent_configs)} 个Agent配置") + logger.info(t("log.simulation_config.m003", len=len(params.agent_configs))) return params @@ -456,14 +456,14 @@ class SimulationConfigGenerator: # 检查是否被截断 if finish_reason == 'length': - logger.warning(f"LLM输出被截断 (attempt {attempt+1})") + logger.warning(t("log.simulation_config.m004", attempt=attempt + 1)) content = self._fix_truncated_json(content) # 尝试解析JSON try: return json.loads(content) except json.JSONDecodeError as e: - logger.warning(f"JSON解析失败 (attempt 
{attempt+1}): {str(e)[:80]}") + logger.warning(t("log.simulation_config.m005", attempt=attempt + 1, str=str(e)[:80])) # 尝试修复JSON fixed = self._try_fix_config_json(content) @@ -473,7 +473,7 @@ class SimulationConfigGenerator: last_error = e except Exception as e: - logger.warning(f"LLM调用失败 (attempt {attempt+1}): {str(e)[:80]}") + logger.warning(t("log.simulation_config.m006", attempt=attempt + 1, str=str(e)[:80])) last_error = e import time time.sleep(2 * (attempt + 1)) @@ -591,7 +591,7 @@ class SimulationConfigGenerator: try: return self._call_llm_with_retry(prompt, system_prompt) except Exception as e: - logger.warning(f"时间配置LLM生成失败: {e}, 使用默认配置") + logger.warning(t("log.simulation_config.m007", e=e)) return self._get_default_time_config(num_entities) def _get_default_time_config(self, num_entities: int) -> Dict[str, Any]: @@ -616,17 +616,17 @@ class SimulationConfigGenerator: # 验证并修正:确保不超过总agent数 if agents_per_hour_min > num_entities: - logger.warning(f"agents_per_hour_min ({agents_per_hour_min}) 超过总Agent数 ({num_entities}),已修正") + logger.warning(t("log.simulation_config.m008", agents_per_hour_min=agents_per_hour_min, num_entities=num_entities)) agents_per_hour_min = max(1, num_entities // 10) if agents_per_hour_max > num_entities: - logger.warning(f"agents_per_hour_max ({agents_per_hour_max}) 超过总Agent数 ({num_entities}),已修正") + logger.warning(t("log.simulation_config.m009", agents_per_hour_max=agents_per_hour_max, num_entities=num_entities)) agents_per_hour_max = max(agents_per_hour_min + 1, num_entities // 2) # 确保 min < max if agents_per_hour_min >= agents_per_hour_max: agents_per_hour_min = max(1, agents_per_hour_max // 2) - logger.warning(f"agents_per_hour_min >= max,已修正为 {agents_per_hour_min}") + logger.warning(t("log.simulation_config.m010", agents_per_hour_min=agents_per_hour_min)) return TimeSimulationConfig( total_simulation_hours=result.get("total_simulation_hours", 72), @@ -708,7 +708,7 @@ class SimulationConfigGenerator: try: return 
self._call_llm_with_retry(prompt, system_prompt) except Exception as e: - logger.warning(f"事件配置LLM生成失败: {e}, 使用默认配置") + logger.warning(t("log.simulation_config.m011", e=e)) return { "hot_topics": [], "narrative_direction": "", @@ -791,7 +791,7 @@ class SimulationConfigGenerator: # 3. 如果仍未找到,使用影响力最高的 agent if matched_agent_id is None: - logger.warning(f"未找到类型 '{poster_type}' 的匹配 Agent,使用影响力最高的 Agent") + logger.warning(t("log.simulation_config.m012", poster_type=poster_type)) if agent_configs: # 按影响力排序,选择影响力最高的 sorted_agents = sorted(agent_configs, key=lambda a: a.influence_weight, reverse=True) @@ -805,7 +805,7 @@ class SimulationConfigGenerator: "poster_agent_id": matched_agent_id }) - logger.info(f"初始帖子分配: poster_type='{poster_type}' -> agent_id={matched_agent_id}") + logger.info(t("log.simulation_config.m013", poster_type=poster_type, matched_agent_id=matched_agent_id)) event_config.initial_posts = updated_posts return event_config @@ -873,7 +873,7 @@ class SimulationConfigGenerator: result = self._call_llm_with_retry(prompt, system_prompt) llm_configs = {cfg["agent_id"]: cfg for cfg in result.get("agent_configs", [])} except Exception as e: - logger.warning(f"Agent配置批次LLM生成失败: {e}, 使用规则生成") + logger.warning(t("log.simulation_config.m014", e=e)) llm_configs = {} # 构建AgentActivityConfig对象 diff --git a/backend/app/services/simulation_ipc.py b/backend/app/services/simulation_ipc.py index 9d70d0be..be2eac32 100644 --- a/backend/app/services/simulation_ipc.py +++ b/backend/app/services/simulation_ipc.py @@ -18,6 +18,7 @@ from datetime import datetime from enum import Enum from ..utils.logger import get_logger +from ..utils.locale import t logger = get_logger('mirofish.simulation_ipc') @@ -148,7 +149,7 @@ class SimulationIPCClient: with open(command_file, 'w', encoding='utf-8') as f: json.dump(command.to_dict(), f, ensure_ascii=False, indent=2) - logger.info(f"发送IPC命令: {command_type.value}, command_id={command_id}") + logger.info(t("log.simulation_ipc.m001", 
command_type=command_type.value, command_id=command_id)) # 等待响应 response_file = os.path.join(self.responses_dir, f"{command_id}.json") @@ -168,15 +169,15 @@ class SimulationIPCClient: except OSError: pass - logger.info(f"收到IPC响应: command_id={command_id}, status={response.status.value}") + logger.info(t("log.simulation_ipc.m002", command_id=command_id, response=response.status.value)) return response except (json.JSONDecodeError, KeyError) as e: - logger.warning(f"解析响应失败: {e}") + logger.warning(t("log.simulation_ipc.m003", e=e)) time.sleep(poll_interval) # 超时 - logger.error(f"等待IPC响应超时: command_id={command_id}") + logger.error(t("log.simulation_ipc.m004", command_id=command_id)) # 清理命令文件 try: @@ -354,7 +355,7 @@ class SimulationIPCServer: data = json.load(f) return IPCCommand.from_dict(data) except (json.JSONDecodeError, KeyError, OSError) as e: - logger.warning(f"读取命令文件失败: {filepath}, {e}") + logger.warning(t("log.simulation_ipc.m005", filepath=filepath, e=e)) continue return None diff --git a/backend/app/services/simulation_manager.py b/backend/app/services/simulation_manager.py index 0d161a90..2f297e2c 100644 --- a/backend/app/services/simulation_manager.py +++ b/backend/app/services/simulation_manager.py @@ -223,7 +223,7 @@ class SimulationManager: ) self._save_simulation_state(state) - logger.info(f"创建模拟: {simulation_id}, project={project_id}, graph={graph_id}") + logger.info(t("log.simulation_manager.m001", simulation_id=simulation_id, project_id=project_id, graph_id=graph_id)) return state @@ -442,13 +442,12 @@ class SimulationManager: state.status = SimulationStatus.READY self._save_simulation_state(state) - logger.info(f"模拟准备完成: {simulation_id}, " - f"entities={state.entities_count}, profiles={state.profiles_count}") + logger.info(t("log.simulation_manager.m002", simulation_id=simulation_id, state=state.entities_count, state_2=state.profiles_count)) return state except Exception as e: - logger.error(f"模拟准备失败: {simulation_id}, error={str(e)}") + 
logger.error(t("log.simulation_manager.m003", simulation_id=simulation_id, str=str(e))) import traceback logger.error(traceback.format_exc()) state.status = SimulationStatus.FAILED diff --git a/backend/app/services/simulation_runner.py b/backend/app/services/simulation_runner.py index e86021f8..3afd2278 100644 --- a/backend/app/services/simulation_runner.py +++ b/backend/app/services/simulation_runner.py @@ -20,7 +20,7 @@ from queue import Queue from ..config import Config from ..utils.logger import get_logger -from ..utils.locale import get_locale, set_locale +from ..utils.locale import get_locale, set_locale, t from .zep_graph_memory_updater import ZepGraphMemoryManager from .simulation_ipc import SimulationIPCClient, CommandType, IPCResponse @@ -292,7 +292,7 @@ class SimulationRunner: return state except Exception as e: - logger.error(f"加载运行状态失败: {str(e)}") + logger.error(t("log.simulation_runner.m001", str=str(e))) return None @classmethod @@ -357,7 +357,7 @@ class SimulationRunner: original_rounds = total_rounds total_rounds = min(total_rounds, max_rounds) if total_rounds < original_rounds: - logger.info(f"轮数已截断: {original_rounds} -> {total_rounds} (max_rounds={max_rounds})") + logger.info(t("log.simulation_runner.m002", original_rounds=original_rounds, total_rounds=total_rounds, max_rounds=max_rounds)) state = SimulationRunState( simulation_id=simulation_id, @@ -377,9 +377,9 @@ class SimulationRunner: try: ZepGraphMemoryManager.create_updater(simulation_id, graph_id) cls._graph_memory_enabled[simulation_id] = True - logger.info(f"已启用图谱记忆更新: simulation_id={simulation_id}, graph_id={graph_id}") + logger.info(t("log.simulation_runner.m003", simulation_id=simulation_id, graph_id=graph_id)) except Exception as e: - logger.error(f"创建图谱记忆更新器失败: {e}") + logger.error(t("log.simulation_runner.m004", e=e)) cls._graph_memory_enabled[simulation_id] = False else: cls._graph_memory_enabled[simulation_id] = False @@ -468,7 +468,7 @@ class SimulationRunner: 
monitor_thread.start() cls._monitor_threads[simulation_id] = monitor_thread - logger.info(f"模拟启动成功: {simulation_id}, pid={process.pid}, platform={platform}") + logger.info(t("log.simulation_runner.m005", simulation_id=simulation_id, process=process.pid, platform=platform)) except Exception as e: state.runner_status = RunnerStatus.FAILED @@ -527,7 +527,7 @@ class SimulationRunner: if exit_code == 0: state.runner_status = RunnerStatus.COMPLETED state.completed_at = datetime.now().isoformat() - logger.info(f"模拟完成: {simulation_id}") + logger.info(t("log.simulation_runner.m006", simulation_id=simulation_id)) else: state.runner_status = RunnerStatus.FAILED # 从主日志文件读取错误信息 @@ -540,14 +540,14 @@ class SimulationRunner: except Exception: pass state.error = f"进程退出码: {exit_code}, 错误: {error_info}" - logger.error(f"模拟失败: {simulation_id}, error={state.error}") + logger.error(t("log.simulation_runner.m007", simulation_id=simulation_id, state=state.error)) state.twitter_running = False state.reddit_running = False cls._save_run_state(state) except Exception as e: - logger.error(f"监控线程异常: {simulation_id}, error={str(e)}") + logger.error(t("log.simulation_runner.m008", simulation_id=simulation_id, str=str(e))) state.runner_status = RunnerStatus.FAILED state.error = str(e) cls._save_run_state(state) @@ -557,9 +557,9 @@ class SimulationRunner: if cls._graph_memory_enabled.get(simulation_id, False): try: ZepGraphMemoryManager.stop_updater(simulation_id) - logger.info(f"已停止图谱记忆更新: simulation_id={simulation_id}") + logger.info(t("log.simulation_runner.m009", simulation_id=simulation_id)) except Exception as e: - logger.error(f"停止图谱记忆更新器失败: {e}") + logger.error(t("log.simulation_runner.m010", e=e)) cls._graph_memory_enabled.pop(simulation_id, None) # 清理进程资源 @@ -624,11 +624,11 @@ class SimulationRunner: if platform == "twitter": state.twitter_completed = True state.twitter_running = False - logger.info(f"Twitter 模拟已完成: {state.simulation_id}, total_rounds={action_data.get('total_rounds')}, 
total_actions={action_data.get('total_actions')}") + logger.info(t("log.simulation_runner.m011", state=state.simulation_id, action_data=action_data.get('total_rounds'), action_data_2=action_data.get('total_actions'))) elif platform == "reddit": state.reddit_completed = True state.reddit_running = False - logger.info(f"Reddit 模拟已完成: {state.simulation_id}, total_rounds={action_data.get('total_rounds')}, total_actions={action_data.get('total_actions')}") + logger.info(t("log.simulation_runner.m012", state=state.simulation_id, action_data=action_data.get('total_rounds'), action_data_2=action_data.get('total_actions'))) # 检查是否所有启用的平台都已完成 # 如果只运行了一个平台,只检查那个平台 @@ -637,7 +637,7 @@ class SimulationRunner: if all_completed: state.runner_status = RunnerStatus.COMPLETED state.completed_at = datetime.now().isoformat() - logger.info(f"所有平台模拟已完成: {state.simulation_id}") + logger.info(t("log.simulation_runner.m013", state=state.simulation_id)) # 更新轮次信息(从 round_end 事件) elif event_type == "round_end": @@ -687,7 +687,7 @@ class SimulationRunner: pass return f.tell() except Exception as e: - logger.warning(f"读取动作日志失败: {log_path}, error={e}") + logger.warning(t("log.simulation_runner.m014", log_path=log_path, e=e)) return position @classmethod @@ -730,7 +730,7 @@ class SimulationRunner: if IS_WINDOWS: # Windows: 使用 taskkill 命令终止进程树 # /F = 强制终止, /T = 终止进程树(包括子进程) - logger.info(f"终止进程树 (Windows): simulation={simulation_id}, pid={process.pid}") + logger.info(t("log.simulation_runner.m015", simulation_id=simulation_id, process=process.pid)) try: # 先尝试优雅终止 subprocess.run( @@ -742,7 +742,7 @@ class SimulationRunner: process.wait(timeout=timeout) except subprocess.TimeoutExpired: # 强制终止 - logger.warning(f"进程未响应,强制终止: {simulation_id}") + logger.warning(t("log.simulation_runner.m016", simulation_id=simulation_id)) subprocess.run( ['taskkill', '/F', '/PID', str(process.pid), '/T'], capture_output=True, @@ -750,7 +750,7 @@ class SimulationRunner: ) process.wait(timeout=5) except Exception as e: - 
logger.warning(f"taskkill 失败,尝试 terminate: {e}") + logger.warning(t("log.simulation_runner.m017", e=e)) process.terminate() try: process.wait(timeout=5) @@ -760,7 +760,7 @@ class SimulationRunner: # Unix: 使用进程组终止 # 由于使用了 start_new_session=True,进程组 ID 等于主进程 PID pgid = os.getpgid(process.pid) - logger.info(f"终止进程组 (Unix): simulation={simulation_id}, pgid={pgid}") + logger.info(t("log.simulation_runner.m018", simulation_id=simulation_id, pgid=pgid)) # 先发送 SIGTERM 给整个进程组 os.killpg(pgid, signal.SIGTERM) @@ -769,7 +769,7 @@ class SimulationRunner: process.wait(timeout=timeout) except subprocess.TimeoutExpired: # 如果超时后还没结束,强制发送 SIGKILL - logger.warning(f"进程组未响应 SIGTERM,强制终止: {simulation_id}") + logger.warning(t("log.simulation_runner.m019", simulation_id=simulation_id)) os.killpg(pgid, signal.SIGKILL) process.wait(timeout=5) @@ -795,7 +795,7 @@ class SimulationRunner: # 进程已经不存在 pass except Exception as e: - logger.error(f"终止进程组失败: {simulation_id}, error={e}") + logger.error(t("log.simulation_runner.m020", simulation_id=simulation_id, e=e)) # 回退到直接终止进程 try: process.terminate() @@ -813,12 +813,12 @@ class SimulationRunner: if cls._graph_memory_enabled.get(simulation_id, False): try: ZepGraphMemoryManager.stop_updater(simulation_id) - logger.info(f"已停止图谱记忆更新: simulation_id={simulation_id}") + logger.info(t("log.simulation_runner.m021", simulation_id=simulation_id)) except Exception as e: - logger.error(f"停止图谱记忆更新器失败: {e}") + logger.error(t("log.simulation_runner.m022", e=e)) cls._graph_memory_enabled.pop(simulation_id, None) - logger.info(f"模拟已停止: {simulation_id}") + logger.info(t("log.simulation_runner.m023", simulation_id=simulation_id)) return state @classmethod @@ -1172,7 +1172,7 @@ class SimulationRunner: if simulation_id in cls._run_states: del cls._run_states[simulation_id] - logger.info(f"清理模拟日志完成: {simulation_id}, 删除文件: {cleaned_files}") + logger.info(t("log.simulation_runner.m024", simulation_id=simulation_id, cleaned_files=cleaned_files)) return { "success": 
len(errors) == 0, @@ -1202,13 +1202,13 @@ class SimulationRunner: if not has_processes and not has_updaters: return # 没有需要清理的内容,静默返回 - logger.info("正在清理所有模拟进程...") + logger.info(t("log.simulation_runner.m025")) # 首先停止所有图谱记忆更新器(stop_all 内部会打印日志) try: ZepGraphMemoryManager.stop_all() except Exception as e: - logger.error(f"停止图谱记忆更新器失败: {e}") + logger.error(t("log.simulation_runner.m026", e=e)) cls._graph_memory_enabled.clear() # 复制字典以避免在迭代时修改 @@ -1217,7 +1217,7 @@ class SimulationRunner: for simulation_id, process in processes: try: if process.poll() is None: # 进程仍在运行 - logger.info(f"终止模拟进程: {simulation_id}, pid={process.pid}") + logger.info(t("log.simulation_runner.m027", simulation_id=simulation_id, process=process.pid)) try: # 使用跨平台的进程终止方法 @@ -1244,7 +1244,7 @@ class SimulationRunner: try: sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) state_file = os.path.join(sim_dir, "state.json") - logger.info(f"尝试更新 state.json: {state_file}") + logger.info(t("log.simulation_runner.m028", state_file=state_file)) if os.path.exists(state_file): with open(state_file, 'r', encoding='utf-8') as f: state_data = json.load(f) @@ -1252,14 +1252,14 @@ class SimulationRunner: state_data['updated_at'] = datetime.now().isoformat() with open(state_file, 'w', encoding='utf-8') as f: json.dump(state_data, f, indent=2, ensure_ascii=False) - logger.info(f"已更新 state.json 状态为 stopped: {simulation_id}") + logger.info(t("log.simulation_runner.m029", simulation_id=simulation_id)) else: - logger.warning(f"state.json 不存在: {state_file}") + logger.warning(t("log.simulation_runner.m030", state_file=state_file)) except Exception as state_err: - logger.warning(f"更新 state.json 失败: {simulation_id}, error={state_err}") + logger.warning(t("log.simulation_runner.m031", simulation_id=simulation_id, state_err=state_err)) except Exception as e: - logger.error(f"清理进程失败: {simulation_id}, error={e}") + logger.error(t("log.simulation_runner.m032", simulation_id=simulation_id, e=e)) # 清理文件句柄 for 
simulation_id, file_handle in list(cls._stdout_files.items()): @@ -1282,7 +1282,7 @@ class SimulationRunner: cls._processes.clear() cls._action_queues.clear() - logger.info("模拟进程清理完成") + logger.info(t("log.simulation_runner.m033")) @classmethod def register_cleanup(cls): @@ -1320,7 +1320,7 @@ class SimulationRunner: """信号处理器:先清理模拟进程,再调用原处理器""" # 只有在有进程需要清理时才打印日志 if cls._processes or cls._graph_memory_enabled: - logger.info(f"收到信号 {signum},开始清理...") + logger.info(t("log.simulation_runner.m034", signum=signum)) cls.cleanup_all_simulations() # 调用原有的信号处理器,让 Flask 正常退出 @@ -1353,7 +1353,7 @@ class SimulationRunner: signal.signal(signal.SIGHUP, cleanup_handler) except ValueError: # 不在主线程中,只能使用 atexit - logger.warning("无法注册信号处理器(不在主线程),仅使用 atexit") + logger.warning(t("log.simulation_runner.m035")) _cleanup_registered = True @@ -1462,7 +1462,7 @@ class SimulationRunner: if not ipc_client.check_env_alive(): raise ValueError(f"模拟环境未运行或已关闭,无法执行Interview: {simulation_id}") - logger.info(f"发送Interview命令: simulation_id={simulation_id}, agent_id={agent_id}, platform={platform}") + logger.info(t("log.simulation_runner.m036", simulation_id=simulation_id, agent_id=agent_id, platform=platform)) response = ipc_client.send_interview( agent_id=agent_id, @@ -1524,7 +1524,7 @@ class SimulationRunner: if not ipc_client.check_env_alive(): raise ValueError(f"模拟环境未运行或已关闭,无法执行Interview: {simulation_id}") - logger.info(f"发送批量Interview命令: simulation_id={simulation_id}, count={len(interviews)}, platform={platform}") + logger.info(t("log.simulation_runner.m037", simulation_id=simulation_id, len=len(interviews), platform=platform)) response = ipc_client.send_batch_interview( interviews=interviews, @@ -1598,7 +1598,7 @@ class SimulationRunner: "prompt": prompt }) - logger.info(f"发送全局Interview命令: simulation_id={simulation_id}, agent_count={len(interviews)}, platform={platform}") + logger.info(t("log.simulation_runner.m038", simulation_id=simulation_id, len=len(interviews), platform=platform)) return 
cls.interview_agents_batch( simulation_id=simulation_id, @@ -1637,7 +1637,7 @@ class SimulationRunner: "message": "环境已经关闭" } - logger.info(f"发送关闭环境命令: simulation_id={simulation_id}") + logger.info(t("log.simulation_runner.m039", simulation_id=simulation_id)) try: response = ipc_client.send_close_env(timeout=timeout) @@ -1709,7 +1709,7 @@ class SimulationRunner: conn.close() except Exception as e: - logger.error(f"读取Interview历史失败 ({platform_name}): {e}") + logger.error(t("log.simulation_runner.m040", platform_name=platform_name, e=e)) return results diff --git a/backend/app/services/zep_entity_reader.py b/backend/app/services/zep_entity_reader.py index e664959c..905468ac 100644 --- a/backend/app/services/zep_entity_reader.py +++ b/backend/app/services/zep_entity_reader.py @@ -12,6 +12,7 @@ from .graphiti_adapter import GraphitiAdapter from ..config import Config from ..utils.logger import get_logger from ..utils.zep_paging import fetch_all_nodes, fetch_all_edges +from ..utils.locale import t logger = get_logger('mirofish.zep_entity_reader') @@ -110,13 +111,12 @@ class ZepEntityReader: last_exception = e if attempt < max_retries - 1: logger.warning( - f"Zep {operation_name} 第 {attempt + 1} 次尝试失败: {str(e)[:100]}, " - f"{delay:.1f}秒后重试..." 
+ t("log.zep_entity_reader.m001", operation_name=operation_name, attempt=attempt + 1, str=str(e)[:100], delay=delay) ) time.sleep(delay) delay *= 2 # 指数退避 else: - logger.error(f"Zep {operation_name} 在 {max_retries} 次尝试后仍失败: {str(e)}") + logger.error(t("log.zep_entity_reader.m002", operation_name=operation_name, max_retries=max_retries, str=str(e))) raise last_exception @@ -130,7 +130,7 @@ class ZepEntityReader: Returns: 节点列表 """ - logger.info(f"获取图谱 {graph_id} 的所有节点...") + logger.info(t("log.zep_entity_reader.m003", graph_id=graph_id)) nodes = fetch_all_nodes(self.client, graph_id) @@ -144,7 +144,7 @@ class ZepEntityReader: "attributes": node.attributes or {}, }) - logger.info(f"共获取 {len(nodes_data)} 个节点") + logger.info(t("log.zep_entity_reader.m004", len=len(nodes_data))) return nodes_data def get_all_edges(self, graph_id: str) -> List[Dict[str, Any]]: @@ -157,7 +157,7 @@ class ZepEntityReader: Returns: 边列表 """ - logger.info(f"获取图谱 {graph_id} 的所有边...") + logger.info(t("log.zep_entity_reader.m005", graph_id=graph_id)) edges = fetch_all_edges(self.client, graph_id) @@ -172,7 +172,7 @@ class ZepEntityReader: "attributes": edge.attributes or {}, }) - logger.info(f"共获取 {len(edges_data)} 条边") + logger.info(t("log.zep_entity_reader.m006", len=len(edges_data))) return edges_data def get_node_edges(self, node_uuid: str) -> List[Dict[str, Any]]: @@ -205,7 +205,7 @@ class ZepEntityReader: return edges_data except Exception as e: - logger.warning(f"获取节点 {node_uuid} 的边失败: {str(e)}") + logger.warning(t("log.zep_entity_reader.m007", node_uuid=node_uuid, str=str(e))) return [] def filter_defined_entities( @@ -229,7 +229,7 @@ class ZepEntityReader: Returns: FilteredEntities: 过滤后的实体集合 """ - logger.info(f"开始筛选图谱 {graph_id} 的实体...") + logger.info(t("log.zep_entity_reader.m008", graph_id=graph_id)) # Look up ontology from project to classify entities ontology = None @@ -340,8 +340,7 @@ class ZepEntityReader: filtered_entities.append(entity) - logger.info(f"筛选完成: 总节点 {total_count}, 
符合条件 {len(filtered_entities)}, " - f"实体类型: {entity_types_found}") + logger.info(t("log.zep_entity_reader.m009", total_count=total_count, len=len(filtered_entities), entity_types_found=entity_types_found)) return FilteredEntities( entities=filtered_entities, @@ -427,7 +426,7 @@ class ZepEntityReader: ) except Exception as e: - logger.error(f"获取实体 {entity_uuid} 失败: {str(e)}") + logger.error(t("log.zep_entity_reader.m010", entity_uuid=entity_uuid, str=str(e))) return None def get_entities_by_type( diff --git a/backend/app/services/zep_graph_memory_updater.py b/backend/app/services/zep_graph_memory_updater.py index 02f68209..83a748e5 100644 --- a/backend/app/services/zep_graph_memory_updater.py +++ b/backend/app/services/zep_graph_memory_updater.py @@ -16,7 +16,7 @@ from .graphiti_adapter import GraphitiAdapter from ..config import Config from ..utils.logger import get_logger -from ..utils.locale import get_locale, set_locale +from ..utils.locale import get_locale, set_locale, t logger = get_logger('mirofish.zep_graph_memory_updater') @@ -261,7 +261,7 @@ class ZepGraphMemoryUpdater: self._failed_count = 0 # 发送失败的批次数 self._skipped_count = 0 # 被过滤跳过的活动数(DO_NOTHING) - logger.info(f"ZepGraphMemoryUpdater 初始化完成: graph_id={graph_id}, batch_size={self.BATCH_SIZE}") + logger.info(t("log.zep_graph_memory_updater.m001", graph_id=graph_id, self=self.BATCH_SIZE)) def _get_platform_display_name(self, platform: str) -> str: """获取平台的显示名称""" @@ -283,7 +283,7 @@ class ZepGraphMemoryUpdater: name=f"ZepMemoryUpdater-{self.graph_id[:8]}" ) self._worker_thread.start() - logger.info(f"ZepGraphMemoryUpdater 已启动: graph_id={self.graph_id}") + logger.info(t("log.zep_graph_memory_updater.m002", self=self.graph_id)) def stop(self): """停止后台工作线程""" @@ -295,12 +295,7 @@ class ZepGraphMemoryUpdater: if self._worker_thread and self._worker_thread.is_alive(): self._worker_thread.join(timeout=10) - logger.info(f"ZepGraphMemoryUpdater 已停止: graph_id={self.graph_id}, " - 
f"total_activities={self._total_activities}, " - f"batches_sent={self._total_sent}, " - f"items_sent={self._total_items_sent}, " - f"failed={self._failed_count}, " - f"skipped={self._skipped_count}") + logger.info(t("log.zep_graph_memory_updater.m003", self=self.graph_id, self_2=self._total_activities, self_3=self._total_sent, self_4=self._total_items_sent, self_5=self._failed_count, self_6=self._skipped_count)) def add_activity(self, activity: AgentActivity): """ @@ -330,7 +325,7 @@ class ZepGraphMemoryUpdater: self._activity_queue.put(activity) self._total_activities += 1 - logger.debug(f"添加活动到Zep队列: {activity.agent_name} - {activity.action_type}") + logger.debug(t("log.zep_graph_memory_updater.m004", activity=activity.agent_name, activity_2=activity.action_type)) def add_activity_from_dict(self, data: Dict[str, Any], platform: str): """ @@ -385,7 +380,7 @@ class ZepGraphMemoryUpdater: pass except Exception as e: - logger.error(f"工作循环异常: {e}") + logger.error(t("log.zep_graph_memory_updater.m005", e=e)) time.sleep(1) def _send_batch_activities(self, activities: List[AgentActivity], platform: str): @@ -415,16 +410,16 @@ class ZepGraphMemoryUpdater: self._total_sent += 1 self._total_items_sent += len(activities) display_name = self._get_platform_display_name(platform) - logger.info(f"成功批量发送 {len(activities)} 条{display_name}活动到图谱 {self.graph_id}") - logger.debug(f"批量内容预览: {combined_text[:200]}...") + logger.info(t("log.zep_graph_memory_updater.m006", len=len(activities), display_name=display_name, self=self.graph_id)) + logger.debug(t("log.zep_graph_memory_updater.m007", combined_text=combined_text[:200])) return except Exception as e: if attempt < self.MAX_RETRIES - 1: - logger.warning(f"批量发送到Zep失败 (尝试 {attempt + 1}/{self.MAX_RETRIES}): {e}") + logger.warning(t("log.zep_graph_memory_updater.m008", attempt=attempt + 1, self=self.MAX_RETRIES, e=e)) time.sleep(self.RETRY_DELAY * (attempt + 1)) else: - logger.error(f"批量发送到Zep失败,已重试{self.MAX_RETRIES}次: {e}") + 
logger.error(t("log.zep_graph_memory_updater.m009", self=self.MAX_RETRIES, e=e)) self._failed_count += 1 def _flush_remaining(self): @@ -446,7 +441,7 @@ class ZepGraphMemoryUpdater: for platform, buffer in self._platform_buffers.items(): if buffer: display_name = self._get_platform_display_name(platform) - logger.info(f"发送{display_name}平台剩余的 {len(buffer)} 条活动") + logger.info(t("log.zep_graph_memory_updater.m010", display_name=display_name, len=len(buffer))) self._send_batch_activities(buffer, platform) # 清空所有缓冲区 for platform in self._platform_buffers: @@ -502,7 +497,7 @@ class ZepGraphMemoryManager: updater.start() cls._updaters[simulation_id] = updater - logger.info(f"创建图谱记忆更新器: simulation_id={simulation_id}, graph_id={graph_id}") + logger.info(t("log.zep_graph_memory_updater.m011", simulation_id=simulation_id, graph_id=graph_id)) return updater @classmethod @@ -517,7 +512,7 @@ class ZepGraphMemoryManager: if simulation_id in cls._updaters: cls._updaters[simulation_id].stop() del cls._updaters[simulation_id] - logger.info(f"已停止图谱记忆更新器: simulation_id={simulation_id}") + logger.info(t("log.zep_graph_memory_updater.m012", simulation_id=simulation_id)) # 防止 stop_all 重复调用的标志 _stop_all_done = False @@ -536,9 +531,9 @@ class ZepGraphMemoryManager: try: updater.stop() except Exception as e: - logger.error(f"停止更新器失败: simulation_id={simulation_id}, error={e}") + logger.error(t("log.zep_graph_memory_updater.m013", simulation_id=simulation_id, e=e)) cls._updaters.clear() - logger.info("已停止所有图谱记忆更新器") + logger.info(t("log.zep_graph_memory_updater.m014")) @classmethod def get_all_stats(cls) -> Dict[str, Dict[str, Any]]: diff --git a/backend/app/services/zep_tools.py b/backend/app/services/zep_tools.py index 7c87bde7..ac3059ff 100644 --- a/backend/app/services/zep_tools.py +++ b/backend/app/services/zep_tools.py @@ -19,6 +19,7 @@ from ..config import Config from ..utils.logger import get_logger from ..utils.llm_client import LLMClient from ..utils.zep_paging import 
fetch_all_nodes, fetch_all_edges +from ..utils.locale import t logger = get_logger('mirofish.zep_tools') @@ -425,7 +426,7 @@ class ZepToolsService: self.client = GraphitiAdapter() # LLM客户端用于InsightForge生成子问题 self._llm_client = llm_client - logger.info("ZepToolsService 初始化完成") + logger.info(t("log.zep_tools.m001")) @property def llm(self) -> LLMClient: @@ -454,18 +455,16 @@ class ZepToolsService: retry_after = e.headers.get('retry-after') wait = float(retry_after) + 1 if retry_after else 65.0 logger.warning( - f"Zep {operation_name} 触发限速 (429), " - f"等待 {wait:.0f} 秒后重试 (第 {attempt + 1}/{max_retries - 1} 次)..." + t("log.zep_tools.m002", operation_name=operation_name, wait=wait, attempt=attempt + 1, max_retries=max_retries - 1) ) else: logger.warning( - f"Zep {operation_name} 第 {attempt + 1} 次尝试失败: {str(e)[:100]}, " - f"{wait:.1f}秒后重试..." + t("log.zep_tools.m003", operation_name=operation_name, attempt=attempt + 1, str=str(e)[:100], wait=wait) ) time.sleep(wait) delay *= 2 else: - logger.error(f"Zep {operation_name} 在 {max_retries} 次尝试后仍失败: {str(e)}") + logger.error(t("log.zep_tools.m004", operation_name=operation_name, max_retries=max_retries, str=str(e))) raise last_exception @@ -491,7 +490,7 @@ class ZepToolsService: Returns: SearchResult: 搜索结果 """ - logger.info(f"图谱搜索: graph_id={graph_id}, query={query[:50]}...") + logger.info(t("log.zep_tools.m005", graph_id=graph_id, query=query[:50])) # 尝试使用Zep Cloud Search API try: @@ -535,7 +534,7 @@ class ZepToolsService: if hasattr(node, 'summary') and node.summary: facts.append(f"[{node.name}]: {node.summary}") - logger.info(f"搜索完成: 找到 {len(facts)} 条相关事实") + logger.info(t("log.zep_tools.m006", len=len(facts))) return SearchResult( facts=facts, @@ -546,7 +545,7 @@ class ZepToolsService: ) except Exception as e: - logger.warning(f"Zep Search API失败,降级为本地搜索: {str(e)}") + logger.warning(t("log.zep_tools.m007", str=str(e))) # 降级:使用本地关键词匹配搜索 return self._local_search(graph_id, query, limit, scope) @@ -571,7 +570,7 @@ class 
ZepToolsService: Returns: SearchResult: 搜索结果 """ - logger.info(f"使用本地搜索: query={query[:30]}...") + logger.info(t("log.zep_tools.m008", query=query[:30])) facts = [] edges_result = [] @@ -641,10 +640,10 @@ class ZepToolsService: if node.summary: facts.append(f"[{node.name}]: {node.summary}") - logger.info(f"本地搜索完成: 找到 {len(facts)} 条相关事实") + logger.info(t("log.zep_tools.m009", len=len(facts))) except Exception as e: - logger.error(f"本地搜索失败: {str(e)}") + logger.error(t("log.zep_tools.m010", str=str(e))) return SearchResult( facts=facts, @@ -664,7 +663,7 @@ class ZepToolsService: Returns: 节点列表 """ - logger.info(f"获取图谱 {graph_id} 的所有节点...") + logger.info(t("log.zep_tools.m011", graph_id=graph_id)) nodes = fetch_all_nodes(self.client, graph_id) @@ -679,7 +678,7 @@ class ZepToolsService: attributes=node.attributes or {} )) - logger.info(f"获取到 {len(result)} 个节点") + logger.info(t("log.zep_tools.m012", len=len(result))) return result def get_all_edges(self, graph_id: str, include_temporal: bool = True) -> List[EdgeInfo]: @@ -693,7 +692,7 @@ class ZepToolsService: Returns: 边列表(包含created_at, valid_at, invalid_at, expired_at) """ - logger.info(f"获取图谱 {graph_id} 的所有边...") + logger.info(t("log.zep_tools.m013", graph_id=graph_id)) edges = fetch_all_edges(self.client, graph_id) @@ -717,7 +716,7 @@ class ZepToolsService: result.append(edge_info) - logger.info(f"获取到 {len(result)} 条边") + logger.info(t("log.zep_tools.m014", len=len(result))) return result def get_node_detail(self, node_uuid: str) -> Optional[NodeInfo]: @@ -730,7 +729,7 @@ class ZepToolsService: Returns: 节点信息或None """ - logger.info(f"获取节点详情: {node_uuid[:8]}...") + logger.info(t("log.zep_tools.m015", node_uuid=node_uuid[:8])) try: node = self._call_with_retry( @@ -749,7 +748,7 @@ class ZepToolsService: attributes=node.attributes or {} ) except Exception as e: - logger.error(f"获取节点详情失败: {str(e)}") + logger.error(t("log.zep_tools.m016", str=str(e))) return None def get_node_edges(self, graph_id: str, node_uuid: str) -> 
List[EdgeInfo]: @@ -765,7 +764,7 @@ class ZepToolsService: Returns: 边列表 """ - logger.info(f"获取节点 {node_uuid[:8]}... 的相关边") + logger.info(t("log.zep_tools.m017", node_uuid=node_uuid[:8])) try: # 获取图谱所有边,然后过滤 @@ -777,11 +776,11 @@ class ZepToolsService: if edge.source_node_uuid == node_uuid or edge.target_node_uuid == node_uuid: result.append(edge) - logger.info(f"找到 {len(result)} 条与节点相关的边") + logger.info(t("log.zep_tools.m018", len=len(result))) return result except Exception as e: - logger.warning(f"获取节点边失败: {str(e)}") + logger.warning(t("log.zep_tools.m019", str=str(e))) return [] def get_entities_by_type( @@ -799,7 +798,7 @@ class ZepToolsService: Returns: 符合类型的实体列表 """ - logger.info(f"获取类型为 {entity_type} 的实体...") + logger.info(t("log.zep_tools.m020", entity_type=entity_type)) all_nodes = self.get_all_nodes(graph_id) @@ -809,7 +808,7 @@ class ZepToolsService: if entity_type in node.labels: filtered.append(node) - logger.info(f"找到 {len(filtered)} 个 {entity_type} 类型的实体") + logger.info(t("log.zep_tools.m021", len=len(filtered), entity_type=entity_type)) return filtered def get_entity_summary( @@ -829,7 +828,7 @@ class ZepToolsService: Returns: 实体摘要信息 """ - logger.info(f"获取实体 {entity_name} 的关系摘要...") + logger.info(t("log.zep_tools.m022", entity_name=entity_name)) # 先搜索该实体相关的信息 search_result = self.search_graph( @@ -869,7 +868,7 @@ class ZepToolsService: Returns: 统计信息 """ - logger.info(f"获取图谱 {graph_id} 的统计信息...") + logger.info(t("log.zep_tools.m023", graph_id=graph_id)) nodes = self.get_all_nodes(graph_id) edges = self.get_all_edges(graph_id) @@ -913,7 +912,7 @@ class ZepToolsService: Returns: 模拟上下文信息 """ - logger.info(f"获取模拟上下文: {simulation_requirement[:50]}...") + logger.info(t("log.zep_tools.m024", simulation_requirement=simulation_requirement[:50])) # 搜索与模拟需求相关的信息 search_result = self.search_graph( @@ -977,7 +976,7 @@ class ZepToolsService: Returns: InsightForgeResult: 深度洞察检索结果 """ - logger.info(f"InsightForge 深度洞察检索: {query[:50]}...") + 
logger.info(t("log.zep_tools.m025", query=query[:50])) result = InsightForgeResult( query=query, @@ -993,7 +992,7 @@ class ZepToolsService: max_queries=max_sub_queries ) result.sub_queries = sub_queries - logger.info(f"生成 {len(sub_queries)} 个子问题") + logger.info(t("log.zep_tools.m026", len=len(sub_queries))) # Step 2: 对每个子问题进行语义搜索 all_facts = [] @@ -1069,7 +1068,7 @@ class ZepToolsService: "related_facts": related_facts # 完整输出,不截断 }) except Exception as e: - logger.debug(f"获取节点 {uuid} 失败: {e}") + logger.debug(t("log.zep_tools.m027", uuid=uuid, e=e)) continue result.entity_insights = entity_insights @@ -1093,7 +1092,7 @@ class ZepToolsService: result.relationship_chains = relationship_chains result.total_relationships = len(relationship_chains) - logger.info(f"InsightForge完成: {result.total_facts}条事实, {result.total_entities}个实体, {result.total_relationships}条关系") + logger.info(t("log.zep_tools.m028", result=result.total_facts, result_2=result.total_entities, result_3=result.total_relationships)) return result def _generate_sub_queries( @@ -1140,7 +1139,7 @@ class ZepToolsService: return [str(sq) for sq in sub_queries[:max_queries]] except Exception as e: - logger.warning(f"生成子问题失败: {str(e)},使用默认子问题") + logger.warning(t("log.zep_tools.m029", str=str(e))) # 降级:返回基于原问题的变体 return [ query, @@ -1175,7 +1174,7 @@ class ZepToolsService: Returns: PanoramaResult: 广度搜索结果 """ - logger.info(f"PanoramaSearch 广度搜索: {query[:50]}...") + logger.info(t("log.zep_tools.m030", query=query[:50])) result = PanoramaResult(query=query) @@ -1238,7 +1237,7 @@ class ZepToolsService: result.active_count = len(active_facts) result.historical_count = len(historical_facts) - logger.info(f"PanoramaSearch完成: {result.active_count}条有效, {result.historical_count}条历史") + logger.info(t("log.zep_tools.m031", result=result.active_count, result_2=result.historical_count)) return result def quick_search( @@ -1263,7 +1262,7 @@ class ZepToolsService: Returns: SearchResult: 搜索结果 """ - logger.info(f"QuickSearch 简单搜索: 
{query[:50]}...") + logger.info(t("log.zep_tools.m032", query=query[:50])) # 直接调用现有的search_graph方法 result = self.search_graph( @@ -1273,7 +1272,7 @@ class ZepToolsService: scope="edges" ) - logger.info(f"QuickSearch完成: {result.total_count}条结果") + logger.info(t("log.zep_tools.m033", result=result.total_count)) return result def interview_agents( @@ -1313,7 +1312,7 @@ class ZepToolsService: """ from .simulation_runner import SimulationRunner - logger.info(f"InterviewAgents 深度采访(真实API): {interview_requirement[:50]}...") + logger.info(t("log.zep_tools.m034", interview_requirement=interview_requirement[:50])) result = InterviewResult( interview_topic=interview_requirement, @@ -1324,12 +1323,12 @@ class ZepToolsService: profiles = self._load_agent_profiles(simulation_id) if not profiles: - logger.warning(f"未找到模拟 {simulation_id} 的人设文件") + logger.warning(t("log.zep_tools.m035", simulation_id=simulation_id)) result.summary = "未找到可采访的Agent人设文件" return result result.total_agents = len(profiles) - logger.info(f"加载到 {len(profiles)} 个Agent人设") + logger.info(t("log.zep_tools.m036", len=len(profiles))) # Step 2: 使用LLM选择要采访的Agent(返回agent_id列表) selected_agents, selected_indices, selection_reasoning = self._select_agents_for_interview( @@ -1341,7 +1340,7 @@ class ZepToolsService: result.selected_agents = selected_agents result.selection_reasoning = selection_reasoning - logger.info(f"选择了 {len(selected_agents)} 个Agent进行采访: {selected_indices}") + logger.info(t("log.zep_tools.m037", len=len(selected_agents), selected_indices=selected_indices)) # Step 3: 生成采访问题(如果没有提供) if not result.interview_questions: @@ -1350,7 +1349,7 @@ class ZepToolsService: simulation_requirement=simulation_requirement, selected_agents=selected_agents ) - logger.info(f"生成了 {len(result.interview_questions)} 个采访问题") + logger.info(t("log.zep_tools.m038", len=len(result.interview_questions))) # 将问题合并为一个采访prompt combined_prompt = "\n".join([f"{i+1}. 
{q}" for i, q in enumerate(result.interview_questions)]) @@ -1380,7 +1379,7 @@ class ZepToolsService: # 不指定platform,API会在twitter和reddit两个平台都采访 }) - logger.info(f"调用批量采访API(双平台): {len(interviews_request)} 个Agent") + logger.info(t("log.zep_tools.m039", len=len(interviews_request))) # 调用 SimulationRunner 的批量采访方法(不传platform,双平台采访) api_result = SimulationRunner.interview_agents_batch( @@ -1390,12 +1389,12 @@ class ZepToolsService: timeout=180.0 # 双平台需要更长超时 ) - logger.info(f"采访API返回: {api_result.get('interviews_count', 0)} 个结果, success={api_result.get('success')}") + logger.info(t("log.zep_tools.m040", api_result=api_result.get('interviews_count', 0), api_result_2=api_result.get('success'))) # 检查API调用是否成功 if not api_result.get("success", False): error_msg = api_result.get("error", "未知错误") - logger.warning(f"采访API返回失败: {error_msg}") + logger.warning(t("log.zep_tools.m041", error_msg=error_msg)) result.summary = f"采访API调用失败:{error_msg}。请检查OASIS模拟环境状态。" return result @@ -1468,11 +1467,11 @@ class ZepToolsService: except ValueError as e: # 模拟环境未运行 - logger.warning(f"采访API调用失败(环境未运行?): {e}") + logger.warning(t("log.zep_tools.m042", e=e)) result.summary = f"采访失败:{str(e)}。模拟环境可能已关闭,请确保OASIS环境正在运行。" return result except Exception as e: - logger.error(f"采访API调用异常: {e}") + logger.error(t("log.zep_tools.m043", e=e)) import traceback logger.error(traceback.format_exc()) result.summary = f"采访过程发生错误:{str(e)}" @@ -1485,7 +1484,7 @@ class ZepToolsService: interview_requirement=interview_requirement ) - logger.info(f"InterviewAgents完成: 采访了 {result.interviewed_count} 个Agent(双平台)") + logger.info(t("log.zep_tools.m044", result=result.interviewed_count)) return result @staticmethod @@ -1528,10 +1527,10 @@ class ZepToolsService: try: with open(reddit_profile_path, 'r', encoding='utf-8') as f: profiles = json.load(f) - logger.info(f"从 reddit_profiles.json 加载了 {len(profiles)} 个人设") + logger.info(t("log.zep_tools.m045", len=len(profiles))) return profiles except Exception as e: - 
logger.warning(f"读取 reddit_profiles.json 失败: {e}") + logger.warning(t("log.zep_tools.m046", e=e)) # 尝试读取Twitter CSV格式 twitter_profile_path = os.path.join(sim_dir, "twitter_profiles.csv") @@ -1548,10 +1547,10 @@ class ZepToolsService: "persona": row.get("user_char", ""), "profession": "未知" }) - logger.info(f"从 twitter_profiles.csv 加载了 {len(profiles)} 个人设") + logger.info(t("log.zep_tools.m047", len=len(profiles))) return profiles except Exception as e: - logger.warning(f"读取 twitter_profiles.csv 失败: {e}") + logger.warning(t("log.zep_tools.m048", e=e)) return profiles @@ -1632,7 +1631,7 @@ class ZepToolsService: return selected_agents, valid_indices, reasoning except Exception as e: - logger.warning(f"LLM选择Agent失败,使用默认选择: {e}") + logger.warning(t("log.zep_tools.m049", e=e)) # 降级:选择前N个 selected = profiles[:max_agents] indices = list(range(min(max_agents, len(profiles)))) @@ -1680,7 +1679,7 @@ class ZepToolsService: return response.get("questions", [f"关于{interview_requirement},您有什么看法?"]) except Exception as e: - logger.warning(f"生成采访问题失败: {e}") + logger.warning(t("log.zep_tools.m050", e=e)) return [ f"关于{interview_requirement},您的观点是什么?", "这件事对您或您所代表的群体有什么影响?", @@ -1737,6 +1736,6 @@ class ZepToolsService: return summary except Exception as e: - logger.warning(f"生成采访摘要失败: {e}") + logger.warning(t("log.zep_tools.m051", e=e)) # 降级:简单拼接 return f"共采访了{len(interviews)}位受访者,包括:" + "、".join([i.agent_name for i in interviews]) diff --git a/backend/app/utils/locale.py b/backend/app/utils/locale.py index 23d04aa9..aac3e501 100644 --- a/backend/app/utils/locale.py +++ b/backend/app/utils/locale.py @@ -1,4 +1,5 @@ import json +import logging import os import threading from flask import request, has_request_context @@ -19,6 +20,32 @@ for filename in os.listdir(_locales_dir): with open(os.path.join(_locales_dir, filename), 'r', encoding='utf-8') as f: _translations[locale_name] = json.load(f) +# Per-process dedup cache for missing-translation warnings. 
+# Each (locale, key) pair triggers exactly one warning until reset. +_missing_key_cache: set = set() +_missing_key_lock = threading.Lock() +_locale_logger = logging.getLogger("mirofish.locale") + + +def _reset_missing_key_cache() -> None: + """Clear the missing-key dedup cache. + + Intended for tests that need to re-assert the warning behavior between + cases. Not part of the public runtime API. + """ + with _missing_key_lock: + _missing_key_cache.clear() + + +def _warn_missing_key_once(key: str, locale: str) -> None: + """Emit a warning for a missing translation key, deduped per (locale, key).""" + pair = (locale, key) + with _missing_key_lock: + if pair in _missing_key_cache: + return + _missing_key_cache.add(pair) + _locale_logger.warning("missing translation key: %s (locale=%s)", key, locale) + def set_locale(locale: str): """Set locale for current thread. Call at the start of background threads.""" @@ -32,28 +59,28 @@ def get_locale() -> str: return getattr(_thread_local, 'locale', 'zh') -def t(key: str, **kwargs) -> str: - locale = get_locale() - messages = _translations.get(locale, _translations.get('zh', {})) - +def _resolve(messages, key: str): + """Walk the dotted ``key`` path through ``messages``; return the leaf or None.""" value = messages for part in key.split('.'): if isinstance(value, dict): value = value.get(part) else: - value = None - break - - if value is None: - value = _translations.get('zh', {}) - for part in key.split('.'): - if isinstance(value, dict): - value = value.get(part) - else: - value = None - break + return None + return value if isinstance(value, str) else None + + +def t(key: str, **kwargs) -> str: + locale = get_locale() + messages = _translations.get(locale, _translations.get('zh', {})) + + value = _resolve(messages, key) + + if value is None and locale != 'zh': + value = _resolve(_translations.get('zh', {}), key) if value is None: + _warn_missing_key_once(key, locale) return key if kwargs: diff --git 
a/backend/tests/__init__.py b/backend/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py new file mode 100644 index 00000000..b61103d0 --- /dev/null +++ b/backend/tests/conftest.py @@ -0,0 +1,27 @@ +"""Shared pytest configuration. + +The full ``app`` package pulls heavy third-party dependencies (openai, camel, +graphiti) at import time. Tests that only exercise leaf utility modules avoid +that by loading the target file directly via ``importlib.util`` rather than +going through ``app/__init__.py``. +""" + +import importlib.util +import os +import sys +import types + +BACKEND_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if BACKEND_DIR not in sys.path: + sys.path.insert(0, BACKEND_DIR) + + +def load_module_directly(module_name: str, source_path: str) -> types.ModuleType: + """Load ``source_path`` as ``module_name`` without triggering parent packages.""" + spec = importlib.util.spec_from_file_location(module_name, source_path) + if spec is None or spec.loader is None: + raise ImportError(f"cannot load {source_path}") + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module diff --git a/backend/tests/test_locale.py b/backend/tests/test_locale.py new file mode 100644 index 00000000..9b92d273 --- /dev/null +++ b/backend/tests/test_locale.py @@ -0,0 +1,104 @@ +"""Unit tests for ``app.utils.locale``. + +Covers the missing-key warning behavior introduced for ticket #6: + +- Resolving a known key returns the translated value. +- Active locale falls back to ``zh`` when a key is only defined there. +- A missing key returns the raw key string and never raises. +- Each missing ``(locale, key)`` pair emits exactly one warning across the + process lifetime (deduplicated). 
+- The private ``_reset_missing_key_cache`` hook clears the dedup memoization
+  so successive tests can re-assert the warning behavior.
+"""
+
+import logging
+import os
+
+import pytest
+
+from tests.conftest import load_module_directly
+
+LOCALE_PATH = os.path.join(
+    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
+    "app",
+    "utils",
+    "locale.py",
+)
+
+locale_module = load_module_directly("mirofish_locale_under_test", LOCALE_PATH)
+_reset_missing_key_cache = locale_module._reset_missing_key_cache
+set_locale = locale_module.set_locale
+t = locale_module.t
+
+
+@pytest.fixture(autouse=True)
+def _clear_dedup_cache():
+    """Reset the missing-key dedup cache around every test."""
+    _reset_missing_key_cache()
+    yield
+    _reset_missing_key_cache()
+
+
+def test_known_key_returns_active_locale_value():
+    set_locale("en")
+    # ``api.projectNotFound`` is a long-standing key in both en.json and zh.json.
+    assert t("api.projectNotFound", id="abc") != ""
+    assert t("api.projectNotFound", id="abc") != "api.projectNotFound"
+
+
+def test_zh_fallback_when_active_locale_lacks_key():
+    set_locale("en")
+    # Inject a zh-only key for this test, then assert lookup falls back to it.
+    locale_module._translations.setdefault("zh", {})["__test_zh_only_key__"] = "中文回退"
+    try:
+        assert t("__test_zh_only_key__") == "中文回退"
+    finally:
+        locale_module._translations["zh"].pop("__test_zh_only_key__", None)
+
+
+def test_missing_key_returns_raw_key_string():
+    set_locale("en")
+    assert t("definitely.not.a.real.key.path") == "definitely.not.a.real.key.path"
+
+
+def test_missing_key_never_raises_for_invalid_path_segments():
+    set_locale("en")
+    # ``api.projectNotFound`` resolves to a string; descending into it would
+    # otherwise crash. The helper must guard against that.
+    assert t("api.projectNotFound.deeper") == "api.projectNotFound.deeper"
+
+
+def test_missing_key_emits_exactly_one_warning_per_pair(caplog):
+    set_locale("en")
+    target_logger_name = "mirofish.locale"
+    with caplog.at_level(logging.WARNING, logger=target_logger_name):
+        t("definitely.not.a.real.key.path")
+        t("definitely.not.a.real.key.path")
+        t("definitely.not.a.real.key.path")
+    warnings = [r for r in caplog.records if r.name == target_logger_name and r.levelno == logging.WARNING]
+    assert len(warnings) == 1
+    assert "definitely.not.a.real.key.path" in warnings[0].getMessage()
+    assert "en" in warnings[0].getMessage()
+
+
+def test_reset_hook_allows_warning_to_fire_again(caplog):
+    set_locale("en")
+    target_logger_name = "mirofish.locale"
+    with caplog.at_level(logging.WARNING, logger=target_logger_name):
+        t("another.missing.key")
+        _reset_missing_key_cache()
+        t("another.missing.key")
+    warnings = [r for r in caplog.records if r.name == target_logger_name and r.levelno == logging.WARNING]
+    assert len(warnings) == 2
+
+
+def test_distinct_missing_keys_each_warn_once(caplog):
+    set_locale("en")
+    target_logger_name = "mirofish.locale"
+    with caplog.at_level(logging.WARNING, logger=target_logger_name):
+        t("missing.key.one")
+        t("missing.key.two")
+        t("missing.key.one")
+        t("missing.key.two")
+    warnings = [r for r in caplog.records if r.name == target_logger_name and r.levelno == logging.WARNING]
+    assert len(warnings) == 2
diff --git a/backend/tests/test_locale_request_resolution.py b/backend/tests/test_locale_request_resolution.py
new file mode 100644
index 00000000..4d8ce9bc
--- /dev/null
+++ b/backend/tests/test_locale_request_resolution.py
@@ -0,0 +1,56 @@
+"""Integration test: Flask request locale drives ``t()`` lookups.
+
+Exercises the request-context branch of ``app.utils.locale`` end-to-end
+by spinning up a minimal Flask app, registering a route that returns a
+known translated key, and asserting the response varies with the
+``Accept-Language`` header.
+"""
+
+import os
+
+import pytest
+from flask import Flask, jsonify
+
+from tests.conftest import load_module_directly
+
+LOCALE_PATH = os.path.join(
+    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
+    "app",
+    "utils",
+    "locale.py",
+)
+
+locale_module = load_module_directly("mirofish_locale_for_request_test", LOCALE_PATH)
+t = locale_module.t
+
+
+@pytest.fixture
+def client():
+    """Return a test client for a minimal app whose ``/echo`` route calls ``t()``."""
+    app = Flask(__name__)
+
+    @app.route("/echo")
+    def echo():
+        return jsonify({"error": t("api.error.simulation.m018")})
+
+    return app.test_client()
+
+
+def test_accept_language_en_returns_english(client):
+    resp = client.get("/echo", headers={"Accept-Language": "en"})
+    body = resp.get_json()
+    # m018 is "Missing simulation_id" in en.json.
+    assert "Missing simulation_id" in body["error"]
+
+
+def test_accept_language_zh_returns_chinese(client):
+    resp = client.get("/echo", headers={"Accept-Language": "zh"})
+    body = resp.get_json()
+    # zh.json preserves the original Chinese verbatim.
+ assert any("一" <= ch <= "鿿" for ch in body["error"]) + + +def test_missing_accept_language_defaults_to_zh(client): + resp = client.get("/echo") + body = resp.get_json() + assert any("一" <= ch <= "鿿" for ch in body["error"]) diff --git a/locales/en.json b/locales/en.json index 544c68b1..0c924b04 100644 --- a/locales/en.json +++ b/locales/en.json @@ -75,7 +75,13 @@ "layoutGraph": "Graph", "layoutSplit": "Split", "layoutWorkbench": "Workbench", - "stepNames": ["Graph Build", "Env Setup", "Run Simulation", "Report Generation", "Deep Interaction"] + "stepNames": [ + "Graph Build", + "Env Setup", + "Run Simulation", + "Report Generation", + "Deep Interaction" + ] }, "step1": { "ontologyGeneration": "Ontology Generation", @@ -388,7 +394,88 @@ "envRunning": "Environment is running and ready for Interview commands", "envNotRunningShort": "Environment not running or closed", "requireGraphIdAndQuery": "Please provide graph_id and query", - "initReportAgent": "Initializing Report Agent..." + "initReportAgent": "Initializing Report Agent...", + "error": { + "simulation": { + "m001": "Neo4j is not configured", + "m004": "Neo4j is not configured", + "m005": "Entity not found: {entity_uuid}", + "m007": "Neo4j is not configured", + "m009": "Missing project_id", + "m010": "Project not found: {project_id}", + "m011": "Project graph has not been built; call /api/graph/build first", + "m018": "Missing simulation_id", + "m019": "Simulation not found: {simulation_id}", + "m025": "Project not found: {state}", + "m026": "Project is missing the simulation requirement description (simulation_requirement)", + "m032": "Missing task_id or simulation_id", + "m033": "Task not found: {task_id}", + "m035": "Simulation not found: {simulation_id}", + "m041": "Simulation not found: {simulation_id}", + "m044": "Simulation not found: {simulation_id}", + "m047": "Simulation config not found; call /prepare first", + "m049": "Config file not found; call /prepare first", + "m051": "Unknown script: 
{script_name}; allowed: {allowed_scripts}", + "m052": "Script file not found: {script_name}", + "m054": "Missing graph_id", + "m055": "No matching entities found", + "m057": "Missing simulation_id", + "m058": "max_rounds must be a positive integer", + "m059": "max_rounds must be a valid integer", + "m060": "Invalid platform: {platform}; allowed: twitter/reddit/parallel", + "m061": "Simulation not found: {simulation_id}", + "m064": "Simulation is currently running; call /stop first or pass force=true to restart", + "m068": "Simulation is not ready; current state: {state}. Call /prepare first.", + "m069": "Enabling graph memory updates requires a valid graph_id; ensure the project graph has been built", + "m072": "Missing simulation_id", + "m081": "Missing simulation_id", + "m082": "Missing agent_id", + "m083": "Missing prompt (interview question)", + "m084": "platform must be 'twitter' or 'reddit'", + "m085": "Simulation environment is not running or has been closed. Make sure the simulation completed and entered command-wait mode.", + "m086": "Timed out waiting for interview response: {str}", + "m088": "Missing simulation_id", + "m089": "Missing interviews (interview list)", + "m090": "platform must be 'twitter' or 'reddit'", + "m091": "Interview list item {i} is missing agent_id", + "m092": "Interview list item {i} is missing prompt", + "m093": "Interview list item {i} platform must be 'twitter' or 'reddit'", + "m094": "Simulation environment is not running or has been closed. Make sure the simulation completed and entered command-wait mode.", + "m095": "Timed out waiting for batch interview response: {str}", + "m097": "Missing simulation_id", + "m098": "Missing prompt (interview question)", + "m099": "platform must be 'twitter' or 'reddit'", + "m100": "Simulation environment is not running or has been closed. 
Make sure the simulation completed and entered command-wait mode.", + "m101": "Timed out waiting for global interview response: {str}", + "m103": "Missing simulation_id", + "m105": "Missing simulation_id", + "m107": "Missing simulation_id" + }, + "graph": { + "m001": "Project not found: {project_id}", + "m002": "Project not found or delete failed: {project_id}", + "m004": "Project not found: {project_id}", + "m009": "Missing simulation requirement description (simulation_requirement)", + "m010": "Please upload at least one document file", + "m012": "No documents were processed successfully; please check the file format", + "m020": "Missing project_id", + "m021": "Project not found: {project_id}", + "m022": "Project ontology has not been generated; call /ontology/generate first", + "m023": "Graph build is already in progress; do not resubmit. Pass force: true to force a rebuild.", + "m024": "Extracted text content not found", + "m025": "Ontology definition not found", + "m027": "Task not found: {task_id}", + "m028": "Neo4j is not configured", + "m029": "Neo4j is not configured" + } + }, + "message": { + "graph": { + "m003": "Project deleted: {project_id}", + "m005": "Project reset: {project_id}", + "m030": "Graph deleted: {graph_id}" + } + } }, "progress": { "initGraphService": "Initializing graph build service...", @@ -563,7 +650,286 @@ "getReportInfoFailed": "Failed to get report info: {error}", "enterStep": "Entering Step {step}: {name}", "returnToStep": "Returning to Step {step}: {name}", - "customSimRounds": "Custom simulation rounds: {rounds} rounds" + "customSimRounds": "Custom simulation rounds: {rounds} rounds", + "report_agent": { + "m001": "LLM response: {response}..." 
+ }, + "zep_tools": { + "m001": "ZepToolsService initialized", + "m002": "Zep {operation_name} hit rate limit (429); waiting {wait}s before retry (attempt {attempt}/{max_retries})...", + "m003": "Zep {operation_name} attempt {attempt} failed: {str}; retrying in {wait}s...", + "m004": "Zep {operation_name} still failing after {max_retries} attempts: {str}", + "m005": "Graph search: graph_id={graph_id}, query={query}...", + "m006": "Search complete: found {len} relevant facts", + "m007": "Zep Search API failed; falling back to local search: {str}", + "m008": "Using local search: query={query}...", + "m009": "Local search complete: found {len} relevant facts", + "m010": "Local search failed: {str}", + "m011": "Fetching all nodes for graph {graph_id}...", + "m012": "Fetched {len} nodes", + "m013": "Fetching all edges for graph {graph_id}...", + "m014": "Fetched {len} edges", + "m015": "Fetching node detail: {node_uuid}...", + "m016": "Failed to fetch node detail: {str}", + "m017": "Fetching edges related to node {node_uuid}...", + "m018": "Found {len} edges related to the node", + "m019": "Failed to fetch node edges: {str}", + "m020": "Fetching entities of type {entity_type}...", + "m021": "Found {len} entities of type {entity_type}", + "m022": "Fetching relationship summary for entity {entity_name}...", + "m023": "Fetching statistics for graph {graph_id}...", + "m024": "Fetching simulation context: {simulation_requirement}...", + "m025": "InsightForge deep retrieval: {query}...", + "m026": "Generated {len} sub-queries", + "m027": "Failed to fetch node {uuid}: {e}", + "m028": "InsightForge complete: {result} facts, {result_2} entities, {result_3} relationships", + "m029": "Failed to generate sub-queries: {str}; using default sub-queries", + "m030": "PanoramaSearch broad retrieval: {query}...", + "m031": "PanoramaSearch complete: {result} active, {result_2} historical", + "m032": "QuickSearch simple retrieval: {query}...", + "m033": "QuickSearch complete: {result} 
results", + "m034": "InterviewAgents deep interview (real API): {interview_requirement}...", + "m035": "Profile file for simulation {simulation_id} not found", + "m036": "Loaded {len} agent profiles", + "m037": "Selected {len} agents for interview: {selected_indices}", + "m038": "Generated {len} interview questions", + "m039": "Calling batch interview API (dual platform): {len} agents", + "m040": "Interview API returned: {api_result} results, success={api_result_2}", + "m041": "Interview API returned failure: {error_msg}", + "m042": "Interview API call failed (environment not running?): {e}", + "m043": "Interview API call raised: {e}", + "m044": "InterviewAgents complete: interviewed {result} agents (dual platform)", + "m045": "Loaded {len} profiles from reddit_profiles.json", + "m046": "Failed to read reddit_profiles.json: {e}", + "m047": "Loaded {len} profiles from twitter_profiles.csv", + "m048": "Failed to read twitter_profiles.csv: {e}", + "m049": "LLM agent selection failed; using default selection: {e}", + "m050": "Failed to generate interview questions: {e}", + "m051": "Failed to generate interview summary: {e}" + }, + "simulation_runner": { + "m001": "Failed to load run state: {str}", + "m002": "Round count truncated: {original_rounds} -> {total_rounds} (max_rounds={max_rounds})", + "m003": "Graph memory updates enabled: simulation_id={simulation_id}, graph_id={graph_id}", + "m004": "Failed to create graph memory updater: {e}", + "m005": "Simulation started: {simulation_id}, pid={process}, platform={platform}", + "m006": "Simulation complete: {simulation_id}", + "m007": "Simulation failed: {simulation_id}, error={state}", + "m008": "Monitor thread error: {simulation_id}, error={str}", + "m009": "Stopped graph memory updates: simulation_id={simulation_id}", + "m010": "Failed to stop graph memory updater: {e}", + "m011": "Twitter simulation complete: {state}, total_rounds={action_data}, total_actions={action_data_2}", + "m012": "Reddit simulation complete: 
{state}, total_rounds={action_data}, total_actions={action_data_2}", + "m013": "All-platform simulation complete: {state}", + "m014": "Failed to read action log: {log_path}, error={e}", + "m015": "Terminating process tree (Windows): simulation={simulation_id}, pid={process}", + "m016": "Process did not respond; force-terminating: {simulation_id}", + "m017": "taskkill failed; falling back to terminate: {e}", + "m018": "Terminating process group (Unix): simulation={simulation_id}, pgid={pgid}", + "m019": "Process group did not respond to SIGTERM; force-terminating: {simulation_id}", + "m020": "Failed to terminate process group: {simulation_id}, error={e}", + "m021": "Stopped graph memory updates: simulation_id={simulation_id}", + "m022": "Failed to stop graph memory updater: {e}", + "m023": "Simulation stopped: {simulation_id}", + "m024": "Simulation logs cleaned: {simulation_id}, files removed: {cleaned_files}", + "m025": "Cleaning up all simulation processes...", + "m026": "Failed to stop graph memory updater: {e}", + "m027": "Terminating simulation process: {simulation_id}, pid={process}", + "m028": "Attempting to update state.json: {state_file}", + "m029": "Updated state.json status to stopped: {simulation_id}", + "m030": "state.json does not exist: {state_file}", + "m031": "Failed to update state.json: {simulation_id}, error={state_err}", + "m032": "Failed to clean up process: {simulation_id}, error={e}", + "m033": "Simulation process cleanup complete", + "m034": "Received signal {signum}; starting cleanup...", + "m035": "Cannot register signal handler (not on the main thread); using atexit only", + "m036": "Sending interview command: simulation_id={simulation_id}, agent_id={agent_id}, platform={platform}", + "m037": "Sending batch interview command: simulation_id={simulation_id}, count={len}, platform={platform}", + "m038": "Sending global interview command: simulation_id={simulation_id}, agent_count={len}, platform={platform}", + "m039": "Sending 
environment-close command: simulation_id={simulation_id}", + "m040": "Failed to read interview history ({platform_name}): {e}" + }, + "profile_generator": { + "m001": "Skipping Zep retrieval: graph_id is not set", + "m002": "Zep edge search failed on attempt {attempt}: {str}; retrying...", + "m003": "Zep edge search still failing after {max_retries} attempts: {e}", + "m004": "Zep node search failed on attempt {attempt}: {str}; retrying...", + "m005": "Zep node search still failing after {max_retries} attempts: {e}", + "m006": "Zep hybrid retrieval complete: {entity_name}, fetched {len} facts and {len_2} related nodes", + "m007": "Zep retrieval timed out ({entity_name})", + "m008": "Zep retrieval failed ({entity_name}): {e}", + "m009": "LLM output truncated (attempt {attempt}); attempting repair...", + "m010": "JSON parse failed (attempt {attempt}): {str}", + "m011": "LLM call failed (attempt {attempt}): {str}", + "m012": "LLM profile generation failed after {max_attempts} attempts: {last_error}; falling back to rule-based profile", + "m013": "Recovered partial information from a corrupted JSON payload", + "m014": "JSON repair failed; returning fallback structure", + "m015": "Realtime profiles save failed: {e}", + "m016": "Failed to generate profile for entity {entity}: {str}", + "m017": "Starting parallel generation of {total} agent profiles (parallelism: {parallel_count})...", + "m018": "[{current}/{total}] {entity} using fallback profile: {error}", + "m019": "[{current}/{total}] Generated profile: {entity} ({entity_type})", + "m020": "Exception while processing entity {entity}: {str}", + "m021": "Saved {len} Twitter profiles to {file_path} (OASIS CSV format)", + "m022": "Saved {len} Reddit profiles to {file_path} (JSON format with user_id field)", + "m023": "save_profiles_to_json is deprecated; use save_profiles instead" + }, + "simulation_config": { + "m001": "Smart simulation config generation started: simulation_id={simulation_id}, entities={len}", + "m002": 
"Assigning a suitable poster agent for the initial post...", + "m003": "Simulation config generation complete: {len} agent configurations", + "m004": "LLM output truncated (attempt {attempt})", + "m005": "JSON parse failed (attempt {attempt}): {str}", + "m006": "LLM call failed (attempt {attempt}): {str}", + "m007": "Time-config LLM generation failed: {e}; falling back to default config", + "m008": "agents_per_hour_min ({agents_per_hour_min}) exceeds total agent count ({num_entities}); corrected", + "m009": "agents_per_hour_max ({agents_per_hour_max}) exceeds total agent count ({num_entities}); corrected", + "m010": "agents_per_hour_min >= max; corrected to {agents_per_hour_min}", + "m011": "Event-config LLM generation failed: {e}; falling back to default config", + "m012": "No matching agent found for type '{poster_type}'; using the most influential agent", + "m013": "Initial post assignment: poster_type='{poster_type}' -> agent_id={matched_agent_id}", + "m014": "Agent-config batch LLM generation failed: {e}; falling back to rule-based generation" + }, + "zep_graph_memory_updater": { + "m001": "ZepGraphMemoryUpdater initialized: graph_id={graph_id}, batch_size={self}", + "m002": "ZepGraphMemoryUpdater started: graph_id={self}", + "m003": "ZepGraphMemoryUpdater stopped: graph_id={self}, total_activities={self_2}, batches_sent={self_3}, items_sent={self_4}, failed={self_5}, skipped={self_6}", + "m004": "Activity added to Zep queue: {activity} - {activity_2}", + "m005": "Worker loop error: {e}", + "m006": "Successfully batched {len} {display_name} activities to graph {self}", + "m007": "Batch content preview: {combined_text}...", + "m008": "Batch send to Zep failed (attempt {attempt}/{self}): {e}", + "m009": "Batch send to Zep failed after {self} retries: {e}", + "m010": "Sending {len} remaining {display_name} activities", + "m011": "Created graph memory updater: simulation_id={simulation_id}, graph_id={graph_id}", + "m012": "Stopped graph memory updater: 
simulation_id={simulation_id}", + "m013": "Failed to stop updater: simulation_id={simulation_id}, error={e}", + "m014": "Stopped all graph memory updaters" + }, + "zep_entity_reader": { + "m001": "Zep {operation_name} attempt {attempt} failed: {str}; retrying in {delay}s...", + "m002": "Zep {operation_name} still failing after {max_retries} attempts: {str}", + "m003": "Fetching all nodes for graph {graph_id}...", + "m004": "Fetched {len} nodes", + "m005": "Fetching all edges for graph {graph_id}...", + "m006": "Fetched {len} edges", + "m007": "Failed to fetch edges for node {node_uuid}: {str}", + "m008": "Filtering entities for graph {graph_id}...", + "m009": "Filtering complete: total nodes {total_count}, matching {len}, entity types: {entity_types_found}", + "m010": "Failed to fetch entity {entity_uuid}: {str}" + }, + "simulation_ipc": { + "m001": "Sending IPC command: {command_type}, command_id={command_id}", + "m002": "Received IPC response: command_id={command_id}, status={response}", + "m003": "Failed to parse response: {e}", + "m004": "Timed out waiting for IPC response: command_id={command_id}", + "m005": "Failed to read command file: {filepath}, {e}" + }, + "simulation_manager": { + "m001": "Created simulation: {simulation_id}, project={project_id}, graph={graph_id}", + "m002": "Simulation prepared: {simulation_id}, entities={state}, profiles={state_2}", + "m003": "Simulation prepare failed: {simulation_id}, error={str}" + }, + "simulation_api": { + "m002": "Fetching graph entities: graph_id={graph_id}, entity_types={entity_types}, enrich={enrich}", + "m003": "Failed to fetch graph entities: {str}", + "m006": "Failed to fetch entity detail: {str}", + "m008": "Failed to fetch entities: {str}", + "m012": "Failed to create simulation: {str}", + "m013": "Checking simulation prepare state: {simulation_id}, status={status}, config_generated={config_generated}", + "m014": "Auto-updating simulation state: {simulation_id} preparing -> ready", + "m015": "Auto 
state-update failed: {e}", + "m016": "Simulation {simulation_id} check result: prepared (status={status}, config_generated={config_generated})", + "m017": "Simulation {simulation_id} check result: not yet prepared (status={status}, config_generated={config_generated})", + "m020": "Handling /prepare request: simulation_id={simulation_id}, force_regenerate={force_regenerate}", + "m021": "Checking whether simulation {simulation_id} is already prepared...", + "m022": "Check result: is_prepared={is_prepared}, prepare_info={prepare_info}", + "m023": "Simulation {simulation_id} is already prepared; skipping regeneration", + "m024": "Simulation {simulation_id} is not yet prepared; starting prepare task", + "m027": "Synchronously fetching entity count: graph_id={state}", + "m028": "Expected entity count: {filtered_preview}, types: {filtered_preview_2}", + "m029": "Synchronous entity count fetch failed (will retry in background task): {e}", + "m030": "Failed to prepare simulation: {str}", + "m031": "Failed to start prepare task: {str}", + "m034": "Failed to query task status: {str}", + "m036": "Failed to fetch simulation status: {str}", + "m037": "Failed to list simulations: {str}", + "m038": "Failed to look up report for simulation {simulation_id}: {e}", + "m039": "Failed to fetch historical simulations: {str}", + "m040": "Failed to fetch profile: {str}", + "m042": "Failed to read profiles file (it may be in the middle of being written): {e}", + "m043": "Realtime profile fetch failed: {str}", + "m045": "Failed to read config file (it may be in the middle of being written): {e}", + "m046": "Realtime config fetch failed: {str}", + "m048": "Failed to fetch config: {str}", + "m050": "Failed to download config: {str}", + "m053": "Failed to download script: {str}", + "m056": "Failed to generate profile: {str}", + "m062": "Force mode: stopping running simulation {simulation_id}", + "m063": "Warning while stopping simulation: {str}", + "m065": "Force mode: cleaning simulation logs 
for {simulation_id}", + "m066": "Warning while cleaning logs: {cleanup_result}", + "m067": "Simulation {simulation_id} prepare work completed; resetting status to ready (was {state})", + "m070": "Enabling graph memory updates: simulation_id={simulation_id}, graph_id={graph_id}", + "m071": "Failed to start simulation: {str}", + "m073": "Failed to stop simulation: {str}", + "m074": "Failed to fetch run state: {str}", + "m075": "Failed to fetch detailed status: {str}", + "m076": "Failed to fetch action history: {str}", + "m077": "Failed to fetch timeline: {str}", + "m078": "Failed to fetch agent stats: {str}", + "m079": "Failed to fetch posts: {str}", + "m080": "Failed to fetch comments: {str}", + "m087": "Interview failed: {str}", + "m096": "Batch interview failed: {str}", + "m102": "Global interview failed: {str}", + "m104": "Failed to fetch interview history: {str}", + "m106": "Failed to fetch environment status: {str}", + "m108": "Failed to close environment: {str}" + }, + "report_api": { + "m001": "Report generation failed: {str}", + "m002": "Failed to start report generation task: {str}", + "m003": "Failed to query task status: {str}", + "m004": "Failed to fetch report: {str}", + "m005": "Failed to fetch report: {str}", + "m006": "Failed to list reports: {str}", + "m007": "Failed to download report: {str}", + "m008": "Failed to delete report: {str}", + "m009": "Chat failed: {str}", + "m010": "Failed to fetch report progress: {str}", + "m011": "Failed to fetch section list: {str}", + "m012": "Failed to fetch section content: {str}", + "m013": "Failed to check report status: {str}", + "m014": "Failed to fetch agent log: {str}", + "m015": "Failed to fetch agent log: {str}", + "m016": "Failed to fetch console log: {str}", + "m017": "Failed to fetch console log: {str}", + "m018": "Graph search failed: {str}", + "m019": "Failed to fetch graph stats: {str}" + }, + "graph_api": { + "m006": "=== Ontology generation started ===", + "m007": "Project name: {project_name}", 
+ "m008": "Simulation requirement: {simulation_requirement}...", + "m011": "Project created: {project}", + "m013": "Text extraction complete; total {len} characters", + "m014": "Calling LLM to generate ontology...", + "m015": "Ontology generation complete: {entity_count} entity types, {edge_count} relation types", + "m016": "=== Ontology generation finished === project_id: {project}", + "m017": "=== Graph build started ===", + "m018": "Configuration error: {errors}", + "m019": "Request parameters: project_id={project_id}", + "m026": "Created graph build task: task_id={task_id}, project_id={project_id}" + }, + "bootstrap": { + "m001": "MiroFish backend starting...", + "m002": "Registered simulation process cleanup hook", + "m003": "Request: {request} {request_2}", + "m004": "Request body: {request}", + "m005": "Response: {response}", + "m006": "MiroFish backend started" + } }, "report": { "taskStarted": "Report generation task started", diff --git a/locales/zh.json b/locales/zh.json index cd747e2f..b283cf0b 100644 --- a/locales/zh.json +++ b/locales/zh.json @@ -75,7 +75,13 @@ "layoutGraph": "图谱", "layoutSplit": "双栏", "layoutWorkbench": "工作台", - "stepNames": ["图谱构建", "环境搭建", "开始模拟", "报告生成", "深度互动"] + "stepNames": [ + "图谱构建", + "环境搭建", + "开始模拟", + "报告生成", + "深度互动" + ] }, "step1": { "ontologyGeneration": "本体生成", @@ -388,7 +394,88 @@ "envRunning": "环境正在运行,可以接收Interview命令", "envNotRunningShort": "环境未运行或已关闭", "requireGraphIdAndQuery": "请提供 graph_id 和 query", - "initReportAgent": "初始化Report Agent..." 
+ "initReportAgent": "初始化Report Agent...", + "error": { + "simulation": { + "m001": "NEO4J未配置", + "m004": "NEO4J未配置", + "m005": "实体不存在: {entity_uuid}", + "m007": "NEO4J未配置", + "m009": "请提供 project_id", + "m010": "项目不存在: {project_id}", + "m011": "项目尚未构建图谱,请先调用 /api/graph/build", + "m018": "请提供 simulation_id", + "m019": "模拟不存在: {simulation_id}", + "m025": "项目不存在: {state}", + "m026": "项目缺少模拟需求描述 (simulation_requirement)", + "m032": "请提供 task_id 或 simulation_id", + "m033": "任务不存在: {task_id}", + "m035": "模拟不存在: {simulation_id}", + "m041": "模拟不存在: {simulation_id}", + "m044": "模拟不存在: {simulation_id}", + "m047": "模拟配置不存在,请先调用 /prepare 接口", + "m049": "配置文件不存在,请先调用 /prepare 接口", + "m051": "未知脚本: {script_name},可选: {allowed_scripts}", + "m052": "脚本文件不存在: {script_name}", + "m054": "请提供 graph_id", + "m055": "没有找到符合条件的实体", + "m057": "请提供 simulation_id", + "m058": "max_rounds 必须是正整数", + "m059": "max_rounds 必须是有效的整数", + "m060": "无效的平台类型: {platform},可选: twitter/reddit/parallel", + "m061": "模拟不存在: {simulation_id}", + "m064": "模拟正在运行中,请先调用 /stop 接口停止,或使用 force=true 强制重新开始", + "m068": "模拟未准备好,当前状态: {state},请先调用 /prepare 接口", + "m069": "启用图谱记忆更新需要有效的 graph_id,请确保项目已构建图谱", + "m072": "请提供 simulation_id", + "m081": "请提供 simulation_id", + "m082": "请提供 agent_id", + "m083": "请提供 prompt(采访问题)", + "m084": "platform 参数只能是 'twitter' 或 'reddit'", + "m085": "模拟环境未运行或已关闭。请确保模拟已完成并进入等待命令模式。", + "m086": "等待Interview响应超时: {str}", + "m088": "请提供 simulation_id", + "m089": "请提供 interviews(采访列表)", + "m090": "platform 参数只能是 'twitter' 或 'reddit'", + "m091": "采访列表第{i}项缺少 agent_id", + "m092": "采访列表第{i}项缺少 prompt", + "m093": "采访列表第{i}项的platform只能是 'twitter' 或 'reddit'", + "m094": "模拟环境未运行或已关闭。请确保模拟已完成并进入等待命令模式。", + "m095": "等待批量Interview响应超时: {str}", + "m097": "请提供 simulation_id", + "m098": "请提供 prompt(采访问题)", + "m099": "platform 参数只能是 'twitter' 或 'reddit'", + "m100": "模拟环境未运行或已关闭。请确保模拟已完成并进入等待命令模式。", + "m101": "等待全局Interview响应超时: {str}", + "m103": "请提供 simulation_id", + "m105": "请提供 simulation_id", + "m107": 
"请提供 simulation_id" + }, + "graph": { + "m001": "项目不存在: {project_id}", + "m002": "项目不存在或删除失败: {project_id}", + "m004": "项目不存在: {project_id}", + "m009": "请提供模拟需求描述 (simulation_requirement)", + "m010": "请至少上传一个文档文件", + "m012": "没有成功处理任何文档,请检查文件格式", + "m020": "请提供 project_id", + "m021": "项目不存在: {project_id}", + "m022": "项目尚未生成本体,请先调用 /ontology/generate", + "m023": "图谱正在构建中,请勿重复提交。如需强制重建,请添加 force: true", + "m024": "未找到提取的文本内容", + "m025": "未找到本体定义", + "m027": "任务不存在: {task_id}", + "m028": "NEO4J未配置", + "m029": "NEO4J未配置" + } + }, + "message": { + "graph": { + "m003": "项目已删除: {project_id}", + "m005": "项目已重置: {project_id}", + "m030": "图谱已删除: {graph_id}" + } + } }, "progress": { "initGraphService": "初始化图谱构建服务...", @@ -563,7 +650,286 @@ "getReportInfoFailed": "获取报告信息失败: {error}", "enterStep": "进入 Step {step}: {name}", "returnToStep": "返回 Step {step}: {name}", - "customSimRounds": "自定义模拟轮数: {rounds} 轮" + "customSimRounds": "自定义模拟轮数: {rounds} 轮", + "report_agent": { + "m001": "LLM响应: {response}..." + }, + "zep_tools": { + "m001": "ZepToolsService 初始化完成", + "m002": "Zep {operation_name} 触发限速 (429), 等待 {wait} 秒后重试 (第 {attempt}/{max_retries} 次)...", + "m003": "Zep {operation_name} 第 {attempt} 次尝试失败: {str}, {wait}秒后重试...", + "m004": "Zep {operation_name} 在 {max_retries} 次尝试后仍失败: {str}", + "m005": "图谱搜索: graph_id={graph_id}, query={query}...", + "m006": "搜索完成: 找到 {len} 条相关事实", + "m007": "Zep Search API失败,降级为本地搜索: {str}", + "m008": "使用本地搜索: query={query}...", + "m009": "本地搜索完成: 找到 {len} 条相关事实", + "m010": "本地搜索失败: {str}", + "m011": "获取图谱 {graph_id} 的所有节点...", + "m012": "获取到 {len} 个节点", + "m013": "获取图谱 {graph_id} 的所有边...", + "m014": "获取到 {len} 条边", + "m015": "获取节点详情: {node_uuid}...", + "m016": "获取节点详情失败: {str}", + "m017": "获取节点 {node_uuid}... 
的相关边", + "m018": "找到 {len} 条与节点相关的边", + "m019": "获取节点边失败: {str}", + "m020": "获取类型为 {entity_type} 的实体...", + "m021": "找到 {len} 个 {entity_type} 类型的实体", + "m022": "获取实体 {entity_name} 的关系摘要...", + "m023": "获取图谱 {graph_id} 的统计信息...", + "m024": "获取模拟上下文: {simulation_requirement}...", + "m025": "InsightForge 深度洞察检索: {query}...", + "m026": "生成 {len} 个子问题", + "m027": "获取节点 {uuid} 失败: {e}", + "m028": "InsightForge完成: {result}条事实, {result_2}个实体, {result_3}条关系", + "m029": "生成子问题失败: {str},使用默认子问题", + "m030": "PanoramaSearch 广度搜索: {query}...", + "m031": "PanoramaSearch完成: {result}条有效, {result_2}条历史", + "m032": "QuickSearch 简单搜索: {query}...", + "m033": "QuickSearch完成: {result}条结果", + "m034": "InterviewAgents 深度采访(真实API): {interview_requirement}...", + "m035": "未找到模拟 {simulation_id} 的人设文件", + "m036": "加载到 {len} 个Agent人设", + "m037": "选择了 {len} 个Agent进行采访: {selected_indices}", + "m038": "生成了 {len} 个采访问题", + "m039": "调用批量采访API(双平台): {len} 个Agent", + "m040": "采访API返回: {api_result} 个结果, success={api_result_2}", + "m041": "采访API返回失败: {error_msg}", + "m042": "采访API调用失败(环境未运行?): {e}", + "m043": "采访API调用异常: {e}", + "m044": "InterviewAgents完成: 采访了 {result} 个Agent(双平台)", + "m045": "从 reddit_profiles.json 加载了 {len} 个人设", + "m046": "读取 reddit_profiles.json 失败: {e}", + "m047": "从 twitter_profiles.csv 加载了 {len} 个人设", + "m048": "读取 twitter_profiles.csv 失败: {e}", + "m049": "LLM选择Agent失败,使用默认选择: {e}", + "m050": "生成采访问题失败: {e}", + "m051": "生成采访摘要失败: {e}" + }, + "simulation_runner": { + "m001": "加载运行状态失败: {str}", + "m002": "轮数已截断: {original_rounds} -> {total_rounds} (max_rounds={max_rounds})", + "m003": "已启用图谱记忆更新: simulation_id={simulation_id}, graph_id={graph_id}", + "m004": "创建图谱记忆更新器失败: {e}", + "m005": "模拟启动成功: {simulation_id}, pid={process}, platform={platform}", + "m006": "模拟完成: {simulation_id}", + "m007": "模拟失败: {simulation_id}, error={state}", + "m008": "监控线程异常: {simulation_id}, error={str}", + "m009": "已停止图谱记忆更新: simulation_id={simulation_id}", + "m010": "停止图谱记忆更新器失败: {e}", + "m011": 
"Twitter 模拟已完成: {state}, total_rounds={action_data}, total_actions={action_data_2}", + "m012": "Reddit 模拟已完成: {state}, total_rounds={action_data}, total_actions={action_data_2}", + "m013": "所有平台模拟已完成: {state}", + "m014": "读取动作日志失败: {log_path}, error={e}", + "m015": "终止进程树 (Windows): simulation={simulation_id}, pid={process}", + "m016": "进程未响应,强制终止: {simulation_id}", + "m017": "taskkill 失败,尝试 terminate: {e}", + "m018": "终止进程组 (Unix): simulation={simulation_id}, pgid={pgid}", + "m019": "进程组未响应 SIGTERM,强制终止: {simulation_id}", + "m020": "终止进程组失败: {simulation_id}, error={e}", + "m021": "已停止图谱记忆更新: simulation_id={simulation_id}", + "m022": "停止图谱记忆更新器失败: {e}", + "m023": "模拟已停止: {simulation_id}", + "m024": "清理模拟日志完成: {simulation_id}, 删除文件: {cleaned_files}", + "m025": "正在清理所有模拟进程...", + "m026": "停止图谱记忆更新器失败: {e}", + "m027": "终止模拟进程: {simulation_id}, pid={process}", + "m028": "尝试更新 state.json: {state_file}", + "m029": "已更新 state.json 状态为 stopped: {simulation_id}", + "m030": "state.json 不存在: {state_file}", + "m031": "更新 state.json 失败: {simulation_id}, error={state_err}", + "m032": "清理进程失败: {simulation_id}, error={e}", + "m033": "模拟进程清理完成", + "m034": "收到信号 {signum},开始清理...", + "m035": "无法注册信号处理器(不在主线程),仅使用 atexit", + "m036": "发送Interview命令: simulation_id={simulation_id}, agent_id={agent_id}, platform={platform}", + "m037": "发送批量Interview命令: simulation_id={simulation_id}, count={len}, platform={platform}", + "m038": "发送全局Interview命令: simulation_id={simulation_id}, agent_count={len}, platform={platform}", + "m039": "发送关闭环境命令: simulation_id={simulation_id}", + "m040": "读取Interview历史失败 ({platform_name}): {e}" + }, + "profile_generator": { + "m001": "跳过Zep检索:未设置graph_id", + "m002": "Zep边搜索第 {attempt} 次失败: {str}, 重试中...", + "m003": "Zep边搜索在 {max_retries} 次尝试后仍失败: {e}", + "m004": "Zep节点搜索第 {attempt} 次失败: {str}, 重试中...", + "m005": "Zep节点搜索在 {max_retries} 次尝试后仍失败: {e}", + "m006": "Zep混合检索完成: {entity_name}, 获取 {len} 条事实, {len_2} 个相关节点", + "m007": "Zep检索超时 ({entity_name})", + "m008": 
"Zep检索失败 ({entity_name}): {e}", + "m009": "LLM输出被截断 (attempt {attempt}), 尝试修复...", + "m010": "JSON解析失败 (attempt {attempt}): {str}", + "m011": "LLM调用失败 (attempt {attempt}): {str}", + "m012": "LLM生成人设失败({max_attempts}次尝试): {last_error}, 使用规则生成", + "m013": "从损坏的JSON中提取了部分信息", + "m014": "JSON修复失败,返回基础结构", + "m015": "实时保存 profiles 失败: {e}", + "m016": "生成实体 {entity} 的人设失败: {str}", + "m017": "开始并行生成 {total} 个Agent人设(并行数: {parallel_count})...", + "m018": "[{current}/{total}] {entity} 使用备用人设: {error}", + "m019": "[{current}/{total}] 成功生成人设: {entity} ({entity_type})", + "m020": "处理实体 {entity} 时发生异常: {str}", + "m021": "已保存 {len} 个Twitter Profile到 {file_path} (OASIS CSV格式)", + "m022": "已保存 {len} 个Reddit Profile到 {file_path} (JSON格式,包含user_id字段)", + "m023": "save_profiles_to_json已废弃,请使用save_profiles方法" + }, + "simulation_config": { + "m001": "开始智能生成模拟配置: simulation_id={simulation_id}, 实体数={len}", + "m002": "为初始帖子分配合适的发布者 Agent...", + "m003": "模拟配置生成完成: {len} 个Agent配置", + "m004": "LLM输出被截断 (attempt {attempt})", + "m005": "JSON解析失败 (attempt {attempt}): {str}", + "m006": "LLM调用失败 (attempt {attempt}): {str}", + "m007": "时间配置LLM生成失败: {e}, 使用默认配置", + "m008": "agents_per_hour_min ({agents_per_hour_min}) 超过总Agent数 ({num_entities}),已修正", + "m009": "agents_per_hour_max ({agents_per_hour_max}) 超过总Agent数 ({num_entities}),已修正", + "m010": "agents_per_hour_min >= max,已修正为 {agents_per_hour_min}", + "m011": "事件配置LLM生成失败: {e}, 使用默认配置", + "m012": "未找到类型 '{poster_type}' 的匹配 Agent,使用影响力最高的 Agent", + "m013": "初始帖子分配: poster_type='{poster_type}' -> agent_id={matched_agent_id}", + "m014": "Agent配置批次LLM生成失败: {e}, 使用规则生成" + }, + "zep_graph_memory_updater": { + "m001": "ZepGraphMemoryUpdater 初始化完成: graph_id={graph_id}, batch_size={self}", + "m002": "ZepGraphMemoryUpdater 已启动: graph_id={self}", + "m003": "ZepGraphMemoryUpdater 已停止: graph_id={self}, total_activities={self_2}, batches_sent={self_3}, items_sent={self_4}, failed={self_5}, skipped={self_6}", + "m004": "添加活动到Zep队列: {activity} - {activity_2}", + 
"m005": "工作循环异常: {e}", + "m006": "成功批量发送 {len} 条{display_name}活动到图谱 {self}", + "m007": "批量内容预览: {combined_text}...", + "m008": "批量发送到Zep失败 (尝试 {attempt}/{self}): {e}", + "m009": "批量发送到Zep失败,已重试{self}次: {e}", + "m010": "发送{display_name}平台剩余的 {len} 条活动", + "m011": "创建图谱记忆更新器: simulation_id={simulation_id}, graph_id={graph_id}", + "m012": "已停止图谱记忆更新器: simulation_id={simulation_id}", + "m013": "停止更新器失败: simulation_id={simulation_id}, error={e}", + "m014": "已停止所有图谱记忆更新器" + }, + "zep_entity_reader": { + "m001": "Zep {operation_name} 第 {attempt} 次尝试失败: {str}, {delay}秒后重试...", + "m002": "Zep {operation_name} 在 {max_retries} 次尝试后仍失败: {str}", + "m003": "获取图谱 {graph_id} 的所有节点...", + "m004": "共获取 {len} 个节点", + "m005": "获取图谱 {graph_id} 的所有边...", + "m006": "共获取 {len} 条边", + "m007": "获取节点 {node_uuid} 的边失败: {str}", + "m008": "开始筛选图谱 {graph_id} 的实体...", + "m009": "筛选完成: 总节点 {total_count}, 符合条件 {len}, 实体类型: {entity_types_found}", + "m010": "获取实体 {entity_uuid} 失败: {str}" + }, + "simulation_ipc": { + "m001": "发送IPC命令: {command_type}, command_id={command_id}", + "m002": "收到IPC响应: command_id={command_id}, status={response}", + "m003": "解析响应失败: {e}", + "m004": "等待IPC响应超时: command_id={command_id}", + "m005": "读取命令文件失败: {filepath}, {e}" + }, + "simulation_manager": { + "m001": "创建模拟: {simulation_id}, project={project_id}, graph={graph_id}", + "m002": "模拟准备完成: {simulation_id}, entities={state}, profiles={state_2}", + "m003": "模拟准备失败: {simulation_id}, error={str}" + }, + "simulation_api": { + "m002": "获取图谱实体: graph_id={graph_id}, entity_types={entity_types}, enrich={enrich}", + "m003": "获取图谱实体失败: {str}", + "m006": "获取实体详情失败: {str}", + "m008": "获取实体失败: {str}", + "m012": "创建模拟失败: {str}", + "m013": "检测模拟准备状态: {simulation_id}, status={status}, config_generated={config_generated}", + "m014": "自动更新模拟状态: {simulation_id} preparing -> ready", + "m015": "自动更新状态失败: {e}", + "m016": "模拟 {simulation_id} 检测结果: 已准备完成 (status={status}, config_generated={config_generated})", + "m017": "模拟 {simulation_id} 
检测结果: 未准备完成 (status={status}, config_generated={config_generated})", + "m020": "开始处理 /prepare 请求: simulation_id={simulation_id}, force_regenerate={force_regenerate}", + "m021": "检查模拟 {simulation_id} 是否已准备完成...", + "m022": "检查结果: is_prepared={is_prepared}, prepare_info={prepare_info}", + "m023": "模拟 {simulation_id} 已准备完成,跳过重复生成", + "m024": "模拟 {simulation_id} 未准备完成,将启动准备任务", + "m027": "同步获取实体数量: graph_id={state}", + "m028": "预期实体数量: {filtered_preview}, 类型: {filtered_preview_2}", + "m029": "同步获取实体数量失败(将在后台任务中重试): {e}", + "m030": "准备模拟失败: {str}", + "m031": "启动准备任务失败: {str}", + "m034": "查询任务状态失败: {str}", + "m036": "获取模拟状态失败: {str}", + "m037": "列出模拟失败: {str}", + "m038": "查找 simulation {simulation_id} 的 report 失败: {e}", + "m039": "获取历史模拟失败: {str}", + "m040": "获取Profile失败: {str}", + "m042": "读取 profiles 文件失败(可能正在写入中): {e}", + "m043": "实时获取Profile失败: {str}", + "m045": "读取 config 文件失败(可能正在写入中): {e}", + "m046": "实时获取Config失败: {str}", + "m048": "获取配置失败: {str}", + "m050": "下载配置失败: {str}", + "m053": "下载脚本失败: {str}", + "m056": "生成Profile失败: {str}", + "m062": "强制模式:停止运行中的模拟 {simulation_id}", + "m063": "停止模拟时出现警告: {str}", + "m065": "强制模式:清理模拟日志 {simulation_id}", + "m066": "清理日志时出现警告: {cleanup_result}", + "m067": "模拟 {simulation_id} 准备工作已完成,重置状态为 ready(原状态: {state})", + "m070": "启用图谱记忆更新: simulation_id={simulation_id}, graph_id={graph_id}", + "m071": "启动模拟失败: {str}", + "m073": "停止模拟失败: {str}", + "m074": "获取运行状态失败: {str}", + "m075": "获取详细状态失败: {str}", + "m076": "获取动作历史失败: {str}", + "m077": "获取时间线失败: {str}", + "m078": "获取Agent统计失败: {str}", + "m079": "获取帖子失败: {str}", + "m080": "获取评论失败: {str}", + "m087": "Interview失败: {str}", + "m096": "批量Interview失败: {str}", + "m102": "全局Interview失败: {str}", + "m104": "获取Interview历史失败: {str}", + "m106": "获取环境状态失败: {str}", + "m108": "关闭环境失败: {str}" + }, + "report_api": { + "m001": "报告生成失败: {str}", + "m002": "启动报告生成任务失败: {str}", + "m003": "查询任务状态失败: {str}", + "m004": "获取报告失败: {str}", + "m005": "获取报告失败: {str}", + "m006": "列出报告失败: {str}", + "m007": 
"下载报告失败: {str}", + "m008": "删除报告失败: {str}", + "m009": "对话失败: {str}", + "m010": "获取报告进度失败: {str}", + "m011": "获取章节列表失败: {str}", + "m012": "获取章节内容失败: {str}", + "m013": "检查报告状态失败: {str}", + "m014": "获取Agent日志失败: {str}", + "m015": "获取Agent日志失败: {str}", + "m016": "获取控制台日志失败: {str}", + "m017": "获取控制台日志失败: {str}", + "m018": "图谱搜索失败: {str}", + "m019": "获取图谱统计失败: {str}" + }, + "graph_api": { + "m006": "=== 开始生成本体定义 ===", + "m007": "项目名称: {project_name}", + "m008": "模拟需求: {simulation_requirement}...", + "m011": "创建项目: {project}", + "m013": "文本提取完成,共 {len} 字符", + "m014": "调用 LLM 生成本体定义...", + "m015": "本体生成完成: {entity_count} 个实体类型, {edge_count} 个关系类型", + "m016": "=== 本体生成完成 === 项目ID: {project}", + "m017": "=== 开始构建图谱 ===", + "m018": "配置错误: {errors}", + "m019": "请求参数: project_id={project_id}", + "m026": "创建图谱构建任务: task_id={task_id}, project_id={project_id}" + }, + "bootstrap": { + "m001": "MiroFish Backend 启动中...", + "m002": "已注册模拟进程清理函数", + "m003": "请求: {request} {request_2}", + "m004": "请求体: {request}", + "m005": "响应: {response}", + "m006": "MiroFish Backend 启动完成" + } }, "report": { "taskStarted": "报告生成任务开始", diff --git a/scripts/_apply_translations.py b/scripts/_apply_translations.py new file mode 100644 index 00000000..56973ead --- /dev/null +++ b/scripts/_apply_translations.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +"""Overwrite English values in ``locales/en.json`` from a translation map. + +Usage:: + + python scripts/_apply_translations.py path/to/translations.json + +The translation file is a flat JSON object ``{"a.b.c": "English text", ...}``. +Each key path must already exist in ``en.json``; missing keys raise. 
+""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parent.parent + + +def _set_path(tree, path: str, value: str) -> None: + parts = path.split(".") + cursor = tree + for part in parts[:-1]: + if part not in cursor or not isinstance(cursor[part], dict): + raise KeyError(f"missing parent path: {path}") + cursor = cursor[part] + if parts[-1] not in cursor: + raise KeyError(f"missing leaf key: {path}") + cursor[parts[-1]] = value + + +def main() -> int: + if len(sys.argv) != 2: + print("usage: _apply_translations.py <translations.json>", file=sys.stderr) + return 2 + blob_path = Path(sys.argv[1]) + translations = json.loads(blob_path.read_text(encoding="utf-8")) + en_path = REPO_ROOT / "locales" / "en.json" + en = json.loads(en_path.read_text(encoding="utf-8")) + for key, value in translations.items(): + _set_path(en, key, value) + en_path.write_text(json.dumps(en, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") + print(f"applied {len(translations)} translations to {en_path}") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/_codemod_i18n.py b/scripts/_codemod_i18n.py new file mode 100644 index 00000000..f5df4f45 --- /dev/null +++ b/scripts/_codemod_i18n.py @@ -0,0 +1,303 @@ +#!/usr/bin/env python3 +"""One-shot codemod for ticket #6. + +For a single in-scope backend Python file, walk every Chinese-bearing +``logger.{info,warning,error,debug,exception}(...)`` call and every +``jsonify({"error|message": "..."})`` call, replace the literal with a +``t("<namespace>.<key>", **kwargs)`` lookup, and emit the matching +zh-side locale entries (en-side stubs use the same Chinese text and are +translated manually afterwards). + +Usage:: + + python scripts/_codemod_i18n.py <path> --log-namespace log.<module> [--api-error-namespace api.error.<module>] [--api-message-namespace api.message.<module>] + +The script: + * Modifies the source file in place.
+ * Writes a JSON blob of new locale entries to stdout:: + + {"keys": {"log.<module>.<key>": "原文", ...}} + +This blob is merged into both ``locales/en.json`` and ``locales/zh.json`` by a +separate pass (English values are translated by hand afterward). +""" + +from __future__ import annotations + +import argparse +import ast +import io +import json +import re +import sys +import tokenize +from pathlib import Path +from typing import Iterable + +CHINESE_RE = re.compile(r"[一-鿿]") +PLACEHOLDER_RE = re.compile(r"\{(\w+)\}") +SAFE_NAME_RE = re.compile(r"[^a-zA-Z0-9_]") + +DEFAULT_PLACEHOLDER_NAMES = [ + "value", "value2", "value3", "value4", "value5", "value6", "value7", +] + + +def _has_chinese(s: str) -> bool: + return bool(CHINESE_RE.search(s)) + + +def _expr_to_kw(expr: ast.AST, source: str) -> str: + """Pretty-print an expression node back into source text.""" + return ast.unparse(expr) + + +def _primary_name(expr_text: str) -> str | None: + """Extract the leading identifier from a Python expression text, if any.""" + match = re.match(r"^([A-Za-z_][A-Za-z0-9_]*)", expr_text.strip()) + return match.group(1) if match else None + + +def _slugify_expr(expr_text: str, used: set[str]) -> str: + primary = _primary_name(expr_text) + if primary: + base = primary.lower() + else: + base = SAFE_NAME_RE.sub("_", expr_text).strip("_") or "value" + base = re.sub(r"_+", "_", base).lower() + if not base or base[0].isdigit(): + base = f"v_{base}" + return _next_unique(base, used) + + +def _next_unique(name: str, used: set[str]) -> str: + if name not in used: + used.add(name) + return name + i = 2 + while f"{name}_{i}" in used: + i += 1 + final = f"{name}_{i}" + used.add(final) + return final + + +def _flatten_string_arg(node: ast.AST, source: str) -> tuple[str, dict[str, str]] | None: + """Return (template, kwargs) for a string-like argument or None. + + ``template`` uses ``{name}`` placeholders; ``kwargs`` maps each placeholder + name to the source text of its expression.
+ """ + if isinstance(node, ast.Constant) and isinstance(node.value, str): + return node.value, {} + if isinstance(node, ast.JoinedStr): + used: set[str] = set() + parts: list[str] = [] + kwargs: dict[str, str] = {} + for value in node.values: + if isinstance(value, ast.Constant) and isinstance(value.value, str): + parts.append(value.value) + elif isinstance(value, ast.FormattedValue): + expr_text = _expr_to_kw(value.value, source) + placeholder = _slugify_expr(expr_text, used) + kwargs[placeholder] = expr_text + parts.append("{" + placeholder + "}") + else: + return None + template = "".join(parts) + # Normalise braces inside literal text so {} not coming from a placeholder + # doesn't get misread by t().replace(); literals containing literal { or } + # are rare in this codebase but we'll guard anyway. + for ph_name in kwargs: + if template.count("{" + ph_name + "}") < 1: + return None + return template, kwargs + return None + + +def _slug_from_template(template: str, used: set[str], fallback_index: int) -> str: + """Build a key suffix. + + The naming scheme is ``m<NNN>`` based on a per-file counter. This keeps + the JSON keys ASCII-only, easy to grep, and stable enough for review. + Semantic renaming can be done in a post-pass for the keys that justify it.
+ """ + candidate = f"m{fallback_index:03d}" + return _next_unique(candidate, used) + + +def _format_t_call(namespace: str, key: str, kwargs: dict[str, str]) -> str: + full_key = f"{namespace}.{key}" + if not kwargs: + return f't("{full_key}")' + formatted_kwargs = ", ".join(f"{name}={expr}" for name, expr in kwargs.items()) + return f't("{full_key}", {formatted_kwargs})' + + +class Rewriter(ast.NodeVisitor): + def __init__( + self, + source: str, + log_namespace: str, + api_error_namespace: str | None, + api_message_namespace: str | None, + ): + self.source = source + self.log_namespace = log_namespace + self.api_error_namespace = api_error_namespace + self.api_message_namespace = api_message_namespace + # Each replacement is (lineno, original_segment_text, new_text). + # ``lineno`` is the 1-based line on which the original segment starts; + # used as an anchor so multiple identical segments in the file can be + # disambiguated. + self.replacements: list[tuple[int, str, str]] = [] + self.entries: dict[str, str] = {} # full_key -> original chinese template + self.used_keys: set[str] = set() + self.counter = 0 + + def visit_Call(self, node: ast.Call): + if self._is_logger_call(node): + for i, arg in enumerate(node.args): + self._maybe_rewrite_arg(arg, self.log_namespace) + for kw in node.keywords: + if kw.value is not None: + self._maybe_rewrite_arg(kw.value, self.log_namespace) + elif self._is_jsonify_call(node): + for arg in node.args: + if isinstance(arg, ast.Dict): + for key, value in zip(arg.keys, arg.values): + if not isinstance(key, ast.Constant) or not isinstance(key.value, str): + continue + if key.value == "error" and self.api_error_namespace: + self._maybe_rewrite_arg(value, self.api_error_namespace) + elif key.value == "message" and self.api_message_namespace: + self._maybe_rewrite_arg(value, self.api_message_namespace) + self.generic_visit(node) + + @staticmethod + def _is_logger_call(node: ast.Call) -> bool: + func = node.func + return ( + 
isinstance(func, ast.Attribute) + and func.attr in {"debug", "info", "warning", "error", "exception", "critical"} + and isinstance(func.value, ast.Name) + and func.value.id == "logger" + ) + + @staticmethod + def _is_jsonify_call(node: ast.Call) -> bool: + func = node.func + if isinstance(func, ast.Name) and func.id == "jsonify": + return True + if isinstance(func, ast.Attribute) and func.attr == "jsonify": + return True + return False + + def _maybe_rewrite_arg(self, node: ast.AST, namespace: str) -> None: + flat = _flatten_string_arg(node, self.source) + if flat is None: + return + template, kwargs = flat + if not _has_chinese(template): + return + original_segment = ast.get_source_segment(self.source, node) + if original_segment is None: + return + self.counter += 1 + key = _slug_from_template(template, self.used_keys, self.counter) + full_key = f"{namespace}.{key}" + new_text = _format_t_call(namespace, key, kwargs) + self.replacements.append((node.lineno, original_segment, new_text)) + self.entries[full_key] = template + + +def _apply_replacements(source: str, replacements): + """Apply each (lineno, original_segment, new_text) replacement in order. + + Each replacement is anchored to the line on which its original segment + starts so that identical literals elsewhere in the file are not touched + by accident. + """ + lines = source.splitlines(keepends=True) + line_offsets = [0] + for line in lines: + line_offsets.append(line_offsets[-1] + len(line)) + + # Apply in source order so the search anchor stays correct as offsets shift. 
+ sorted_reps = sorted(replacements, key=lambda r: r[0]) + delta = 0 + for lineno, original, new_text in sorted_reps: + anchor = line_offsets[lineno - 1] + delta + idx = source.find(original, anchor) + if idx == -1: + raise RuntimeError( + f"could not locate original segment on line {lineno}: {original!r}" + ) + source = source[:idx] + new_text + source[idx + len(original):] + delta += len(new_text) - len(original) + return source + + +_API_LOCALE_IMPORT = "from ..utils.locale import t\n" +_SERVICE_LOCALE_IMPORT = "from ..utils.locale import t\n" + + +def _ensure_t_import(source: str, target_path: Path) -> str: + """Add ``from ..utils.locale import t`` when no ``t`` is imported yet.""" + tree = ast.parse(source, filename=str(target_path)) + for node in ast.iter_child_nodes(tree): + if isinstance(node, ast.ImportFrom) and node.module and node.module.endswith("utils.locale"): + for alias in node.names: + if alias.name == "t": + return source # already imports t + # Append ``t`` to the existing import line. + new_names = [alias.name for alias in node.names] + ["t"] + new_line = f"from {'.' * node.level}{node.module} import {', '.join(new_names)}\n" + lines = source.splitlines(keepends=True) + # Preserve original line range; ImportFrom may span multiple lines but + # in this codebase they are always single-line. + start = node.lineno - 1 + end = (node.end_lineno or node.lineno) - 1 + return "".join(lines[:start]) + new_line + "".join(lines[end + 1:]) + # No locale import at all — insert one after the last top-level import. 
+ lines = source.splitlines(keepends=True) + insert_at = 0 + for node in ast.iter_child_nodes(tree): + if isinstance(node, (ast.Import, ast.ImportFrom)): + insert_at = max(insert_at, (node.end_lineno or node.lineno)) + return "".join(lines[:insert_at]) + _SERVICE_LOCALE_IMPORT + "".join(lines[insert_at:]) + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser() + parser.add_argument("path", type=Path) + parser.add_argument("--log-namespace", required=True) + parser.add_argument("--api-error-namespace", default=None) + parser.add_argument("--api-message-namespace", default=None) + parser.add_argument("--dry-run", action="store_true") + args = parser.parse_args(argv) + + source = args.path.read_text(encoding="utf-8") + tree = ast.parse(source, filename=str(args.path)) + rewriter = Rewriter( + source=source, + log_namespace=args.log_namespace, + api_error_namespace=args.api_error_namespace, + api_message_namespace=args.api_message_namespace, + ) + rewriter.visit(tree) + + if not rewriter.replacements: + print(json.dumps({"keys": {}, "modified": False})) + return 0 + + new_source = _apply_replacements(source, rewriter.replacements) + new_source = _ensure_t_import(new_source, args.path) + if not args.dry_run: + args.path.write_text(new_source, encoding="utf-8") + print(json.dumps({"keys": rewriter.entries, "modified": not args.dry_run}, ensure_ascii=False)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/_merge_locale_keys.py b/scripts/_merge_locale_keys.py new file mode 100644 index 00000000..962366bf --- /dev/null +++ b/scripts/_merge_locale_keys.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +"""Merge a flat ``{"keys": {"a.b.c": "..."}}`` blob into both locale files. + +Usage:: + + cat blob.json | python scripts/_merge_locale_keys.py + +The blob is the JSON line emitted by ``_codemod_i18n.py``. New keys are +inserted into both ``locales/en.json`` and ``locales/zh.json``. 
The Chinese +text is preserved verbatim on both sides; the English translations are +applied in a separate manual pass after every codemod run completes. +""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parent.parent +LOCALES_DIR = REPO_ROOT / "locales" + + +def _set_path(tree: dict, key_path: str, value: str) -> bool: + """Insert ``value`` at the dotted ``key_path`` inside ``tree``. + + Returns True when the key is newly added; False when it already existed. + Raises ``ValueError`` if an intermediate segment exists but is not a dict. + """ + parts = key_path.split(".") + cursor = tree + for part in parts[:-1]: + nxt = cursor.get(part) + if nxt is None: + nxt = {} + cursor[part] = nxt + elif not isinstance(nxt, dict): + raise ValueError( + f"cannot insert {key_path}: existing value at '{part}' is not a dict" + ) + cursor = nxt + leaf = parts[-1] + if leaf in cursor: + return False + cursor[leaf] = value + return True + + +def _sort_dict_recursive(tree): + if isinstance(tree, dict): + return {k: _sort_dict_recursive(tree[k]) for k in sorted(tree.keys())} + return tree + + +def main() -> int: + blob = json.loads(sys.stdin.read()) + entries: dict[str, str] = blob.get("keys", {}) + if not entries: + print("no entries", file=sys.stderr) + return 0 + + en_path = LOCALES_DIR / "en.json" + zh_path = LOCALES_DIR / "zh.json" + en = json.loads(en_path.read_text(encoding="utf-8")) + zh = json.loads(zh_path.read_text(encoding="utf-8")) + + # Determine the nested sub-namespace to keep alphabetised + namespaces_touched: set[str] = set() + for full_key in entries: + parts = full_key.split(".") + # Re-sort up to the parent of the leaf so new keys land alphabetically. 
+ namespaces_touched.add(".".join(parts[:-1])) + + added = 0 + for full_key, value in entries.items(): + if _set_path(en, full_key, value): + added += 1 + _set_path(zh, full_key, value) + + # Sort the touched sub-namespaces (and their parents) so diffs are stable. + for ns in namespaces_touched: + for tree in (en, zh): + cursor = tree + parts = ns.split(".") + for part in parts: + if part not in cursor or not isinstance(cursor[part], dict): + cursor = None + break + cursor = cursor[part] + if cursor is None: + continue + sorted_subtree = _sort_dict_recursive(cursor) + cursor.clear() + cursor.update(sorted_subtree) + + en_path.write_text(json.dumps(en, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") + zh_path.write_text(json.dumps(zh, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") + + print(f"added {added} new keys ({len(entries) - added} already present)") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/check_i18n_logs.py b/scripts/check_i18n_logs.py new file mode 100644 index 00000000..c98a3050 --- /dev/null +++ b/scripts/check_i18n_logs.py @@ -0,0 +1,239 @@ +#!/usr/bin/env python3 +"""Verify backend i18n externalization for ticket #6. + +Two checks (both run by default): + +* ``--logs``: scan the in-scope backend modules and report any Chinese + character (U+4E00-U+9FFF) that still appears inside the string-literal + arguments of ``logger.{info,warning,error,debug,exception}(...)`` calls or + inside the ``error`` / ``message`` field values of ``jsonify({...})`` calls. + +* ``--parity``: load every ``*.json`` in ``locales/`` (excluding + ``languages.json``) and verify that the recursive set of key paths is + identical across every file. + +Exit code is 0 when both checks pass and 1 otherwise. Each finding is printed +on its own line as ``<path>:<line>: <reason>: <snippet>``. Final line is +``OK`` or ``N issues``.
+ +The script depends only on the Python standard library so it can be invoked +from a clean checkout: ``python scripts/check_i18n_logs.py``. +""" + +from __future__ import annotations + +import argparse +import ast +import json +import re +import sys +from pathlib import Path +from typing import Iterable + +REPO_ROOT = Path(__file__).resolve().parent.parent + +# In-scope backend modules per .kiro/specs/i18n-externalize-backend-logs/design.md. +# ``backend/app/__init__.py`` is also covered to satisfy the ticket's +# repo-wide grep guard, even though it lives outside the listed module set. +SOURCE_FILES = [ + "backend/app/__init__.py", + "backend/app/services/report_agent.py", + "backend/app/services/zep_tools.py", + "backend/app/services/simulation_runner.py", + "backend/app/services/oasis_profile_generator.py", + "backend/app/services/simulation_config_generator.py", + "backend/app/services/zep_graph_memory_updater.py", + "backend/app/services/ontology_generator.py", + "backend/app/services/simulation_manager.py", + "backend/app/services/zep_entity_reader.py", + "backend/app/services/simulation_ipc.py", + "backend/app/services/graph_builder.py", + "backend/app/api/simulation.py", + "backend/app/api/report.py", + "backend/app/api/graph.py", +] + +LOCALES_DIR = REPO_ROOT / "locales" + +LOGGER_METHODS = {"debug", "info", "warning", "error", "exception", "critical"} +JSONIFY_TRANSLATED_FIELDS = {"error", "message"} + +CHINESE_RE = re.compile(r"[一-鿿]") + + +def _has_chinese(text: str) -> bool: + return bool(CHINESE_RE.search(text)) + + +def _string_literal_value(node: ast.AST) -> str | None: + """Return the string value of a literal ``Constant``/``JoinedStr``, else None.""" + if isinstance(node, ast.Constant) and isinstance(node.value, str): + return node.value + if isinstance(node, ast.JoinedStr): + parts: list[str] = [] + for value in node.values: + if isinstance(value, ast.Constant) and isinstance(value.value, str): + parts.append(value.value) + else: + # 
Conservatively render dynamic interpolation segments as a + # placeholder so that surrounding Chinese text in the static + # parts is still detected. + parts.append("�") + return "".join(parts) + return None + + +def _is_logger_call(node: ast.Call) -> bool: + func = node.func + return ( + isinstance(func, ast.Attribute) + and func.attr in LOGGER_METHODS + and isinstance(func.value, ast.Name) + and func.value.id == "logger" + ) + + +def _is_jsonify_call(node: ast.Call) -> bool: + func = node.func + if isinstance(func, ast.Name) and func.id == "jsonify": + return True + if isinstance(func, ast.Attribute) and func.attr == "jsonify": + return True + return False + + +def _scan_call_for_chinese(node: ast.Call, source_lines: list[str]) -> Iterable[tuple[int, str, str]]: + """Yield (line, reason, snippet) for any Chinese in this call's arguments.""" + if _is_logger_call(node): + for arg in node.args: + text = _string_literal_value(arg) + if text and _has_chinese(text): + yield ( + arg.lineno, + "chinese inside logger call argument", + _snippet(source_lines, arg.lineno), + ) + for kw in node.keywords: + text = _string_literal_value(kw.value) if kw.value is not None else None + if text and _has_chinese(text): + yield ( + kw.value.lineno, + "chinese inside logger call keyword argument", + _snippet(source_lines, kw.value.lineno), + ) + return + + if _is_jsonify_call(node): + for arg in node.args: + yield from _scan_jsonify_arg(arg, source_lines) + + +def _scan_jsonify_arg(arg: ast.AST, source_lines: list[str]) -> Iterable[tuple[int, str, str]]: + """Yield findings for Chinese inside ``error`` or ``message`` dict values.""" + if isinstance(arg, ast.Dict): + for key, value in zip(arg.keys, arg.values): + if not isinstance(key, ast.Constant) or not isinstance(key.value, str): + continue + if key.value not in JSONIFY_TRANSLATED_FIELDS: + continue + text = _string_literal_value(value) + if text and _has_chinese(text): + yield ( + value.lineno, + f"chinese inside jsonify 
{key.value} field", + _snippet(source_lines, value.lineno), + ) + + +def _snippet(source_lines: list[str], lineno: int) -> str: + if 1 <= lineno <= len(source_lines): + return source_lines[lineno - 1].rstrip() + return "" + + +def check_logs() -> list[str]: + """Return a list of findings (empty when clean).""" + findings: list[str] = [] + for rel_path in SOURCE_FILES: + abs_path = REPO_ROOT / rel_path + if not abs_path.exists(): + findings.append(f"{rel_path}:0: missing in-scope file: not found") + continue + source = abs_path.read_text(encoding="utf-8") + source_lines = source.splitlines() + try: + tree = ast.parse(source, filename=str(abs_path)) + except SyntaxError as exc: + findings.append(f"{rel_path}:{exc.lineno or 0}: syntax error: {exc.msg}") + continue + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + for line, reason, snippet in _scan_call_for_chinese(node, source_lines): + findings.append(f"{rel_path}:{line}: {reason}: {snippet.strip()}") + return findings + + +def _collect_key_paths(obj, prefix: str = "") -> set[str]: + paths: set[str] = set() + if isinstance(obj, dict): + for k, v in obj.items(): + child_prefix = f"{prefix}.{k}" if prefix else k + if isinstance(v, dict): + paths.update(_collect_key_paths(v, child_prefix)) + else: + paths.add(child_prefix) + return paths + + +def check_parity() -> list[str]: + findings: list[str] = [] + locale_files = sorted(p for p in LOCALES_DIR.glob("*.json") if p.name != "languages.json") + if len(locale_files) < 2: + return findings + key_sets: dict[str, set[str]] = {} + for path in locale_files: + try: + data = json.loads(path.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + findings.append(f"{path.relative_to(REPO_ROOT)}:0: invalid JSON: {exc.msg}") + continue + key_sets[path.name] = _collect_key_paths(data) + if len(key_sets) < 2: + return findings + union = set().union(*key_sets.values()) + for path_name, keys in key_sets.items(): + missing = sorted(union - 
keys) + for key_path in missing: + findings.append(f"locales/{path_name}:0: missing key path: {key_path}") + return findings + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--logs", action="store_true", help="run the source-scan check only") + parser.add_argument("--parity", action="store_true", help="run the locale-parity check only") + args = parser.parse_args(argv) + + run_logs = args.logs or not args.parity + run_parity = args.parity or not args.logs + # If neither flag is set, both default to True (handled above). + + findings: list[str] = [] + if run_logs: + findings.extend(check_logs()) + if run_parity: + findings.extend(check_parity()) + + for finding in findings: + print(finding) + + if findings: + print(f"{len(findings)} issues") + return 1 + print("OK") + return 0 + + +if __name__ == "__main__": + sys.exit(main())