fix(memory): address quality cleanup audit findings

2026-07-03 14:15:20 +02:00 · 2026-04-28 14:29:28 +08:00
parent c7088a8a6e
commit 8e07bfe3c1
9 changed files with 267 additions and 63 deletions
@@ -324,6 +324,35 @@ Memory candidates:
  }
 });

+test("parseWorkspaceMemoryCandidates redacts secrets in extraction rejection log", async () => { // The text logged for a rejected candidate must have its secret values replaced with [REDACTED].
+  const dataHome = await mkdtemp(join(tmpdir(), "wm-extraction-redact-data-"));
+  const previousXdgDataHome = process.env.XDG_DATA_HOME; // saved so the finally block can restore it
+  process.env.XDG_DATA_HOME = dataHome; // the log path asserted below is built under this temp directory
+
+  try {
+    const summary = `
+Memory candidates:
+- reference TypeError: bearer sk_test token=tok123 password=pass123 secret=sec123 api_key=key123
+`;
+
+    const items = parseWorkspaceMemoryCandidates(summary);
+
+    assert.equal(items.length, 0); // the single candidate is expected to be rejected rather than returned
+    const logPath = join(dataHome, "opencode-working-memory", "extraction-rejections.jsonl");
+    const lines = (await waitForFile(logPath)).trim().split("\n"); // NOTE(review): presumably waitForFile waits for the log to appear and resolves with its contents — confirm against its definition
+    assert.equal(lines.length, 1); // exactly one rejection event, one JSON document per line
+    const event = JSON.parse(lines[0]);
+    assert.equal(
+      event.text,
+      "TypeError: bearer [REDACTED] token=[REDACTED] password=[REDACTED] secret=[REDACTED] api_key=[REDACTED]",
+    );
+  } finally {
+    if (previousXdgDataHome === undefined) delete process.env.XDG_DATA_HOME; // was unset before the test, so unset it again
+    else process.env.XDG_DATA_HOME = previousXdgDataHome;
+    await rm(dataHome, { recursive: true, force: true });
+  }
+});
+
 test("parseWorkspaceMemoryCandidates rejects exact file count snapshots", () => {
  const summary = `
 Memory candidates:
@@ -30,38 +30,38 @@ function mem(
 }

 export const REAL_WORKSPACE_FIXTURES: Record<string, RealWorkspaceFixtureEntry[]> = {
-  "medical-atlas": [
-    mem("ma_ui_rule", "feedback", "UI 要統一風格：兩個表格都要 scrollable，約 20 rows", "active", "durable UI rule without user preference keyword"),
-    mem("ma_csp_rule", "feedback", "架構師建議中期將 CSP 改為 nonce/hash，而非 'unsafe-inline'", "active", "durable architecture recommendation"),
-    mem("ma_form_rule", "decision", "Form 添加防御性 action/method 屬性，避免 JS 失效時 GET 首頁", "active", "declarative design rule"),
-    mem("ma_logging_rule", "decision", "Cloud Logging filter 需支援多種 log 格式（jsonPayload.event_type, jsonPayload.message, textPayload）", "active", "durable spec using 需支援"),
+  "workspace-alpha": [
+    mem("alpha_ui_rule", "feedback", "UI should have consistent style: both tables scrollable, about 20 rows", "active", "durable UI rule without user preference keyword"),
+    mem("alpha_csp_rule", "feedback", "Architecture recommendation: migrate the content security policy to nonce or hash rules rather than unsafe inline scripts", "active", "durable architecture recommendation"),
+    mem("alpha_form_rule", "decision", "Form uses defensive action and method attributes so the fallback does not navigate to the home page when scripts fail", "active", "declarative design rule"),
+    mem("alpha_logging_rule", "decision", "Cloud logging filter supports multiple log formats: structured event type, structured message, and text payload", "active", "durable declarative logging spec"),
  ],
-  "opencode-record": [
-    mem("or_phase_snapshot", "project", "後端健康改進計劃已完成 Phase 1-4", "superseded", "progress snapshot"),
-    mem("or_test_snapshot", "project", "測試套件：1237 tests pass, 226 suites", "superseded", "test count snapshot"),
-    mem("or_sync_snapshot", "project", "USB 同步：37 個文件（bundles, server, frontend, tests, docs）", "superseded", "file sync snapshot"),
+  "workspace-beta": [
+    mem("beta_phase_snapshot", "project", "Backend health improvement plan completed Phase 1-4", "superseded", "progress snapshot"),
+    mem("beta_test_snapshot", "project", "Test suite: 1237 tests pass, 226 suites", "superseded", "test count snapshot"),
+    mem("beta_sync_snapshot", "project", "External drive synced 37 files including bundles, service, frontend, tests, and docs", "superseded", "file sync snapshot"),
  ],
-  "agent-reports": [
-    mem("ar_plan_decision", "feedback", "架構師建議執行 P3 前先確認有實際需求", "active", "durable plan decision"),
-    mem("ar_reviewer_fallback", "feedback", "`comprehensive-code-reviewer` subagent unreliable; use `phase-verifier` as fallback", "active", "durable workaround rule"),
-    mem("ar_wave_rule", "feedback", "每個 Wave 結束要找 verifier 確認，全部結束找 code review", "active", "durable workflow rule"),
-    mem("ar_remote_headers", "decision", "Remote headers 透過 `requestInit: { headers }` 傳入 `StreamableHTTPClientTransport`", "active", "declarative API rule"),
-    mem("ar_signal_order", "decision", "Graceful process cleanup signal order: SIGINT (300ms) → SIGTERM (700ms) → SIGKILL", "active", "durable process cleanup spec"),
-    mem("ar_ownership", "decision", "`McpRuntimeState` ownership model: CLI owns both runtime and mcpRuntime, dispose order is runtime first", "active", "durable ownership model"),
-    mem("ar_retry_policy", "decision", "Recovery retry policy: only once per tool call, only for transport/session failures", "active", "durable retry policy"),
+  "workspace-gamma": [
+    mem("gamma_need_check", "feedback", "Architecture recommendation: confirm actual demand before executing the later priority phase", "active", "durable plan decision"),
+    mem("gamma_review_fallback", "feedback", "Primary review automation can be unreliable; use phase verification as the fallback", "active", "durable workaround rule"),
+    mem("gamma_wave_rule", "feedback", "Each wave should end with verifier confirmation, and the full implementation should end with code review", "active", "durable workflow rule"),
+    mem("gamma_remote_headers", "decision", "Remote headers are passed through the HTTP transport request initialization headers option", "active", "declarative API rule"),
+    mem("gamma_signal_order", "decision", "Graceful process cleanup signal order: interrupt for 300ms, terminate for 700ms, then kill", "active", "durable process cleanup spec"),
+    mem("gamma_ownership", "decision", "Runtime state ownership model: the command-line entrypoint owns both runtime objects, and disposal order is primary runtime first", "active", "durable ownership model"),
+    mem("gamma_retry_policy", "decision", "Recovery retry policy: only once per tool call, only for transport or session failures", "active", "durable retry policy"),
  ],
-  "pdf-extraction": [
-    mem("pe_user_cycle", "feedback", "User 要求完整的 plan-review-feedback-modify-verify 循環，不是直接執行", "active", "mixed-language user workflow preference"),
-    mem("pe_ollama_batch", "feedback", "Ollama 大批量嵌入需要控制批次大小（20-50）和請求間隔", "active", "durable operational knowledge"),
-    mem("pe_option_b", "decision", "Phase 2 Fix 採用 Option B：multi-profile search grouping", "active", "design decision using 採用"),
-    mem("pe_single_source", "decision", "MCP source 維持單一 `book`，書籍身份在 source ID", "active", "design constraint using 維持"),
-    mem("pe_endpoint", "decision", "Ollama endpoint is `/api/embed` (not `/api/embeddings`) with `\"input\"` field", "active", "declarative API fact"),
-    mem("pe_filter_pipeline", "decision", "Filter pipeline: pre-chunk filtering (not post-chunk) to prevent embedding contamination", "active", "durable architecture rule"),
-    mem("pe_do_not_delete", "decision", "不刪除孤立的 reference-like 行（正文中的 \"et al.\" 等是合法引用）", "active", "do-not rule not matching current 不要 pattern"),
+  "workspace-delta": [
+    mem("delta_user_cycle", "feedback", "User requires a complete plan, review, feedback, modify, and verify loop rather than direct execution", "active", "user workflow preference"),
+    mem("delta_batching", "feedback", "Large-batch embedding requires controlled batch size around 20 to 50 items and a delay between requests", "active", "durable operational knowledge"),
+    mem("delta_option_b", "decision", "Phase 2 fix adopted Option B: grouped search across multiple profiles", "active", "design decision using adopted"),
+    mem("delta_single_source", "decision", "MCP source keeps a single generic source type, with item identity encoded in the source ID", "active", "design constraint using keeps"),
+    mem("delta_endpoint", "decision", "Embedding service endpoint is `/api/embed` rather than `/api/embeddings`, with the input field in the request body", "active", "declarative API fact"),
+    mem("delta_filter_pipeline", "decision", "Filter pipeline uses pre-chunk filtering rather than post-chunk filtering to prevent embedding contamination", "active", "durable architecture rule"),
+    mem("delta_do_not_delete", "decision", "Do not delete isolated reference-like lines because citation fragments in body text can be valid references", "active", "do-not rule"),
  ],
-  "self-repo": [
-    mem("sr_author_credit", "feedback", "User insists on preserving external contributor author credit and uses merge workflow", "active", "durable preference using insists"),
-    mem("sr_branding", "decision", "Product branding is \"OpenCode Working Memory\" without \"Plugin\" in the name", "active", "durable branding rule"),
-    mem("sr_changelog", "decision", "CHANGELOG version scope follows git tags: changes from v1.2.3 tag through HEAD belong to next version", "active", "durable release rule"),
+  "workspace-epsilon": [
+    mem("epsilon_author_credit", "feedback", "User insists on preserving external contributor author credit and uses merge workflow", "active", "durable preference using insists"),
+    mem("epsilon_branding", "decision", "Product branding is \"Generic Working Memory\" without \"Plugin\" in the name", "active", "durable branding rule"),
+    mem("epsilon_changelog", "decision", "Changelog version scope follows release tags: changes from the previous version tag through the current branch belong to the next version", "active", "durable release rule"),
  ],
 };
@@ -1080,6 +1080,89 @@ test("quality cleanup migration writes audit log for hard supersedes", async ()
  }
 });

+test("quality cleanup migration aborts supersede when audit log cannot be written", async () => { // With the audit log location blocked, the entry must stay active and the migration must not be recorded.
+  const sandbox = await mkdtemp(join(tmpdir(), "wm-quality-audit-fail-"));
+  const dataHome = join(sandbox, "xdg-data-home");
+  const root = join(sandbox, "workspace");
+  const previousXdgDataHome = process.env.XDG_DATA_HOME; // saved so the finally block can restore it
+  const previousConsoleError = console.error; // saved so the finally block can restore it
+  process.env.XDG_DATA_HOME = dataHome;
+  console.error = () => {}; // mute console.error while the test runs; NOTE(review): presumably the failed audit write logs an error — confirm
+
+  try {
+    await mkdir(root, { recursive: true });
+    const now = "2026-04-28T00:00:00.000Z"; // fixed timestamp reused for every date field in the seeded store
+    const storePath = await workspaceMemoryPath(root);
+    await mkdir(dirname(storePath), { recursive: true });
+    await writeFile(storePath, JSON.stringify({
+      version: 1,
+      workspace: { root, key: await workspaceKey(root) },
+      limits: { maxRenderedChars: LONG_TERM_LIMITS.maxRenderedChars, maxEntries: LONG_TERM_LIMITS.maxEntries },
+      entries: [{
+        id: "hard_progress", // looked up by this id in the assertions below
+        type: "project",
+        text: "Test suite: 1237 tests pass, 226 suites",
+        source: "compaction",
+        confidence: 0.75,
+        status: "active", // must still be "active" after loading
+        createdAt: now,
+        updatedAt: now,
+        staleAfterDays: 60,
+      }],
+      migrations: [], // no migration recorded yet
+      updatedAt: now,
+    }, null, 2), "utf8");
+
+    const blockedLogDir = join(dataHome, "opencode-working-memory", "migration-logs");
+    await writeFile(blockedLogDir, "not a directory", "utf8"); // a regular file occupies the migration-logs path; NOTE(review): assumes the parent directory already exists (presumably created by the mkdir above) — confirm
+
+    const loaded = await loadWorkspaceMemory(root); // NOTE(review): presumably loading is what triggers the quality cleanup migration — confirm
+    const persisted = JSON.parse(await readFile(storePath, "utf8")) as WorkspaceMemoryStore; // re-read from disk to check the stored state as well as the returned one
+
+    assert.equal(loaded.entries.find(entry => entry.id === "hard_progress")?.status, "active");
+    assert.equal(persisted.entries.find(entry => entry.id === "hard_progress")?.status, "active");
+    assert.equal(loaded.migrations?.includes("2026-04-28-quality-cleanup"), false); // the migration id must not be recorded in memory...
+    assert.equal(persisted.migrations?.includes("2026-04-28-quality-cleanup"), false); // ...nor on disk
+  } finally {
+    console.error = previousConsoleError;
+    if (previousXdgDataHome === undefined) delete process.env.XDG_DATA_HOME; // was unset before the test, so unset it again
+    else process.env.XDG_DATA_HOME = previousXdgDataHome;
+    await rm(sandbox, { recursive: true, force: true });
+  }
+});
+
+test("real workspace regression fixture is de-identified and English-only", () => { // Guards REAL_WORKSPACE_FIXTURES against CJK text and the listed identifying terms.
+  const cjkText = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u; // matches any single Han, Hiragana, Katakana, or Hangul character
+  const identifyingTerms = [ // terms that must not appear in workspace keys or in entry text
+    "medical-atlas",
+    "opencode-record",
+    "agent-reports",
+    "pdf-extraction",
+    "self-repo",
+    "OpenCode Working Memory",
+  ];
+  const failures: string[] = []; // collected so one run reports every violation instead of stopping at the first
+
+  for (const [workspaceName, fixtureEntries] of Object.entries(REAL_WORKSPACE_FIXTURES)) {
+    if (identifyingTerms.some(term => workspaceName.includes(term))) { // case-sensitive substring match on the workspace key
+      failures.push(`${workspaceName}: workspace key should be generalized`);
+    }
+
+    for (const entry of fixtureEntries) {
+      if (cjkText.test(entry.text)) { // only entry.text is scanned for CJK characters
+        failures.push(`${workspaceName}/${entry.id}: text must be English-only`);
+      }
+      for (const term of identifyingTerms) { // one failure per matching term, so a single entry can be reported several times
+        if (entry.text.includes(term)) {
+          failures.push(`${workspaceName}/${entry.id}: text contains identifying term ${term}`);
+        }
+      }
+    }
+  }
+
+  assert.equal(failures.length, 0, `Fixture privacy failures:\n${failures.join("\n")}`); // the message lists every collected violation
+});
+
 test("quality cleanup migration regression against real workspace samples", async () => {
  const failures: string[] = [];
  const now = "2026-04-28T00:00:00.000Z";