fix(memory): address quality cleanup audit findings

2026-06-02 06:19:36 +02:00 · 2026-04-28 14:29:28 +08:00
parent c7088a8a6e
commit 8e07bfe3c1
9 changed files with 267 additions and 63 deletions
@@ -51,3 +51,6 @@ pnpm-lock.yaml

 # Superpowers local planning artifacts
 docs/superpowers/plans/
+
+# Local migration dry-run roots
+scripts/dev/dry-run-roots.local.txt
@@ -7,14 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [1.4.0] - 2026-04-28

-### Memory Quality Cleanup
+### Added

- Unified quality gate for compaction memory candidates and cleanup checks.
+- Local migration audit log for the `2026-04-28-quality-cleanup` migration:
+  `~/.local/share/opencode-working-memory/migration-logs/2026-04-28-quality-cleanup.jsonl`.
+- Local extraction rejection log for rejected compaction memory candidates:
+  `~/.local/share/opencode-working-memory/extraction-rejections.jsonl`.
+- Sanitized real-workspace regression fixtures for memory cleanup migration behavior.
+
+### Changed
+
+- Unified memory quality rules in a shared quality gate for compaction memory candidates and cleanup checks.
 - Rewritten compaction memory prompt to reduce over-production of low-quality memories.
- Conservative one-time quality cleanup migration (`2026-04-28-quality-cleanup`) that supersedes only high-confidence garbage patterns: progress snapshots, raw errors, commit/CI snapshots, temporary status notes, active file snapshots, code/API signatures, path-heavy entries, and empty entries.
- Soft heuristic failures (`bad_feedback`, `bad_decision`) are intentionally excluded from automatic migration cleanup to protect durable declarative memories such as branding rules, API facts, release rules, and architecture decisions.
- Migration audit log: `~/.local/share/opencode-working-memory/migration-logs/2026-04-28-quality-cleanup.jsonl`.
- Extraction rejection log: `~/.local/share/opencode-working-memory/extraction-rejections.jsonl`.
+- The one-time `2026-04-28-quality-cleanup` migration is now conservative: it supersedes only high-confidence garbage patterns, including progress snapshots, raw errors, commit/CI snapshots, temporary status notes, active file snapshots, code/API signatures, path-heavy entries, and empty entries.
+- Soft heuristic failures (`bad_feedback`, `bad_decision`) are intentionally excluded from automatic migration cleanup to protect durable declarative memories such as branding rules, API facts, release rules, user workflow preferences, and architecture decisions.

 ### Recovery note

@@ -4,37 +4,69 @@

 ### Memory Quality Cleanup

-This minor release automatically improves memory quality for all existing users on upgrade. Low-quality compaction memories are identified and superseded without requiring manual cleanup.
+This release improves automatic workspace memory quality without risking broad cleanup of useful existing memories.
+
+The quality gate is now shared across compaction extraction and migration checks, the compaction prompt is stricter about what should become durable memory, and the one-time migration is intentionally conservative.

 ### What Changed

- **Unified quality gate**: All memory types (feedback, decision, project, reference) now share the same quality rules instead of only project entries having a quality check.
- **Hardened compaction prompt**: The model is explicitly instructed that most compactions should produce zero memories, with clear good/bad examples.
- **Auto-supersede migration**: On first load after upgrade, existing low-quality `compaction` memories are automatically marked as `superseded` with quality tags. Explicit and manual memories are never affected.
+- **Unified quality rules**: memory quality checks now live in one shared module and apply consistently across feedback, decisions, project facts, and references.
+- **Stricter compaction output**: the compaction prompt now tells the model to save fewer memories and prefer durable facts, user preferences, architecture decisions, and hard-to-rediscover references.
+- **Conservative migration cleanup**: the `2026-04-28-quality-cleanup` migration only supersedes high-confidence garbage patterns, not every rejected memory.
+- **Audit logs**: automatic migration cleanup writes local JSONL audit records so superseded entries can be inspected and restored.
+- **Extraction rejection logs**: newly rejected compaction candidates are logged locally to help calibrate future quality rules.
+- **Regression coverage**: migration behavior is tested against sanitized real-workspace patterns to prevent mass false positives from coming back.

 ### What Gets Cleaned Up

-Low-quality memory patterns that are now rejected/superseded:
+The migration may supersede existing `source: "compaction"` memories only when they match hard garbage patterns:

- Progress snapshots: "Wave 1 completed successfully", "180 tests passed"
- Session-internal notes: "The assistant reviewed feedback and updated the plan"
- Implementation notes: "Implemented X in plugin.ts"
- Commit/CI references: "Commit a762e86 contains the fix"
+- Empty entries
+- Progress snapshots, such as "Wave 1 completed successfully"
+- Test or suite count snapshots, such as "180 tests passed"
 - Raw errors and stack traces
- Temporary status: "Currently running npm test"
+- Commit or CI snapshots
+- Temporary status notes, such as "Currently running npm test"
+- Active file snapshots
+- Code or API signatures
+- Path-heavy entries that are just rediscoverable file lists
+
+### What Is Protected
+
+The migration does not supersede entries whose only issue is a soft heuristic failure, such as:
+
+- `bad_feedback`
+- `bad_decision`
+
+This protects useful declarative memories like:
+
+- Product branding rules
+- API facts
+- Release rules
+- Architecture decisions
+- User workflow preferences
+
+Explicit and manual memories are also protected.

 ### Migration Behavior

- Runs exactly once per workspace (idempotent, non-destructive)
- Only affects `source: "compaction"` entries
- Explicit/manual memories are protected
- Superseded entries retain `status: "superseded"` and quality tags for audit
- No user action required
+- Runs once per workspace.
+- Only affects active `source: "compaction"` entries.
+- Marks matching entries as `status: "superseded"` instead of deleting them.
+- Adds `quality_cleanup` and `quality:<reason>` tags to superseded entries.
+- Writes audit logs to:
+  `~/.local/share/opencode-working-memory/migration-logs/2026-04-28-quality-cleanup.jsonl`
+- Separately from the migration, compaction extraction writes rejection logs for newly rejected candidates to:
+  `~/.local/share/opencode-working-memory/extraction-rejections.jsonl`
+
+### Recovery
+
+If a useful memory is superseded, inspect the migration audit log and restore the entry by changing its status back to `"active"` in the workspace's `workspace-memory.json`.

 ### Upgrade Notes

 - No configuration changes required.
- Existing workspace memory files are automatically cleaned on first load.
+- Existing workspace memory files remain compatible.
 - The OpenCode config entry stays the same:

 ```json
@@ -45,7 +77,7 @@ Low-quality memory patterns that are now rejected/superseded:

 ### Validation

- `npm test` (196 tests)
+- `npm test`
 - `npm run typecheck`

 ---
@@ -1,12 +1,45 @@
+/**
+ * Local helper to trigger migration on workspace roots.
+ *
+ * Usage:
+ *   MIGRATION_DRY_RUN_ROOTS=/path/a:/path/b bun run scripts/dev/dry-run-migration.ts
+ *
+ * Or create a local file (gitignored):
+ *   echo "/path/to/workspace1" > scripts/dev/dry-run-roots.local.txt
+ *   echo "/path/to/workspace2" >> scripts/dev/dry-run-roots.local.txt
+ *   bun run scripts/dev/dry-run-migration.ts
+ */
+
+import { existsSync } from "node:fs";
+import { readFile } from "node:fs/promises";
+import { join } from "node:path";
 import { loadWorkspaceMemory } from "../../src/workspace-memory.ts";

-const roots = [
-  "/Users/sd_wo/work/opencode-working-memory",
-  "/Users/sd_wo/Documents/projects/Pre-cancer-atlas",
-  "/Users/sd_wo/work/opencode-record",
-  "/Users/sd_wo/work/pathology-agent-reports",
-  "/Users/sd_wo/work/pathology-extraction",
-];
+/**
+ * Resolves the workspace roots this helper should load.
+ *
+ * Sources are checked in priority order and the first configured one wins:
+ *   1. `MIGRATION_DRY_RUN_ROOTS`, a colon-separated list of paths.
+ *   2. `dry-run-roots.local.txt` next to this script, one path per line.
+ *
+ * Every entry is trimmed, so padding around separators, CRLF line endings,
+ * and whitespace-only lines never turn into bogus roots.
+ *
+ * @returns The configured roots, or an empty array (after printing setup
+ *   instructions) when neither source is configured.
+ */
+async function getRoots(): Promise<string[]> {
+  // Strip surrounding whitespace (including a stray "\r") and drop empty entries.
+  const clean = (entries: string[]): string[] =>
+    entries.map(entry => entry.trim()).filter(entry => entry.length > 0);
+
+  // Priority 1: environment variable
+  const envRoots = process.env.MIGRATION_DRY_RUN_ROOTS;
+  if (envRoots) {
+    return clean(envRoots.split(":"));
+  }
+
+  // Priority 2: local file
+  const localFile = join(import.meta.dirname, "dry-run-roots.local.txt");
+  if (existsSync(localFile)) {
+    const content = await readFile(localFile, "utf8");
+    // Accept both LF and CRLF so files saved by Windows editors still work.
+    return clean(content.split(/\r?\n/));
+  }
+
+  // No roots configured
+  console.log("No workspace roots configured.");
+  console.log("Set MIGRATION_DRY_RUN_ROOTS=/path/a:/path/b or create dry-run-roots.local.txt");
+  return [];
+}
+
+const roots = await getRoots();
+
+if (roots.length === 0) {
+  process.exit(0);
+}

 for (const root of roots) {
  console.log(`Loading workspace memory: ${root}`);
@@ -248,6 +248,15 @@ async function logExtractionRejection(entry: ExtractionRejectionLogEntry): Promi
  }
 }

+/**
+ * Masks credential-looking substrings before text is written to a local log.
+ *
+ * Covers `bearer <token>` and `key=value` / `key: value` pairs for `token`,
+ * `password`, `secret`, and `api_key` (also `api-key` / `apikey`), matched
+ * case-insensitively. A value is either a quoted string or a run of
+ * characters up to whitespace or a common delimiter, so base64-style
+ * credentials containing `/`, `+`, `=`, or `~` are masked in full instead of
+ * leaking their tail. Whitespace around the separator and a closing quote
+ * after the key (JSON style) are tolerated.
+ *
+ * @param text - Raw text that may contain credentials.
+ * @returns The text with each matched credential replaced by `[REDACTED]`.
+ *   The key is normalized to lowercase `key=` form (`bearer ` for bearer
+ *   tokens); text without matches is returned unchanged.
+ */
+function redactSensitiveText(text: string): string {
+  // Unquoted value: everything up to whitespace, a quote, or a list/query delimiter.
+  const bare = `[^\\s"',;&]+`;
+  // Prefer a complete quoted string; otherwise take an unquoted run (optionally
+  // after a dangling opening quote).
+  const value = `(?:"[^"]*"|'[^']*'|["']?${bare})`;
+  // Rule for one `key=value` style credential; `label` is the normalized key emitted.
+  const keyed = (key: string, label: string): [RegExp, string] => [
+    new RegExp(`${key}["']?\\s*[=:]\\s*${value}`, "gi"),
+    `${label}=[REDACTED]`,
+  ];
+  // Bearer runs first so `token: Bearer abc` cannot leave `abc` behind.
+  const rules: Array<[RegExp, string]> = [
+    [new RegExp(`bearer\\s+${bare}`, "gi"), "bearer [REDACTED]"],
+    keyed("token", "token"),
+    keyed("password", "password"),
+    keyed("secret", "secret"),
+    keyed("api[-_]?key", "api_key"),
+  ];
+  return rules.reduce(
+    (redacted, [pattern, replacement]) => redacted.replace(pattern, replacement),
+    text,
+  );
+}
+
 function shouldAcceptWorkspaceMemoryCandidate(
  entry: {
    type: LongTermType;
@@ -278,7 +287,7 @@ function shouldAcceptWorkspaceMemoryCandidate(
    void logExtractionRejection({
      timestamp: new Date().toISOString(),
      type: entry.type,
-      text,
+      text: redactSensitiveText(text),
      reasons: quality.reasons,
      source: "compaction",
    });
@@ -208,14 +208,23 @@ export async function normalizeWorkspaceMemoryWithAccounting(
  // One-time migrations for legacy/low-quality snapshot violations.
  // Run quality cleanup first so hard violations receive quality audit tags
  // before the older P0 project-only cleanup marks progress snapshots.
+  const beforeQualityCleanup = result;
  const qualityCleanup = runMigrationQualityCleanup(result, nowIso);
  result = qualityCleanup.store;
+  let skipRemainingMigrations = false;
  if (qualityCleanup.events.length > 0) {
-    await appendQualityCleanupMigrationLog(qualityCleanup.events).catch(error => {
+    try {
+      await appendQualityCleanupMigrationLog(qualityCleanup.events);
+    } catch (error) {
      console.error("[memory] failed to write quality cleanup migration log:", error);
-    });
+      console.error("[memory] aborting migration to maintain audit trail integrity");
+      result = beforeQualityCleanup;
+      skipRemainingMigrations = true;
+    }
+  }
+  if (!skipRemainingMigrations) {
+    result = runMigrationP0Cleanup(result, nowIso);
  }
-  result = runMigrationP0Cleanup(result, nowIso);

  // P0 accounting only considers active entries. Entries that were already
  // superseded before this normalization are preserved in storage; entries that
@@ -324,6 +324,35 @@ Memory candidates:
  }
 });

+test("parseWorkspaceMemoryCandidates redacts secrets in extraction rejection log", async () => { // Checks that a rejected candidate is logged with its credentials masked.
+  const dataHome = await mkdtemp(join(tmpdir(), "wm-extraction-redact-data-"));
+  const previousXdgDataHome = process.env.XDG_DATA_HOME; // remembered so the finally block can restore it
+  process.env.XDG_DATA_HOME = dataHome; // the log path asserted below is built from this temp dir
+
+  try {
+    const summary = `
+Memory candidates:
+- reference TypeError: bearer sk_test token=tok123 password=pass123 secret=sec123 api_key=key123
+`;
+
+    const items = parseWorkspaceMemoryCandidates(summary);
+
+    assert.equal(items.length, 0); // the candidate must be rejected; only rejections reach the log
+    const logPath = join(dataHome, "opencode-working-memory", "extraction-rejections.jsonl");
+    const lines = (await waitForFile(logPath)).trim().split("\n"); // waitForFile is defined elsewhere; presumably waits for the log file to appear, then returns its content
+    assert.equal(lines.length, 1); // exactly one JSONL record for the single rejected candidate
+    const event = JSON.parse(lines[0]);
+    assert.equal(
+      event.text,
+      "TypeError: bearer [REDACTED] token=[REDACTED] password=[REDACTED] secret=[REDACTED] api_key=[REDACTED]",
+    );
+  } finally {
+    if (previousXdgDataHome === undefined) delete process.env.XDG_DATA_HOME; // variable was unset before the test, so unset it again
+    else process.env.XDG_DATA_HOME = previousXdgDataHome;
+    await rm(dataHome, { recursive: true, force: true });
+  }
+});
+
 test("parseWorkspaceMemoryCandidates rejects exact file count snapshots", () => {
  const summary = `
 Memory candidates:
@@ -30,38 +30,38 @@ function mem(
 }

 export const REAL_WORKSPACE_FIXTURES: Record<string, RealWorkspaceFixtureEntry[]> = {
-  "medical-atlas": [
-    mem("ma_ui_rule", "feedback", "UI 要統一風格：兩個表格都要 scrollable，約 20 rows", "active", "durable UI rule without user preference keyword"),
-    mem("ma_csp_rule", "feedback", "架構師建議中期將 CSP 改為 nonce/hash，而非 'unsafe-inline'", "active", "durable architecture recommendation"),
-    mem("ma_form_rule", "decision", "Form 添加防御性 action/method 屬性，避免 JS 失效時 GET 首頁", "active", "declarative design rule"),
-    mem("ma_logging_rule", "decision", "Cloud Logging filter 需支援多種 log 格式（jsonPayload.event_type, jsonPayload.message, textPayload）", "active", "durable spec using 需支援"),
+  "workspace-alpha": [
+    mem("alpha_ui_rule", "feedback", "UI should have consistent style: both tables scrollable, about 20 rows", "active", "durable UI rule without user preference keyword"),
+    mem("alpha_csp_rule", "feedback", "Architecture recommendation: migrate the content security policy to nonce or hash rules rather than unsafe inline scripts", "active", "durable architecture recommendation"),
+    mem("alpha_form_rule", "decision", "Form uses defensive action and method attributes so the fallback does not navigate to the home page when scripts fail", "active", "declarative design rule"),
+    mem("alpha_logging_rule", "decision", "Cloud logging filter supports multiple log formats: structured event type, structured message, and text payload", "active", "durable declarative logging spec"),
  ],
-  "opencode-record": [
-    mem("or_phase_snapshot", "project", "後端健康改進計劃已完成 Phase 1-4", "superseded", "progress snapshot"),
-    mem("or_test_snapshot", "project", "測試套件：1237 tests pass, 226 suites", "superseded", "test count snapshot"),
-    mem("or_sync_snapshot", "project", "USB 同步：37 個文件（bundles, server, frontend, tests, docs）", "superseded", "file sync snapshot"),
+  "workspace-beta": [
+    mem("beta_phase_snapshot", "project", "Backend health improvement plan completed Phase 1-4", "superseded", "progress snapshot"),
+    mem("beta_test_snapshot", "project", "Test suite: 1237 tests pass, 226 suites", "superseded", "test count snapshot"),
+    mem("beta_sync_snapshot", "project", "External drive synced 37 files including bundles, service, frontend, tests, and docs", "superseded", "file sync snapshot"),
  ],
-  "agent-reports": [
-    mem("ar_plan_decision", "feedback", "架構師建議執行 P3 前先確認有實際需求", "active", "durable plan decision"),
-    mem("ar_reviewer_fallback", "feedback", "`comprehensive-code-reviewer` subagent unreliable; use `phase-verifier` as fallback", "active", "durable workaround rule"),
-    mem("ar_wave_rule", "feedback", "每個 Wave 結束要找 verifier 確認，全部結束找 code review", "active", "durable workflow rule"),
-    mem("ar_remote_headers", "decision", "Remote headers 透過 `requestInit: { headers }` 傳入 `StreamableHTTPClientTransport`", "active", "declarative API rule"),
-    mem("ar_signal_order", "decision", "Graceful process cleanup signal order: SIGINT (300ms) → SIGTERM (700ms) → SIGKILL", "active", "durable process cleanup spec"),
-    mem("ar_ownership", "decision", "`McpRuntimeState` ownership model: CLI owns both runtime and mcpRuntime, dispose order is runtime first", "active", "durable ownership model"),
-    mem("ar_retry_policy", "decision", "Recovery retry policy: only once per tool call, only for transport/session failures", "active", "durable retry policy"),
+  "workspace-gamma": [
+    mem("gamma_need_check", "feedback", "Architecture recommendation: confirm actual demand before executing the later priority phase", "active", "durable plan decision"),
+    mem("gamma_review_fallback", "feedback", "Primary review automation can be unreliable; use phase verification as the fallback", "active", "durable workaround rule"),
+    mem("gamma_wave_rule", "feedback", "Each wave should end with verifier confirmation, and the full implementation should end with code review", "active", "durable workflow rule"),
+    mem("gamma_remote_headers", "decision", "Remote headers are passed through the HTTP transport request initialization headers option", "active", "declarative API rule"),
+    mem("gamma_signal_order", "decision", "Graceful process cleanup signal order: interrupt for 300ms, terminate for 700ms, then kill", "active", "durable process cleanup spec"),
+    mem("gamma_ownership", "decision", "Runtime state ownership model: the command-line entrypoint owns both runtime objects, and disposal order is primary runtime first", "active", "durable ownership model"),
+    mem("gamma_retry_policy", "decision", "Recovery retry policy: only once per tool call, only for transport or session failures", "active", "durable retry policy"),
  ],
-  "pdf-extraction": [
-    mem("pe_user_cycle", "feedback", "User 要求完整的 plan-review-feedback-modify-verify 循環，不是直接執行", "active", "mixed-language user workflow preference"),
-    mem("pe_ollama_batch", "feedback", "Ollama 大批量嵌入需要控制批次大小（20-50）和請求間隔", "active", "durable operational knowledge"),
-    mem("pe_option_b", "decision", "Phase 2 Fix 採用 Option B：multi-profile search grouping", "active", "design decision using 採用"),
-    mem("pe_single_source", "decision", "MCP source 維持單一 `book`，書籍身份在 source ID", "active", "design constraint using 維持"),
-    mem("pe_endpoint", "decision", "Ollama endpoint is `/api/embed` (not `/api/embeddings`) with `\"input\"` field", "active", "declarative API fact"),
-    mem("pe_filter_pipeline", "decision", "Filter pipeline: pre-chunk filtering (not post-chunk) to prevent embedding contamination", "active", "durable architecture rule"),
-    mem("pe_do_not_delete", "decision", "不刪除孤立的 reference-like 行（正文中的 \"et al.\" 等是合法引用）", "active", "do-not rule not matching current 不要 pattern"),
+  "workspace-delta": [
+    mem("delta_user_cycle", "feedback", "User requires a complete plan, review, feedback, modify, and verify loop rather than direct execution", "active", "user workflow preference"),
+    mem("delta_batching", "feedback", "Large-batch embedding requires controlled batch size around 20 to 50 items and a delay between requests", "active", "durable operational knowledge"),
+    mem("delta_option_b", "decision", "Phase 2 fix adopted Option B: grouped search across multiple profiles", "active", "design decision using adopted"),
+    mem("delta_single_source", "decision", "MCP source keeps a single generic source type, with item identity encoded in the source ID", "active", "design constraint using keeps"),
+    mem("delta_endpoint", "decision", "Embedding service endpoint is `/api/embed` rather than `/api/embeddings`, with the input field in the request body", "active", "declarative API fact"),
+    mem("delta_filter_pipeline", "decision", "Filter pipeline uses pre-chunk filtering rather than post-chunk filtering to prevent embedding contamination", "active", "durable architecture rule"),
+    mem("delta_do_not_delete", "decision", "Do not delete isolated reference-like lines because citation fragments in body text can be valid references", "active", "do-not rule"),
  ],
-  "self-repo": [
-    mem("sr_author_credit", "feedback", "User insists on preserving external contributor author credit and uses merge workflow", "active", "durable preference using insists"),
-    mem("sr_branding", "decision", "Product branding is \"OpenCode Working Memory\" without \"Plugin\" in the name", "active", "durable branding rule"),
-    mem("sr_changelog", "decision", "CHANGELOG version scope follows git tags: changes from v1.2.3 tag through HEAD belong to next version", "active", "durable release rule"),
+  "workspace-epsilon": [
+    mem("epsilon_author_credit", "feedback", "User insists on preserving external contributor author credit and uses merge workflow", "active", "durable preference using insists"),
+    mem("epsilon_branding", "decision", "Product branding is \"Generic Working Memory\" without \"Plugin\" in the name", "active", "durable branding rule"),
+    mem("epsilon_changelog", "decision", "Changelog version scope follows release tags: changes from the previous version tag through the current branch belong to the next version", "active", "durable release rule"),
  ],
 };
@@ -1080,6 +1080,89 @@ test("quality cleanup migration writes audit log for hard supersedes", async ()
  }
 });

+test("quality cleanup migration aborts supersede when audit log cannot be written", async () => { // Checks that a failed audit-log write leaves the entry active and the migration unrecorded.
+  const sandbox = await mkdtemp(join(tmpdir(), "wm-quality-audit-fail-"));
+  const dataHome = join(sandbox, "xdg-data-home");
+  const root = join(sandbox, "workspace");
+  const previousXdgDataHome = process.env.XDG_DATA_HOME; // remembered so the finally block can restore it
+  const previousConsoleError = console.error;
+  process.env.XDG_DATA_HOME = dataHome;
+  console.error = () => {}; // silence console.error for the duration of the test; restored in finally
+
+  try {
+    await mkdir(root, { recursive: true });
+    const now = "2026-04-28T00:00:00.000Z";
+    const storePath = await workspaceMemoryPath(root);
+    await mkdir(dirname(storePath), { recursive: true });
+    await writeFile(storePath, JSON.stringify({ // seed a store holding one active compaction entry
+      version: 1,
+      workspace: { root, key: await workspaceKey(root) },
+      limits: { maxRenderedChars: LONG_TERM_LIMITS.maxRenderedChars, maxEntries: LONG_TERM_LIMITS.maxEntries },
+      entries: [{
+        id: "hard_progress",
+        type: "project",
+        text: "Test suite: 1237 tests pass, 226 suites", // test-count snapshot; presumably a pattern the cleanup would otherwise supersede
+        source: "compaction",
+        confidence: 0.75,
+        status: "active",
+        createdAt: now,
+        updatedAt: now,
+        staleAfterDays: 60,
+      }],
+      migrations: [], // no migration recorded yet
+      updatedAt: now,
+    }, null, 2), "utf8");
+
+    const blockedLogDir = join(dataHome, "opencode-working-memory", "migration-logs");
+    await writeFile(blockedLogDir, "not a directory", "utf8"); // a regular file where the log directory should be; presumably makes the audit-log append fail
+
+    const loaded = await loadWorkspaceMemory(root);
+    const persisted = JSON.parse(await readFile(storePath, "utf8")) as WorkspaceMemoryStore; // re-read from disk to check what was actually saved
+
+    assert.equal(loaded.entries.find(entry => entry.id === "hard_progress")?.status, "active"); // entry must not be superseded in memory
+    assert.equal(persisted.entries.find(entry => entry.id === "hard_progress")?.status, "active"); // nor on disk
+    assert.equal(loaded.migrations?.includes("2026-04-28-quality-cleanup"), false); // migration must not be marked as done
+    assert.equal(persisted.migrations?.includes("2026-04-28-quality-cleanup"), false);
+  } finally {
+    console.error = previousConsoleError;
+    if (previousXdgDataHome === undefined) delete process.env.XDG_DATA_HOME; // variable was unset before the test, so unset it again
+    else process.env.XDG_DATA_HOME = previousXdgDataHome;
+    await rm(sandbox, { recursive: true, force: true });
+  }
+});
+
+test("real workspace regression fixture is de-identified and English-only", () => {
+  // Names that would tie a fixture back to the workspace it was sampled from.
+  const bannedTerms = [
+    "medical-atlas",
+    "opencode-record",
+    "agent-reports",
+    "pdf-extraction",
+    "self-repo",
+    "OpenCode Working Memory",
+  ];
+  // A single Han, Hiragana, Katakana, or Hangul character marks text as non-English.
+  const nonEnglishScript = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u;
+  const problems: string[] = [];
+
+  Object.entries(REAL_WORKSPACE_FIXTURES).forEach(([workspaceName, fixtureEntries]) => {
+    // The workspace key itself must not embed an identifying name.
+    const keyLeaksIdentity = bannedTerms.some(term => workspaceName.includes(term));
+    if (keyLeaksIdentity) {
+      problems.push(`${workspaceName}: workspace key should be generalized`);
+    }
+
+    fixtureEntries.forEach(entry => {
+      if (nonEnglishScript.test(entry.text)) {
+        problems.push(`${workspaceName}/${entry.id}: text must be English-only`);
+      }
+      // One failure per identifying term found, reported in list order.
+      bannedTerms
+        .filter(term => entry.text.includes(term))
+        .forEach(term => {
+          problems.push(`${workspaceName}/${entry.id}: text contains identifying term ${term}`);
+        });
+    });
+  });
+
+  // All problems are collected first so one run reports every offending entry.
+  assert.equal(problems.length, 0, `Fixture privacy failures:\n${problems.join("\n")}`);
+});
+
 test("quality cleanup migration regression against real workspace samples", async () => {
  const failures: string[] = [];
  const now = "2026-04-28T00:00:00.000Z";