fix(memory): address quality cleanup audit findings

This commit is contained in:
Ralph Chang
2026-04-28 14:29:28 +08:00
parent c7088a8a6e
commit 8e07bfe3c1
9 changed files with 267 additions and 63 deletions
+3
View File
@@ -51,3 +51,6 @@ pnpm-lock.yaml
# Superpowers local planning artifacts
docs/superpowers/plans/
# Local migration dry-run roots
scripts/dev/dry-run-roots.local.txt
+12 -6
View File
@@ -7,14 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [1.4.0] - 2026-04-28
### Memory Quality Cleanup
### Added
- Unified quality gate for compaction memory candidates and cleanup checks.
- Local migration audit log for the `2026-04-28-quality-cleanup` migration:
`~/.local/share/opencode-working-memory/migration-logs/2026-04-28-quality-cleanup.jsonl`.
- Local extraction rejection log for rejected compaction memory candidates:
`~/.local/share/opencode-working-memory/extraction-rejections.jsonl`.
- Sanitized real-workspace regression fixtures for memory cleanup migration behavior.
### Changed
- Unified memory quality rules in a shared quality gate for compaction memory candidates and cleanup checks.
- Rewrote the compaction memory prompt to reduce over-production of low-quality memories.
- Conservative one-time quality cleanup migration (`2026-04-28-quality-cleanup`) that supersedes only high-confidence garbage patterns: progress snapshots, raw errors, commit/CI snapshots, temporary status notes, active file snapshots, code/API signatures, path-heavy entries, and empty entries.
- Soft heuristic failures (`bad_feedback`, `bad_decision`) are intentionally excluded from automatic migration cleanup to protect durable declarative memories such as branding rules, API facts, release rules, and architecture decisions.
- Migration audit log: `~/.local/share/opencode-working-memory/migration-logs/2026-04-28-quality-cleanup.jsonl`.
- Extraction rejection log: `~/.local/share/opencode-working-memory/extraction-rejections.jsonl`.
- The quality cleanup migration (`2026-04-28-quality-cleanup`) is now conservative: it supersedes only high-confidence garbage patterns, including progress snapshots, raw errors, commit/CI snapshots, temporary status notes, active file snapshots, code/API signatures, path-heavy entries, and empty entries.
- Soft heuristic failures (`bad_feedback`, `bad_decision`) are intentionally excluded from automatic migration cleanup to protect durable declarative memories such as branding rules, API facts, release rules, user workflow preferences, and architecture decisions.
### Recovery note
+49 -17
View File
@@ -4,37 +4,69 @@
### Memory Quality Cleanup
This minor release automatically improves memory quality for all existing users on upgrade. Low-quality compaction memories are identified and superseded without requiring manual cleanup.
This release improves automatic workspace memory quality without risking broad cleanup of useful existing memories.
The quality gate is now shared across compaction extraction and migration checks, the compaction prompt is stricter about what should become durable memory, and the one-time migration is intentionally conservative.
### What Changed
- **Unified quality gate**: All memory types (feedback, decision, project, reference) now share the same quality rules instead of only project entries having a quality check.
- **Hardened compaction prompt**: The model is explicitly instructed that most compactions should produce zero memories, with clear good/bad examples.
- **Auto-supersede migration**: On first load after upgrade, existing low-quality `compaction` memories are automatically marked as `superseded` with quality tags. Explicit and manual memories are never affected.
- **Unified quality rules**: memory quality checks now live in one shared module and apply consistently across feedback, decisions, project facts, and references.
- **Stricter compaction output**: the compaction prompt now tells the model to save fewer memories and prefer durable facts, user preferences, architecture decisions, and hard-to-rediscover references.
- **Conservative migration cleanup**: the `2026-04-28-quality-cleanup` migration only supersedes high-confidence garbage patterns, not every rejected memory.
- **Audit logs**: automatic migration cleanup writes local JSONL audit records so superseded entries can be inspected and restored.
- **Extraction rejection logs**: newly rejected compaction candidates are logged locally to help calibrate future quality rules.
- **Regression coverage**: migration behavior is tested against sanitized real-workspace patterns to prevent mass false positives from coming back.
### What Gets Cleaned Up
Low-quality memory patterns that are now rejected/superseded:
The migration may supersede existing `source: "compaction"` memories only when they match hard garbage patterns:
- Progress snapshots: "Wave 1 completed successfully", "180 tests passed"
- Session-internal notes: "The assistant reviewed feedback and updated the plan"
- Implementation notes: "Implemented X in plugin.ts"
- Commit/CI references: "Commit a762e86 contains the fix"
- Empty entries
- Progress snapshots, such as "Wave 1 completed successfully"
- Test or suite count snapshots, such as "180 tests passed"
- Raw errors and stack traces
- Temporary status: "Currently running npm test"
- Commit or CI snapshots
- Temporary status notes, such as "Currently running npm test"
- Active file snapshots
- Code or API signatures
- Path-heavy entries that are just rediscoverable file lists
### What Is Protected
The migration does not supersede entries whose only issue is a soft heuristic failure, such as:
- `bad_feedback`
- `bad_decision`
This protects useful declarative memories like:
- Product branding rules
- API facts
- Release rules
- Architecture decisions
- User workflow preferences
Explicit and manual memories are also protected.
### Migration Behavior
- Runs exactly once per workspace (idempotent, non-destructive)
- Only affects `source: "compaction"` entries
- Explicit/manual memories are protected
- Superseded entries retain `status: "superseded"` and quality tags for audit
- No user action required
- Runs once per workspace.
- Only affects active `source: "compaction"` entries.
- Marks matching entries as `status: "superseded"` instead of deleting them.
- Adds `quality_cleanup` and `quality:<reason>` tags to superseded entries.
- Writes audit logs to:
`~/.local/share/opencode-working-memory/migration-logs/2026-04-28-quality-cleanup.jsonl`
- Writes extraction rejection logs to:
`~/.local/share/opencode-working-memory/extraction-rejections.jsonl`
### Recovery
If a useful memory is superseded, inspect the migration audit log and restore the entry by changing its status back to `"active"` in the workspace's `workspace-memory.json`.
### Upgrade Notes
- No configuration changes required.
- Existing workspace memory files are automatically cleaned on first load.
- Existing workspace memory files remain compatible.
- The OpenCode config entry stays the same:
```json
@@ -45,7 +77,7 @@ Low-quality memory patterns that are now rejected/superseded:
### Validation
- `npm test` (196 tests)
- `npm test`
- `npm run typecheck`
---
+40 -7
View File
@@ -1,12 +1,45 @@
/**
* Local helper to trigger migration on workspace roots.
*
* Usage:
* MIGRATION_DRY_RUN_ROOTS=/path/a:/path/b bun run scripts/dev/dry-run-migration.ts
*
* Or create a local file (gitignored):
* echo "/path/to/workspace1" > scripts/dev/dry-run-roots.local.txt
* echo "/path/to/workspace2" >> scripts/dev/dry-run-roots.local.txt
* bun run scripts/dev/dry-run-migration.ts
*/
import { existsSync } from "node:fs";
import { readFile } from "node:fs/promises";
import { join } from "node:path";
import { loadWorkspaceMemory } from "../../src/workspace-memory.ts";
const roots = [
"/Users/sd_wo/work/opencode-working-memory",
"/Users/sd_wo/Documents/projects/Pre-cancer-atlas",
"/Users/sd_wo/work/opencode-record",
"/Users/sd_wo/work/pathology-agent-reports",
"/Users/sd_wo/work/pathology-extraction",
];
/**
 * Resolve the workspace roots this script should load.
 *
 * Sources are consulted in priority order:
 *  1. The `MIGRATION_DRY_RUN_ROOTS` environment variable, a `:`-separated list.
 *  2. `dry-run-roots.local.txt` next to this script, one root per line.
 *
 * Every entry is trimmed and blank entries are dropped, so CRLF line endings,
 * trailing whitespace, and doubled separators never produce bogus roots.
 *
 * @returns The configured roots, or an empty array (after printing a usage
 *   hint) when neither source is present.
 */
async function getRoots(): Promise<string[]> {
  // Shared clean-up for both sources: strip surrounding whitespace (including a
  // stray "\r" from CRLF files) and discard entries that end up empty.
  const cleanEntries = (entries: string[]): string[] =>
    entries.map(entry => entry.trim()).filter(entry => entry.length > 0);

  // Priority 1: environment variable
  const envRoots = process.env.MIGRATION_DRY_RUN_ROOTS;
  if (envRoots) {
    return cleanEntries(envRoots.split(":"));
  }

  // Priority 2: local file
  const localFile = join(import.meta.dirname, "dry-run-roots.local.txt");
  if (existsSync(localFile)) {
    const content = await readFile(localFile, "utf8");
    // Accept both LF and CRLF line endings.
    return cleanEntries(content.split(/\r?\n/));
  }

  // No roots configured
  console.log("No workspace roots configured.");
  console.log("Set MIGRATION_DRY_RUN_ROOTS=/path/a:/path/b or create dry-run-roots.local.txt");
  return [];
}
const roots = await getRoots();
if (roots.length === 0) {
process.exit(0);
}
for (const root of roots) {
console.log(`Loading workspace memory: ${root}`);
+10 -1
View File
@@ -248,6 +248,15 @@ async function logExtractionRejection(entry: ExtractionRejectionLogEntry): Promi
}
}
/**
 * Matches either a bearer credential (`bearer <value>`) or a secret assignment
 * (`token`, `password`, `secret`, `api key` followed by `=` or `:` and a value).
 *
 * The value is a double- or single-quoted string on one line, or otherwise a
 * run of non-whitespace characters. Matching any non-whitespace run (rather
 * than only `[a-zA-Z0-9._-]`) ensures base64-style characters such as `+`, `/`
 * and `=` are redacted together with the rest of the secret.
 */
const SENSITIVE_TEXT_PATTERN =
  /(bearer)\s+\S+|(token|password|secret|api[-_]?key)["']?\s*[=:]\s*(?:"[^"\n]*"|'[^'\n]*'|\S+)/gi;

/**
 * Replace credential-looking substrings with `[REDACTED]` placeholders.
 *
 * The keyword is kept (lower-cased, with every `api key` spelling normalized
 * to `api_key`) so the redacted text still shows which kind of secret was
 * present. Text without a recognized pattern is returned unchanged.
 *
 * @param text Free-form text that may contain credentials.
 * @returns The text with each recognized credential value replaced.
 */
function redactSensitiveText(text: string): string {
  return text.replace(
    SENSITIVE_TEXT_PATTERN,
    (_match: string, bearer: string | undefined, key: string | undefined): string => {
      if (bearer !== undefined) {
        return "bearer [REDACTED]";
      }
      const label = (key ?? "").toLowerCase();
      // "apikey", "api-key" and "api_key" all collapse to one canonical label.
      return `${label.startsWith("api") ? "api_key" : label}=[REDACTED]`;
    },
  );
}
function shouldAcceptWorkspaceMemoryCandidate(
entry: {
type: LongTermType;
@@ -278,7 +287,7 @@ function shouldAcceptWorkspaceMemoryCandidate(
void logExtractionRejection({
timestamp: new Date().toISOString(),
type: entry.type,
text,
text: redactSensitiveText(text),
reasons: quality.reasons,
source: "compaction",
});
+12 -3
View File
@@ -208,14 +208,23 @@ export async function normalizeWorkspaceMemoryWithAccounting(
// One-time migrations for legacy/low-quality snapshot violations.
// Run quality cleanup first so hard violations receive quality audit tags
// before the older P0 project-only cleanup marks progress snapshots.
const beforeQualityCleanup = result;
const qualityCleanup = runMigrationQualityCleanup(result, nowIso);
result = qualityCleanup.store;
let skipRemainingMigrations = false;
if (qualityCleanup.events.length > 0) {
await appendQualityCleanupMigrationLog(qualityCleanup.events).catch(error => {
try {
await appendQualityCleanupMigrationLog(qualityCleanup.events);
} catch (error) {
console.error("[memory] failed to write quality cleanup migration log:", error);
});
console.error("[memory] aborting migration to maintain audit trail integrity");
result = beforeQualityCleanup;
skipRemainingMigrations = true;
}
}
if (!skipRemainingMigrations) {
result = runMigrationP0Cleanup(result, nowIso);
}
result = runMigrationP0Cleanup(result, nowIso);
// P0 accounting only considers active entries. Entries that were already
// superseded before this normalization are preserved in storage; entries that
+29
View File
@@ -324,6 +324,35 @@ Memory candidates:
}
});
// Checks that when a compaction candidate is rejected, the text recorded in the
// extraction rejection log has credential-like values replaced by [REDACTED].
test("parseWorkspaceMemoryCandidates redacts secrets in extraction rejection log", async () => {
// Redirect XDG_DATA_HOME to a throwaway directory; the test expects the log to
// appear beneath it. The previous value is restored in `finally`.
const dataHome = await mkdtemp(join(tmpdir(), "wm-extraction-redact-data-"));
const previousXdgDataHome = process.env.XDG_DATA_HOME;
process.env.XDG_DATA_HOME = dataHome;
try {
// A single candidate carrying one of each credential form being redacted.
const summary = `
Memory candidates:
- reference TypeError: bearer sk_test token=tok123 password=pass123 secret=sec123 api_key=key123
`;
const items = parseWorkspaceMemoryCandidates(summary);
// The candidate must be rejected, so nothing is returned.
assert.equal(items.length, 0);
const logPath = join(dataHome, "opencode-working-memory", "extraction-rejections.jsonl");
// NOTE(review): waitForFile is defined elsewhere; presumably it waits for the
// asynchronously written log to exist and resolves with its contents — confirm.
const lines = (await waitForFile(logPath)).trim().split("\n");
// Exactly one JSONL record is expected for the single rejected candidate.
assert.equal(lines.length, 1);
const event = JSON.parse(lines[0]);
assert.equal(
event.text,
"TypeError: bearer [REDACTED] token=[REDACTED] password=[REDACTED] secret=[REDACTED] api_key=[REDACTED]",
);
} finally {
// Restore the environment exactly (unset vs. previous value) and remove the temp dir.
if (previousXdgDataHome === undefined) delete process.env.XDG_DATA_HOME;
else process.env.XDG_DATA_HOME = previousXdgDataHome;
await rm(dataHome, { recursive: true, force: true });
}
});
test("parseWorkspaceMemoryCandidates rejects exact file count snapshots", () => {
const summary = `
Memory candidates:
+29 -29
View File
@@ -30,38 +30,38 @@ function mem(
}
export const REAL_WORKSPACE_FIXTURES: Record<string, RealWorkspaceFixtureEntry[]> = {
"medical-atlas": [
mem("ma_ui_rule", "feedback", "UI 要統一風格:兩個表格都要 scrollable,約 20 rows", "active", "durable UI rule without user preference keyword"),
mem("ma_csp_rule", "feedback", "架構師建議中期將 CSP 改為 nonce/hash,而非 'unsafe-inline'", "active", "durable architecture recommendation"),
mem("ma_form_rule", "decision", "Form 添加防御性 action/method 屬性,避免 JS 失效時 GET 首頁", "active", "declarative design rule"),
mem("ma_logging_rule", "decision", "Cloud Logging filter 需支援多種 log 格式(jsonPayload.event_type, jsonPayload.message, textPayload", "active", "durable spec using 需支援"),
"workspace-alpha": [
mem("alpha_ui_rule", "feedback", "UI should have consistent style: both tables scrollable, about 20 rows", "active", "durable UI rule without user preference keyword"),
mem("alpha_csp_rule", "feedback", "Architecture recommendation: migrate the content security policy to nonce or hash rules rather than unsafe inline scripts", "active", "durable architecture recommendation"),
mem("alpha_form_rule", "decision", "Form uses defensive action and method attributes so the fallback does not navigate to the home page when scripts fail", "active", "declarative design rule"),
mem("alpha_logging_rule", "decision", "Cloud logging filter supports multiple log formats: structured event type, structured message, and text payload", "active", "durable declarative logging spec"),
],
"opencode-record": [
mem("or_phase_snapshot", "project", "後端健康改進計劃已完成 Phase 1-4", "superseded", "progress snapshot"),
mem("or_test_snapshot", "project", "測試套件:1237 tests pass, 226 suites", "superseded", "test count snapshot"),
mem("or_sync_snapshot", "project", "USB 同步:37 個文件(bundles, server, frontend, tests, docs", "superseded", "file sync snapshot"),
"workspace-beta": [
mem("beta_phase_snapshot", "project", "Backend health improvement plan completed Phase 1-4", "superseded", "progress snapshot"),
mem("beta_test_snapshot", "project", "Test suite: 1237 tests pass, 226 suites", "superseded", "test count snapshot"),
mem("beta_sync_snapshot", "project", "External drive synced 37 files including bundles, service, frontend, tests, and docs", "superseded", "file sync snapshot"),
],
"agent-reports": [
mem("ar_plan_decision", "feedback", "架構師建議執行 P3 前先確認有實際需求", "active", "durable plan decision"),
mem("ar_reviewer_fallback", "feedback", "`comprehensive-code-reviewer` subagent unreliable; use `phase-verifier` as fallback", "active", "durable workaround rule"),
mem("ar_wave_rule", "feedback", "每個 Wave 結束要找 verifier 確認,全部結束找 code review", "active", "durable workflow rule"),
mem("ar_remote_headers", "decision", "Remote headers 透過 `requestInit: { headers }` 傳入 `StreamableHTTPClientTransport`", "active", "declarative API rule"),
mem("ar_signal_order", "decision", "Graceful process cleanup signal order: SIGINT (300ms) → SIGTERM (700ms) → SIGKILL", "active", "durable process cleanup spec"),
mem("ar_ownership", "decision", "`McpRuntimeState` ownership model: CLI owns both runtime and mcpRuntime, dispose order is runtime first", "active", "durable ownership model"),
mem("ar_retry_policy", "decision", "Recovery retry policy: only once per tool call, only for transport/session failures", "active", "durable retry policy"),
"workspace-gamma": [
mem("gamma_need_check", "feedback", "Architecture recommendation: confirm actual demand before executing the later priority phase", "active", "durable plan decision"),
mem("gamma_review_fallback", "feedback", "Primary review automation can be unreliable; use phase verification as the fallback", "active", "durable workaround rule"),
mem("gamma_wave_rule", "feedback", "Each wave should end with verifier confirmation, and the full implementation should end with code review", "active", "durable workflow rule"),
mem("gamma_remote_headers", "decision", "Remote headers are passed through the HTTP transport request initialization headers option", "active", "declarative API rule"),
mem("gamma_signal_order", "decision", "Graceful process cleanup signal order: interrupt for 300ms, terminate for 700ms, then kill", "active", "durable process cleanup spec"),
mem("gamma_ownership", "decision", "Runtime state ownership model: the command-line entrypoint owns both runtime objects, and disposal order is primary runtime first", "active", "durable ownership model"),
mem("gamma_retry_policy", "decision", "Recovery retry policy: only once per tool call, only for transport or session failures", "active", "durable retry policy"),
],
"pdf-extraction": [
mem("pe_user_cycle", "feedback", "User 要求完整的 plan-review-feedback-modify-verify 循環,不是直接執行", "active", "mixed-language user workflow preference"),
mem("pe_ollama_batch", "feedback", "Ollama 大批量嵌入需要控制批次大小(20-50)和請求間隔", "active", "durable operational knowledge"),
mem("pe_option_b", "decision", "Phase 2 Fix 採用 Option Bmulti-profile search grouping", "active", "design decision using 採用"),
mem("pe_single_source", "decision", "MCP source 維持單一 `book`,書籍身份在 source ID", "active", "design constraint using 維持"),
mem("pe_endpoint", "decision", "Ollama endpoint is `/api/embed` (not `/api/embeddings`) with `\"input\"` field", "active", "declarative API fact"),
mem("pe_filter_pipeline", "decision", "Filter pipeline: pre-chunk filtering (not post-chunk) to prevent embedding contamination", "active", "durable architecture rule"),
mem("pe_do_not_delete", "decision", "不刪除孤立的 reference-like 行(正文中的 \"et al.\" 等是合法引用)", "active", "do-not rule not matching current 不要 pattern"),
"workspace-delta": [
mem("delta_user_cycle", "feedback", "User requires a complete plan, review, feedback, modify, and verify loop rather than direct execution", "active", "user workflow preference"),
mem("delta_batching", "feedback", "Large-batch embedding requires controlled batch size around 20 to 50 items and a delay between requests", "active", "durable operational knowledge"),
mem("delta_option_b", "decision", "Phase 2 fix adopted Option B: grouped search across multiple profiles", "active", "design decision using adopted"),
mem("delta_single_source", "decision", "MCP source keeps a single generic source type, with item identity encoded in the source ID", "active", "design constraint using keeps"),
mem("delta_endpoint", "decision", "Embedding service endpoint is `/api/embed` rather than `/api/embeddings`, with the input field in the request body", "active", "declarative API fact"),
mem("delta_filter_pipeline", "decision", "Filter pipeline uses pre-chunk filtering rather than post-chunk filtering to prevent embedding contamination", "active", "durable architecture rule"),
mem("delta_do_not_delete", "decision", "Do not delete isolated reference-like lines because citation fragments in body text can be valid references", "active", "do-not rule"),
],
"self-repo": [
mem("sr_author_credit", "feedback", "User insists on preserving external contributor author credit and uses merge workflow", "active", "durable preference using insists"),
mem("sr_branding", "decision", "Product branding is \"OpenCode Working Memory\" without \"Plugin\" in the name", "active", "durable branding rule"),
mem("sr_changelog", "decision", "CHANGELOG version scope follows git tags: changes from v1.2.3 tag through HEAD belong to next version", "active", "durable release rule"),
"workspace-epsilon": [
mem("epsilon_author_credit", "feedback", "User insists on preserving external contributor author credit and uses merge workflow", "active", "durable preference using insists"),
mem("epsilon_branding", "decision", "Product branding is \"Generic Working Memory\" without \"Plugin\" in the name", "active", "durable branding rule"),
mem("epsilon_changelog", "decision", "Changelog version scope follows release tags: changes from the previous version tag through the current branch belong to the next version", "active", "durable release rule"),
],
};
+83
View File
@@ -1080,6 +1080,89 @@ test("quality cleanup migration writes audit log for hard supersedes", async ()
}
});
// When the migration audit log cannot be written, the quality cleanup must not
// supersede anything, must not record the migration as applied, and must report
// the failure through console.error.
test("quality cleanup migration aborts supersede when audit log cannot be written", async () => {
  const sandbox = await mkdtemp(join(tmpdir(), "wm-quality-audit-fail-"));
  const dataHome = join(sandbox, "xdg-data-home");
  const root = join(sandbox, "workspace");
  const previousXdgDataHome = process.env.XDG_DATA_HOME;
  const previousConsoleError = console.error;
  // Capture error output instead of discarding it so the test can verify that
  // the failure was actually reported, while still keeping test output quiet.
  const loggedErrors: string[] = [];
  process.env.XDG_DATA_HOME = dataHome;
  console.error = (...args: unknown[]) => {
    loggedErrors.push(args.map(String).join(" "));
  };

  try {
    await mkdir(root, { recursive: true });
    const now = "2026-04-28T00:00:00.000Z";
    const storePath = await workspaceMemoryPath(root);
    await mkdir(dirname(storePath), { recursive: true });
    // One active compaction entry that the test expects the cleanup to target.
    await writeFile(storePath, JSON.stringify({
      version: 1,
      workspace: { root, key: await workspaceKey(root) },
      limits: { maxRenderedChars: LONG_TERM_LIMITS.maxRenderedChars, maxEntries: LONG_TERM_LIMITS.maxEntries },
      entries: [{
        id: "hard_progress",
        type: "project",
        text: "Test suite: 1237 tests pass, 226 suites",
        source: "compaction",
        confidence: 0.75,
        status: "active",
        createdAt: now,
        updatedAt: now,
        staleAfterDays: 60,
      }],
      migrations: [],
      updatedAt: now,
    }, null, 2), "utf8");

    // Put a regular file where the log directory is expected so that writing
    // the audit log fails. Create the parent explicitly so this setup does not
    // depend on where the store file happens to live.
    const blockedLogDir = join(dataHome, "opencode-working-memory", "migration-logs");
    await mkdir(dirname(blockedLogDir), { recursive: true });
    await writeFile(blockedLogDir, "not a directory", "utf8");

    const loaded = await loadWorkspaceMemory(root);
    const persisted = JSON.parse(await readFile(storePath, "utf8")) as WorkspaceMemoryStore;

    assert.equal(loaded.entries.find(entry => entry.id === "hard_progress")?.status, "active");
    assert.equal(persisted.entries.find(entry => entry.id === "hard_progress")?.status, "active");
    assert.equal(loaded.migrations?.includes("2026-04-28-quality-cleanup"), false);
    assert.equal(persisted.migrations?.includes("2026-04-28-quality-cleanup"), false);
    assert.ok(
      loggedErrors.some(line => line.includes("failed to write quality cleanup migration log")),
      `expected the audit log failure to be reported, got:\n${loggedErrors.join("\n")}`,
    );
  } finally {
    console.error = previousConsoleError;
    if (previousXdgDataHome === undefined) delete process.env.XDG_DATA_HOME;
    else process.env.XDG_DATA_HOME = previousXdgDataHome;
    await rm(sandbox, { recursive: true, force: true });
  }
});
// Guards the regression fixture against privacy leaks: no workspace key or
// entry text may contain an identifying term, and entry text may not contain
// Han, Hiragana, Katakana, or Hangul characters.
test("real workspace regression fixture is de-identified and English-only", () => {
  // Terms treated as identifying; checked against workspace keys and entry text.
  const bannedTerms = [
    "medical-atlas",
    "opencode-record",
    "agent-reports",
    "pdf-extraction",
    "self-repo",
    "OpenCode Working Memory",
  ];
  const nonEnglishScript = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u;

  const problems = Object.entries(REAL_WORKSPACE_FIXTURES).flatMap(([workspaceKey, entries]) => {
    // At most one message per workspace key, regardless of how many terms match.
    const keyProblems = bannedTerms.some(term => workspaceKey.includes(term))
      ? [`${workspaceKey}: workspace key should be generalized`]
      : [];

    const entryProblems = entries.flatMap(entry => {
      const label = `${workspaceKey}/${entry.id}`;
      const scriptProblems = nonEnglishScript.test(entry.text)
        ? [`${label}: text must be English-only`]
        : [];
      // One message per identifying term found in the entry text.
      const termProblems = bannedTerms
        .filter(term => entry.text.includes(term))
        .map(term => `${label}: text contains identifying term ${term}`);
      return [...scriptProblems, ...termProblems];
    });

    return [...keyProblems, ...entryProblems];
  });

  assert.equal(problems.length, 0, `Fixture privacy failures:\n${problems.join("\n")}`);
});
test("quality cleanup migration regression against real workspace samples", async () => {
const failures: string[] = [];
const now = "2026-04-28T00:00:00.000Z";