fix(memory): address quality cleanup audit findings

This commit is contained in:
Ralph Chang
2026-04-28 14:29:28 +08:00
parent c7088a8a6e
commit 8e07bfe3c1
9 changed files with 267 additions and 63 deletions
+29
View File
@@ -324,6 +324,35 @@ Memory candidates:
}
});
test("parseWorkspaceMemoryCandidates redacts secrets in extraction rejection log", async () => {
  // Point XDG_DATA_HOME at a throwaway directory so the rejection log is written
  // somewhere this test owns; the previous value is restored in `finally`.
  const tempDataHome = await mkdtemp(join(tmpdir(), "wm-extraction-redact-data-"));
  const savedXdgDataHome = process.env.XDG_DATA_HOME;
  process.env.XDG_DATA_HOME = tempDataHome;

  try {
    // A single candidate line carrying several secret-looking tokens.
    const summary = `
Memory candidates:
- reference TypeError: bearer sk_test token=tok123 password=pass123 secret=sec123 api_key=key123
`;

    // The candidate is expected to be rejected, so nothing is returned...
    const candidates = parseWorkspaceMemoryCandidates(summary);
    assert.equal(candidates.length, 0);

    // ...and exactly one JSONL record should show up in the rejection log.
    const rejectionLogPath = join(tempDataHome, "opencode-working-memory", "extraction-rejections.jsonl");
    const logContents = await waitForFile(rejectionLogPath);
    const records = logContents.trim().split("\n");
    assert.equal(records.length, 1);

    // Every secret value in the logged text must have been replaced by [REDACTED].
    const loggedEvent = JSON.parse(records[0]);
    assert.equal(
      loggedEvent.text,
      "TypeError: bearer [REDACTED] token=[REDACTED] password=[REDACTED] secret=[REDACTED] api_key=[REDACTED]",
    );
  } finally {
    if (savedXdgDataHome !== undefined) process.env.XDG_DATA_HOME = savedXdgDataHome;
    else delete process.env.XDG_DATA_HOME;
    await rm(tempDataHome, { recursive: true, force: true });
  }
});
test("parseWorkspaceMemoryCandidates rejects exact file count snapshots", () => {
const summary = `
Memory candidates:
+29 -29
View File
@@ -30,38 +30,38 @@ function mem(
}
/**
 * Regression fixture entries grouped by workspace name.
 *
 * Each value is a list of entries built with `mem(...)`. As used below, the
 * arguments are: an entry id, a memory type ("feedback", "decision" or
 * "project"), the memory text, a status ("active" or "superseded"), and a
 * short note describing why the sample is in the fixture.
 * NOTE(review): argument meanings are inferred from the call sites here —
 * confirm against the `mem` signature.
 */
export const REAL_WORKSPACE_FIXTURES: Record<string, RealWorkspaceFixtureEntry[]> = {
"medical-atlas": [
mem("ma_ui_rule", "feedback", "UI 要統一風格:兩個表格都要 scrollable,約 20 rows", "active", "durable UI rule without user preference keyword"),
mem("ma_csp_rule", "feedback", "架構師建議中期將 CSP 改為 nonce/hash,而非 'unsafe-inline'", "active", "durable architecture recommendation"),
mem("ma_form_rule", "decision", "Form 添加防御性 action/method 屬性,避免 JS 失效時 GET 首頁", "active", "declarative design rule"),
mem("ma_logging_rule", "decision", "Cloud Logging filter 需支援多種 log 格式(jsonPayload.event_type, jsonPayload.message, textPayload", "active", "durable spec using 需支援"),
"workspace-alpha": [
mem("alpha_ui_rule", "feedback", "UI should have consistent style: both tables scrollable, about 20 rows", "active", "durable UI rule without user preference keyword"),
mem("alpha_csp_rule", "feedback", "Architecture recommendation: migrate the content security policy to nonce or hash rules rather than unsafe inline scripts", "active", "durable architecture recommendation"),
mem("alpha_form_rule", "decision", "Form uses defensive action and method attributes so the fallback does not navigate to the home page when scripts fail", "active", "declarative design rule"),
mem("alpha_logging_rule", "decision", "Cloud logging filter supports multiple log formats: structured event type, structured message, and text payload", "active", "durable declarative logging spec"),
],
"opencode-record": [
mem("or_phase_snapshot", "project", "後端健康改進計劃已完成 Phase 1-4", "superseded", "progress snapshot"),
mem("or_test_snapshot", "project", "測試套件:1237 tests pass, 226 suites", "superseded", "test count snapshot"),
mem("or_sync_snapshot", "project", "USB 同步:37 個文件(bundles, server, frontend, tests, docs", "superseded", "file sync snapshot"),
"workspace-beta": [
mem("beta_phase_snapshot", "project", "Backend health improvement plan completed Phase 1-4", "superseded", "progress snapshot"),
mem("beta_test_snapshot", "project", "Test suite: 1237 tests pass, 226 suites", "superseded", "test count snapshot"),
mem("beta_sync_snapshot", "project", "External drive synced 37 files including bundles, service, frontend, tests, and docs", "superseded", "file sync snapshot"),
],
"agent-reports": [
mem("ar_plan_decision", "feedback", "架構師建議執行 P3 前先確認有實際需求", "active", "durable plan decision"),
mem("ar_reviewer_fallback", "feedback", "`comprehensive-code-reviewer` subagent unreliable; use `phase-verifier` as fallback", "active", "durable workaround rule"),
mem("ar_wave_rule", "feedback", "每個 Wave 結束要找 verifier 確認,全部結束找 code review", "active", "durable workflow rule"),
mem("ar_remote_headers", "decision", "Remote headers 透過 `requestInit: { headers }` 傳入 `StreamableHTTPClientTransport`", "active", "declarative API rule"),
mem("ar_signal_order", "decision", "Graceful process cleanup signal order: SIGINT (300ms) → SIGTERM (700ms) → SIGKILL", "active", "durable process cleanup spec"),
mem("ar_ownership", "decision", "`McpRuntimeState` ownership model: CLI owns both runtime and mcpRuntime, dispose order is runtime first", "active", "durable ownership model"),
mem("ar_retry_policy", "decision", "Recovery retry policy: only once per tool call, only for transport/session failures", "active", "durable retry policy"),
"workspace-gamma": [
mem("gamma_need_check", "feedback", "Architecture recommendation: confirm actual demand before executing the later priority phase", "active", "durable plan decision"),
mem("gamma_review_fallback", "feedback", "Primary review automation can be unreliable; use phase verification as the fallback", "active", "durable workaround rule"),
mem("gamma_wave_rule", "feedback", "Each wave should end with verifier confirmation, and the full implementation should end with code review", "active", "durable workflow rule"),
mem("gamma_remote_headers", "decision", "Remote headers are passed through the HTTP transport request initialization headers option", "active", "declarative API rule"),
mem("gamma_signal_order", "decision", "Graceful process cleanup signal order: interrupt for 300ms, terminate for 700ms, then kill", "active", "durable process cleanup spec"),
mem("gamma_ownership", "decision", "Runtime state ownership model: the command-line entrypoint owns both runtime objects, and disposal order is primary runtime first", "active", "durable ownership model"),
mem("gamma_retry_policy", "decision", "Recovery retry policy: only once per tool call, only for transport or session failures", "active", "durable retry policy"),
],
"pdf-extraction": [
mem("pe_user_cycle", "feedback", "User 要求完整的 plan-review-feedback-modify-verify 循環,不是直接執行", "active", "mixed-language user workflow preference"),
mem("pe_ollama_batch", "feedback", "Ollama 大批量嵌入需要控制批次大小(20-50)和請求間隔", "active", "durable operational knowledge"),
mem("pe_option_b", "decision", "Phase 2 Fix 採用 Option Bmulti-profile search grouping", "active", "design decision using 採用"),
mem("pe_single_source", "decision", "MCP source 維持單一 `book`,書籍身份在 source ID", "active", "design constraint using 維持"),
mem("pe_endpoint", "decision", "Ollama endpoint is `/api/embed` (not `/api/embeddings`) with `\"input\"` field", "active", "declarative API fact"),
mem("pe_filter_pipeline", "decision", "Filter pipeline: pre-chunk filtering (not post-chunk) to prevent embedding contamination", "active", "durable architecture rule"),
mem("pe_do_not_delete", "decision", "不刪除孤立的 reference-like 行(正文中的 \"et al.\" 等是合法引用)", "active", "do-not rule not matching current 不要 pattern"),
"workspace-delta": [
mem("delta_user_cycle", "feedback", "User requires a complete plan, review, feedback, modify, and verify loop rather than direct execution", "active", "user workflow preference"),
mem("delta_batching", "feedback", "Large-batch embedding requires controlled batch size around 20 to 50 items and a delay between requests", "active", "durable operational knowledge"),
mem("delta_option_b", "decision", "Phase 2 fix adopted Option B: grouped search across multiple profiles", "active", "design decision using adopted"),
mem("delta_single_source", "decision", "MCP source keeps a single generic source type, with item identity encoded in the source ID", "active", "design constraint using keeps"),
mem("delta_endpoint", "decision", "Embedding service endpoint is `/api/embed` rather than `/api/embeddings`, with the input field in the request body", "active", "declarative API fact"),
mem("delta_filter_pipeline", "decision", "Filter pipeline uses pre-chunk filtering rather than post-chunk filtering to prevent embedding contamination", "active", "durable architecture rule"),
mem("delta_do_not_delete", "decision", "Do not delete isolated reference-like lines because citation fragments in body text can be valid references", "active", "do-not rule"),
],
"self-repo": [
mem("sr_author_credit", "feedback", "User insists on preserving external contributor author credit and uses merge workflow", "active", "durable preference using insists"),
mem("sr_branding", "decision", "Product branding is \"OpenCode Working Memory\" without \"Plugin\" in the name", "active", "durable branding rule"),
mem("sr_changelog", "decision", "CHANGELOG version scope follows git tags: changes from v1.2.3 tag through HEAD belong to next version", "active", "durable release rule"),
"workspace-epsilon": [
mem("epsilon_author_credit", "feedback", "User insists on preserving external contributor author credit and uses merge workflow", "active", "durable preference using insists"),
mem("epsilon_branding", "decision", "Product branding is \"Generic Working Memory\" without \"Plugin\" in the name", "active", "durable branding rule"),
mem("epsilon_changelog", "decision", "Changelog version scope follows release tags: changes from the previous version tag through the current branch belong to the next version", "active", "durable release rule"),
],
};
+83
View File
@@ -1080,6 +1080,89 @@ test("quality cleanup migration writes audit log for hard supersedes", async ()
}
});
test("quality cleanup migration aborts supersede when audit log cannot be written", async () => {
  // Scenario: the store holds one active entry, and the path where migration
  // logs would live is occupied by a regular file. Loading the store must then
  // leave the entry active and must not record the cleanup migration.
  const migrationId = "2026-04-28-quality-cleanup";
  const sandbox = await mkdtemp(join(tmpdir(), "wm-quality-audit-fail-"));
  const dataHome = join(sandbox, "xdg-data-home");
  const root = join(sandbox, "workspace");
  const previousXdgDataHome = process.env.XDG_DATA_HOME;
  const previousConsoleError = console.error;
  process.env.XDG_DATA_HOME = dataHome;
  // Mute console.error while the test runs (restored in `finally`).
  // NOTE(review): presumably the failed audit write is reported there — confirm.
  console.error = () => {};
  try {
    await mkdir(root, { recursive: true });
    const now = "2026-04-28T00:00:00.000Z";
    const storePath = await workspaceMemoryPath(root);
    await mkdir(dirname(storePath), { recursive: true });
    await writeFile(storePath, JSON.stringify({
      version: 1,
      workspace: { root, key: await workspaceKey(root) },
      limits: { maxRenderedChars: LONG_TERM_LIMITS.maxRenderedChars, maxEntries: LONG_TERM_LIMITS.maxEntries },
      entries: [{
        id: "hard_progress",
        type: "project",
        text: "Test suite: 1237 tests pass, 226 suites",
        source: "compaction",
        confidence: 0.75,
        status: "active",
        createdAt: now,
        updatedAt: now,
        staleAfterDays: 60,
      }],
      migrations: [],
      updatedAt: now,
    }, null, 2), "utf8");

    // Occupy the migration-log location with a plain file so it cannot be used
    // as a directory. Create its parent explicitly instead of relying on the
    // store path happening to live under the same folder.
    const blockedLogDir = join(dataHome, "opencode-working-memory", "migration-logs");
    await mkdir(dirname(blockedLogDir), { recursive: true });
    await writeFile(blockedLogDir, "not a directory", "utf8");

    const loaded = await loadWorkspaceMemory(root);
    const persisted = JSON.parse(await readFile(storePath, "utf8")) as WorkspaceMemoryStore;

    // The entry must still be active both in memory and on disk.
    assert.equal(loaded.entries.find(entry => entry.id === "hard_progress")?.status, "active");
    assert.equal(persisted.entries.find(entry => entry.id === "hard_progress")?.status, "active");

    // A missing `migrations` list also means "not recorded"; without the `?? []`
    // fallback the optional chain would yield undefined, which is not equal to false.
    assert.equal((loaded.migrations ?? []).includes(migrationId), false);
    assert.equal((persisted.migrations ?? []).includes(migrationId), false);
  } finally {
    console.error = previousConsoleError;
    if (previousXdgDataHome === undefined) delete process.env.XDG_DATA_HOME;
    else process.env.XDG_DATA_HOME = previousXdgDataHome;
    await rm(sandbox, { recursive: true, force: true });
  }
});
test("real workspace regression fixture is de-identified and English-only", () => {
  // Any Han, Hiragana, Katakana or Hangul character counts as non-English text.
  const nonEnglishScript = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u;
  // Names that must appear neither in workspace keys nor in entry text.
  const bannedTerms = [
    "medical-atlas",
    "opencode-record",
    "agent-reports",
    "pdf-extraction",
    "self-repo",
    "OpenCode Working Memory",
  ];

  // Collect problems per workspace: the key check first, then each entry in
  // order (script check, followed by one message per banned term found).
  const problems = Object.entries(REAL_WORKSPACE_FIXTURES).flatMap(([workspaceKey, entries]) => {
    const keyProblems = bannedTerms.some(term => workspaceKey.includes(term))
      ? [`${workspaceKey}: workspace key should be generalized`]
      : [];

    const entryProblems = entries.flatMap(entry => {
      const scriptProblems = nonEnglishScript.test(entry.text)
        ? [`${workspaceKey}/${entry.id}: text must be English-only`]
        : [];
      const termProblems = bannedTerms
        .filter(term => entry.text.includes(term))
        .map(term => `${workspaceKey}/${entry.id}: text contains identifying term ${term}`);
      return [...scriptProblems, ...termProblems];
    });

    return [...keyProblems, ...entryProblems];
  });

  assert.equal(problems.length, 0, `Fixture privacy failures:\n${problems.join("\n")}`);
});
test("quality cleanup migration regression against real workspace samples", async () => {
const failures: string[] = [];
const now = "2026-04-28T00:00:00.000Z";