diff --git a/README.md b/README.md index 718bed5..fc16202 100644 --- a/README.md +++ b/README.md @@ -255,7 +255,7 @@ On VS Code's 2.45M‑line codebase, SocratiCode answers architectural questions - **Cross-project search** — Search across multiple related projects in a single query. Link projects via `.socraticode.json` or the `SOCRATICODE_LINKED_PROJECTS` env var, then set `includeLinked: true` on `codebase_search`. Results are tagged with project labels and deduplicated via client-side RRF fusion. - **Branch-aware indexing** — Maintain separate indexes per git branch by setting `SOCRATICODE_BRANCH_AWARE=true`. Each branch gets its own Qdrant collections, so switching branches instantly switches to the correct index. Ideal for CI/CD pipelines and PR review workflows. - **Respects ignore rules** — Honors all `.gitignore` files (root + nested), plus an optional `.socraticodeignore` for additional exclusions. Includes sensible built-in defaults. `.gitignore` processing can be disabled via `RESPECT_GITIGNORE=false`. Dot-directories (e.g. `.agent`) can be included via `INCLUDE_DOT_FILES=true`. -- **Custom file extensions** — Projects with non-standard extensions (e.g. `.tpl`, `.blade`) can be included via `EXTRA_EXTENSIONS` env var or `extraExtensions` tool parameter. Works for both indexing and code graph. To go further and treat a custom extension as a real language (full AST chunking, symbols, call graph) rather than plaintext, map it with `EXTENSION_LANGUAGE_MAP` (e.g. `.inc:php`). +- **Custom file extensions** — Projects with non-standard extensions (e.g. `.tpl`, `.blade`) can be included via `EXTRA_EXTENSIONS` env var or `extraExtensions` tool parameter. Such files are indexed as plaintext and appear as leaf nodes in the code graph (no AST chunking or symbols). To instead treat a custom extension as a real language (full AST chunking, symbols, call graph), map it with `EXTENSION_LANGUAGE_MAP` (e.g. `.inc:php`). - **Configurable infrastructure** — All ports, hosts, and API keys are configurable via environment variables. Qdrant API key support for enterprise deployments. - **Enterprise-ready simplicity** — No agent coordination tuning, no memory limit environment variables, no coordinator/conductor capacity knobs, no backpressure configuration. SocratiCode scales by relying on production-grade infrastructure (Qdrant, proven embedding APIs) rather than complex in-process orchestration. - **Auto-setup & zero configuration** — Just install the Claude Plugin/Skill or add the MCP server to your AI host config. On first use, the server automatically checks Docker, pulls images, starts Qdrant and Ollama containers, and downloads the embedding model. No config files, no YAML, no environment variables to tune, no native dependencies to compile. Works everywhere Docker runs. diff --git a/src/constants.ts b/src/constants.ts index 5138205..bdc2576 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -390,6 +390,10 @@ export function getLanguageFromExtension( ext: string, override: Map = EXTENSION_LANGUAGE_MAP, ): string { - const target = override.get(ext) ?? ext; + // Normalize so a caller passing an uppercase ext still matches the + // (lowercased) override keys; the only case-sensitive built-in key, `.R`, + // collapses to `.r` with the same value, so this changes nothing else. + const normalized = ext.toLowerCase(); + const target = override.get(normalized) ?? normalized; return EXTENSION_TO_LANGUAGE[target] || "plaintext"; } diff --git a/src/services/code-graph.ts b/src/services/code-graph.ts index 415ff77..ef5872c 100644 --- a/src/services/code-graph.ts +++ b/src/services/code-graph.ts @@ -582,7 +582,10 @@ export function getAstGrepLang( ext: string, override: Map = EXTENSION_LANGUAGE_MAP, ): Lang | string | null { - const target = override.get(ext) ?? ext; + // Match getLanguageFromExtension: normalize casing so override lookups (keys + // are stored lowercased) and the grammar stay aligned with the label. + const normalized = ext.toLowerCase(); + const target = override.get(normalized) ?? normalized; return EXTENSION_TO_AST_GREP_LANG[target] ?? null; } diff --git a/tests/unit/constants.test.ts b/tests/unit/constants.test.ts index d7e5a69..3087575 100644 --- a/tests/unit/constants.test.ts +++ b/tests/unit/constants.test.ts @@ -323,6 +323,8 @@ describe("constants", () => { it("resolves an EXTENSION_LANGUAGE_MAP override to the target language label", () => { const override = new Map([[".inc", ".php"]]); expect(getLanguageFromExtension(".inc", override)).toBe("php"); + // Case-insensitive: an uppercase ext still matches the lowercased key. + expect(getLanguageFromExtension(".INC", override)).toBe("php"); // Unmapped extensions are unaffected by the override. expect(getLanguageFromExtension(".xyz", override)).toBe("plaintext"); }); diff --git a/tests/unit/indexer.test.ts b/tests/unit/indexer.test.ts index 6b57172..6cc7546 100644 --- a/tests/unit/indexer.test.ts +++ b/tests/unit/indexer.test.ts @@ -128,16 +128,21 @@ describe("indexer utilities", () => { }); it("accepts extensions registered via EXTENSION_LANGUAGE_MAP", () => { - // isIndexableFile reads the global override map; mutate it directly - // (it is the same Map the function consults) and clean up after. + // isIndexableFile reads the global override map; mutate it directly (it is + // the same Map the function consults) and restore the prior state after, + // so the test stays hermetic even if `.inc` is ever pre-configured. + const baseline = isIndexableFile("foo.class.inc"); + const hadPrevious = EXTENSION_LANGUAGE_MAP.has(".inc"); + const previous = EXTENSION_LANGUAGE_MAP.get(".inc"); EXTENSION_LANGUAGE_MAP.set(".inc", ".php"); try { expect(isIndexableFile("foo.class.inc")).toBe(true); } finally { - EXTENSION_LANGUAGE_MAP.delete(".inc"); + if (hadPrevious) EXTENSION_LANGUAGE_MAP.set(".inc", previous as string); + else EXTENSION_LANGUAGE_MAP.delete(".inc"); } - // Once removed, the extension is no longer indexable. - expect(isIndexableFile("foo.class.inc")).toBe(false); + // Restored to the pre-test baseline. + expect(isIndexableFile("foo.class.inc")).toBe(baseline); }); it("accepts .cfg and .ini extensions", () => { @@ -158,6 +163,8 @@ describe("indexer utilities", () => { it("resolves a mapped extension to the target language's grammar", () => { const override = new Map([[".inc", ".php"]]); expect(getAstGrepLang(".inc", override)).toBe("php"); + // Case-insensitive, matching getLanguageFromExtension. + expect(getAstGrepLang(".INC", override)).toBe("php"); // The vocabulary subtlety: a Lang-enum language resolves correctly too. const tsOverride = new Map([[".component", ".ts"]]); expect(String(getAstGrepLang(".component", tsOverride))).toBe("TypeScript");