diff --git a/README.md b/README.md index 1a8577f..718bed5 100644 --- a/README.md +++ b/README.md @@ -255,7 +255,7 @@ On VS Code's 2.45M‑line codebase, SocratiCode answers architectural questions - **Cross-project search** — Search across multiple related projects in a single query. Link projects via `.socraticode.json` or the `SOCRATICODE_LINKED_PROJECTS` env var, then set `includeLinked: true` on `codebase_search`. Results are tagged with project labels and deduplicated via client-side RRF fusion. - **Branch-aware indexing** — Maintain separate indexes per git branch by setting `SOCRATICODE_BRANCH_AWARE=true`. Each branch gets its own Qdrant collections, so switching branches instantly switches to the correct index. Ideal for CI/CD pipelines and PR review workflows. - **Respects ignore rules** — Honors all `.gitignore` files (root + nested), plus an optional `.socraticodeignore` for additional exclusions. Includes sensible built-in defaults. `.gitignore` processing can be disabled via `RESPECT_GITIGNORE=false`. Dot-directories (e.g. `.agent`) can be included via `INCLUDE_DOT_FILES=true`. -- **Custom file extensions** — Projects with non-standard extensions (e.g. `.tpl`, `.blade`) can be included via `EXTRA_EXTENSIONS` env var or `extraExtensions` tool parameter. Works for both indexing and code graph. +- **Custom file extensions** — Projects with non-standard extensions (e.g. `.tpl`, `.blade`) can be included via `EXTRA_EXTENSIONS` env var or `extraExtensions` tool parameter. Works for both indexing and code graph. To go further and treat a custom extension as a real language (full AST chunking, symbols, call graph) rather than plaintext, map it with `EXTENSION_LANGUAGE_MAP` (e.g. `.inc:php`). - **Configurable infrastructure** — All ports, hosts, and API keys are configurable via environment variables. Qdrant API key support for enterprise deployments. 
- **Enterprise-ready simplicity** — No agent coordination tuning, no memory limit environment variables, no coordinator/conductor capacity knobs, no backpressure configuration. SocratiCode scales by relying on production-grade infrastructure (Qdrant, proven embedding APIs) rather than complex in-process orchestration. - **Auto-setup & zero configuration** — Just install the Claude Plugin/Skill or add the MCP server to your AI host config. On first use, the server automatically checks Docker, pulls images, starts Qdrant and Ollama containers, and downloads the embedding model. No config files, no YAML, no environment variables to tune, no native dependencies to compile. Works everywhere Docker runs. @@ -1222,6 +1222,7 @@ The rest of this section documents the variables themselves. Pass them using whi | `RESPECT_GITIGNORE` | `true` | Set to `false` to skip `.gitignore` processing. Built-in defaults and `.socraticodeignore` still apply. | | `INCLUDE_DOT_FILES` | `false` | Set to `true` to include dot-directories (e.g. `.agent`, `.config`) in indexing. By default, directories and files starting with `.` are excluded. Useful for projects where important code lives in dot-directories. | | `EXTRA_EXTENSIONS` | *(none)* | Comma-separated list of additional file extensions to scan (e.g. `.tpl,.blade,.hbs`). Applies to both indexing and code graph. Files with extra extensions are indexed as plaintext and appear as leaf nodes in the code graph. Can also be passed per-operation via the `extraExtensions` tool parameter. | +| `EXTENSION_LANGUAGE_MAP` | *(none)* | Comma-separated `extension:language` overrides that make a non-standard extension be treated as a real language end to end (semantic/AST chunking, symbols, call graph), e.g. `EXTENSION_LANGUAGE_MAP=.inc:php,.module:php` for Drupal/PHP. Unlike `EXTRA_EXTENSIONS` (which indexes as plaintext), the mapped extension gets the full language treatment and is auto-discovered without also listing it in `EXTRA_EXTENSIONS`. 
The target must be a language SocratiCode has an AST grammar for (the Full Support list above plus the AST-graph languages); malformed entries and unknown targets are ignored with a startup warning. A mapping also overrides the built-in one for that extension (e.g. `.h:cpp`). | | `MAX_FILE_SIZE_MB` | `5` | Maximum file size in MB. Files larger than this are skipped during indexing. Increase for repos with large generated or data files you want indexed. | | `SEARCH_DEFAULT_LIMIT` | `10` | Default number of results returned by `codebase_search` (1-50). Each result is a ranked code chunk with file path, line range, and content. Higher values give broader coverage but produce more output. Can still be overridden per-query via the `limit` tool parameter. | | `SEARCH_MIN_SCORE` | `0.10` | Minimum RRF (Reciprocal Rank Fusion) score threshold (0-1). Results below this score are filtered out. Helps remove low-relevance noise from search results. Set to `0` to disable filtering (returns all results up to `limit`). Can be overridden per-query via the `minScore` tool parameter. Works together with `limit`: results are first filtered by score, then capped at `limit`. 
/** Canonical file-extension → language-label map. */
const EXTENSION_TO_LANGUAGE: Readonly<Record<string, string>> = {
  ".js": "javascript", ".jsx": "javascript", ".mjs": "javascript", ".cjs": "javascript",
  ".ts": "typescript", ".tsx": "typescript",
  ".py": "python", ".pyw": "python", ".pyi": "python",
  ".java": "java", ".kt": "kotlin", ".kts": "kotlin", ".scala": "scala",
  ".c": "c", ".h": "c", ".cpp": "cpp", ".hpp": "cpp", ".cc": "cpp", ".hh": "cpp", ".cxx": "cpp",
  ".cs": "csharp",
  ".go": "go",
  ".rs": "rust",
  ".rb": "ruby",
  ".php": "php",
  ".swift": "swift",
  ".sh": "shell", ".bash": "shell", ".zsh": "shell",
  ".html": "html", ".htm": "html",
  ".css": "css", ".scss": "scss", ".sass": "sass", ".less": "less",
  ".vue": "vue", ".svelte": "svelte",
  ".json": "json", ".yaml": "yaml", ".yml": "yaml",
  ".toml": "toml", ".xml": "xml",
  ".md": "markdown", ".mdx": "markdown", ".rst": "rst",
  ".sql": "sql",
  ".dart": "dart",
  ".lua": "lua",
  ".r": "r", ".R": "r",
  ".dockerfile": "dockerfile",
};

/**
 * Language name → a canonical extension that already carries that language's
 * label and AST grammar in {@link EXTENSION_TO_LANGUAGE} / `getAstGrepLang`.
 * Used by EXTENSION_LANGUAGE_MAP so a custom extension inherits BOTH the label
 * and the grammar consistently, without us re-encoding the (deliberately
 * different) vocabularies of the two maps (`shell` vs `bash`, `Lang` enums for
 * JS/TS/HTML/CSS). Only languages with an AST grammar are listed — mapping to
 * anything else would give no benefit over EXTRA_EXTENSIONS.
 */
const LANGUAGE_TO_CANONICAL_EXT: Readonly<Record<string, string>> = {
  javascript: ".js", js: ".js",
  typescript: ".ts", ts: ".ts",
  tsx: ".tsx",
  python: ".py", py: ".py",
  java: ".java",
  kotlin: ".kt",
  scala: ".scala",
  c: ".c",
  cpp: ".cpp", "c++": ".cpp",
  csharp: ".cs", "c#": ".cs",
  go: ".go", golang: ".go",
  rust: ".rs",
  ruby: ".rb",
  php: ".php",
  swift: ".swift",
  dart: ".dart",
  lua: ".lua",
  shell: ".sh", bash: ".sh", sh: ".sh",
  html: ".html",
  css: ".css", scss: ".scss", sass: ".sass", less: ".less",
  vue: ".vue",
  svelte: ".svelte",
};

/**
 * Look up `key` among the table's OWN properties only.
 *
 * The tables above are plain object literals, so a bare `table[key]` also
 * resolves names inherited from `Object.prototype` (`constructor`,
 * `__proto__`, …). Keys here come from user configuration and file names, so
 * inherited members must never count as a hit.
 */
function ownTableValue(
  table: Readonly<Record<string, string>>,
  key: string,
): string | undefined {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/**
 * Parse the `EXTENSION_LANGUAGE_MAP` env var (format `.inc:php,.module:php`).
 *
 * Extensions and language names are trimmed and lowercased, and a leading dot
 * is added to the extension when missing. A repeated extension takes its last
 * valid value. Empty items (e.g. a trailing comma) are skipped silently.
 *
 * @param value Raw env var value; `undefined`, empty, or whitespace-only
 *   yields an empty result.
 * @returns `map`: input extension → canonical extension of the target
 *   language. `invalid`: the trimmed entries that were rejected, in input
 *   order — malformed (no `:`, empty side) or naming a language that is not an
 *   own key of {@link LANGUAGE_TO_CANONICAL_EXT} — so the caller can warn.
 */
export function parseExtensionLanguageMap(value?: string): {
  map: Map<string, string>;
  invalid: string[];
} {
  const map = new Map<string, string>();
  const invalid: string[] = [];
  if (!value?.trim()) return { map, invalid };

  for (const pair of value.split(",")) {
    const trimmed = pair.trim();
    if (!trimmed) continue;

    const idx = trimmed.indexOf(":");
    // Need a non-empty extension before ":" and a non-empty language after it.
    if (idx <= 0 || idx === trimmed.length - 1) {
      invalid.push(trimmed);
      continue;
    }

    const rawExt = trimmed.slice(0, idx).trim().toLowerCase();
    const lang = trimmed.slice(idx + 1).trim().toLowerCase();
    // Either side may have been whitespace only (e.g. " : php").
    if (!rawExt || !lang) {
      invalid.push(trimmed);
      continue;
    }

    const ext = rawExt.startsWith(".") ? rawExt : `.${rawExt}`;
    // Own-property lookup: `.x:constructor` / `.x:__proto__` must be rejected,
    // not resolved to an inherited Object.prototype member.
    const canonical = ownTableValue(LANGUAGE_TO_CANONICAL_EXT, lang);
    if (!canonical) {
      invalid.push(trimmed);
      continue;
    }
    map.set(ext, canonical);
  }
  return { map, invalid };
}

// Parsed once at module load from the process environment.
const _extensionLanguageMap = parseExtensionLanguageMap(process.env.EXTENSION_LANGUAGE_MAP);

/**
 * Custom extension → canonical extension overrides from EXTENSION_LANGUAGE_MAP.
 * Consulted by `getLanguageFromExtension`, `getAstGrepLang`, and the indexable
 * checks so a mapped extension is treated as its target language end to end
 * (label, AST chunking, symbols, call graph, discovery).
 */
export const EXTENSION_LANGUAGE_MAP = _extensionLanguageMap.map;

/** Entries from EXTENSION_LANGUAGE_MAP that were rejected (warned at startup). */
export const EXTENSION_LANGUAGE_MAP_INVALID = _extensionLanguageMap.invalid;

/**
 * Map a file extension to a language label. An EXTENSION_LANGUAGE_MAP override
 * is resolved through the target language's canonical extension so the label
 * matches what a native file of that language would get.
 *
 * @param ext Extension to look up, in the form used as keys of
 *   {@link EXTENSION_TO_LANGUAGE} (leading dot, e.g. `.ts`).
 * @param override Extension → canonical-extension overrides; defaults to the
 *   map parsed from the environment.
 * @returns The language label, or `"plaintext"` when the (possibly overridden)
 *   extension is not an own key of the table.
 */
export function getLanguageFromExtension(
  ext: string,
  override: ReadonlyMap<string, string> = EXTENSION_LANGUAGE_MAP,
): string {
  const target = override.get(ext) ?? ext;
  return ownTableValue(EXTENSION_TO_LANGUAGE, target) || "plaintext";
}
/**
 * File extension → ast-grep language identifier. Values are either a string
 * id or a member of the `Lang` enum imported from `@ast-grep/napi`.
 */
const EXTENSION_TO_AST_GREP_LANG: Readonly<Record<string, Lang | string>> = {
  // Dynamic languages (string identifiers)
  ".py": "python", ".pyw": "python", ".pyi": "python",
  ".java": "java",
  ".kt": "kotlin", ".kts": "kotlin",
  ".scala": "scala",
  ".c": "c", ".h": "c",
  ".cpp": "cpp", ".hpp": "cpp", ".cc": "cpp", ".hh": "cpp", ".cxx": "cpp",
  ".cs": "csharp",
  ".go": "go",
  ".rs": "rust",
  ".rb": "ruby",
  ".php": "php",
  ".swift": "swift",
  ".dart": "dart",
  ".lua": "lua",
  ".sh": "bash", ".bash": "bash", ".zsh": "bash",
  // Composite languages (parsed via HTML + script re-parse)
  ".svelte": "svelte",
  ".vue": "vue",
  // Built-in languages (Lang enum)
  ".js": Lang.JavaScript, ".jsx": Lang.JavaScript, ".mjs": Lang.JavaScript, ".cjs": Lang.JavaScript,
  ".ts": Lang.TypeScript,
  ".tsx": Lang.Tsx,
  ".html": Lang.Html, ".htm": Lang.Html,
  ".css": Lang.Css, ".scss": Lang.Css, ".sass": Lang.Css, ".less": Lang.Css, ".styl": Lang.Css,
};

/**
 * Map a file extension to its ast-grep grammar (or null when none). An
 * EXTENSION_LANGUAGE_MAP override is resolved through the target language's
 * canonical extension, so a mapped extension (e.g. `.inc` → php) gets the same
 * grammar a native file of that language would, keeping symbol extraction and
 * AST chunking consistent with the language label.
 *
 * @param ext Extension to look up, in the form used as keys of the table
 *   above (leading dot, e.g. `.ts`).
 * @param override Extension → canonical-extension overrides; defaults to
 *   `EXTENSION_LANGUAGE_MAP`.
 * @returns The grammar identifier, or `null` when the (possibly overridden)
 *   extension is not an own key of the table.
 */
export function getAstGrepLang(
  ext: string,
  override: ReadonlyMap<string, string> = EXTENSION_LANGUAGE_MAP,
): Lang | string | null {
  const target = override.get(ext) ?? ext;
  // Own-property check: the table is a plain object, so a bare index would
  // also resolve names inherited from Object.prototype (e.g. "constructor").
  return Object.prototype.hasOwnProperty.call(EXTENSION_TO_AST_GREP_LANG, target)
    ? EXTENSION_TO_AST_GREP_LANG[target]
    : null;
}
extensions are real source files, so edits to them + // must trigger an incremental update like any other supported file. + return SUPPORTED_EXTENSIONS.has(ext) || EXTENSION_LANGUAGE_MAP.has(ext); } /** diff --git a/tests/unit/constants.test.ts b/tests/unit/constants.test.ts index 8656eb7..d7e5a69 100644 --- a/tests/unit/constants.test.ts +++ b/tests/unit/constants.test.ts @@ -14,6 +14,7 @@ import { OLLAMA_HOST, OLLAMA_IMAGE, OLLAMA_PORT, + parseExtensionLanguageMap, parseExtraExtensions, QDRANT_CONTAINER_NAME, QDRANT_GRPC_PORT, @@ -318,6 +319,76 @@ describe("constants", () => { it("maps .php to php", () => { expect(getLanguageFromExtension(".php")).toBe("php"); }); + + it("resolves an EXTENSION_LANGUAGE_MAP override to the target language label", () => { + const override = new Map([[".inc", ".php"]]); + expect(getLanguageFromExtension(".inc", override)).toBe("php"); + // Unmapped extensions are unaffected by the override. + expect(getLanguageFromExtension(".xyz", override)).toBe("plaintext"); + }); + + it("an empty override leaves built-in mappings unchanged", () => { + const empty = new Map(); + expect(getLanguageFromExtension(".inc", empty)).toBe("plaintext"); + expect(getLanguageFromExtension(".ts", empty)).toBe("typescript"); + }); + }); + + describe("parseExtensionLanguageMap", () => { + it("returns empty map and no invalid entries for undefined/empty", () => { + expect(parseExtensionLanguageMap(undefined)).toEqual({ map: new Map(), invalid: [] }); + expect(parseExtensionLanguageMap(" ")).toEqual({ map: new Map(), invalid: [] }); + }); + + it("maps an extension to its target language's canonical extension", () => { + const { map, invalid } = parseExtensionLanguageMap(".inc:php"); + expect(map.get(".inc")).toBe(".php"); + expect(invalid).toEqual([]); + }); + + it("normalizes the extension (lowercases, adds leading dot)", () => { + const { map } = parseExtensionLanguageMap("MODULE:php"); + expect(map.get(".module")).toBe(".php"); + }); + + it("resolves the 
vocabulary difference via canonical extension", () => { + // `shell` label vs `bash` grammar, and the Lang-enum languages, are all + // reached through the canonical extension, not re-encoded here. + const { map } = parseExtensionLanguageMap(".bashrc:shell,.component:typescript"); + expect(map.get(".bashrc")).toBe(".sh"); + expect(map.get(".component")).toBe(".ts"); + }); + + it("accepts common language aliases", () => { + const { map } = parseExtensionLanguageMap(".a:c++,.b:c#,.c:golang,.d:py"); + expect(map.get(".a")).toBe(".cpp"); + expect(map.get(".b")).toBe(".cs"); + expect(map.get(".c")).toBe(".go"); + expect(map.get(".d")).toBe(".py"); + }); + + it("rejects a target language with no AST grammar (e.g. the reporter's .foo:bar)", () => { + const { map, invalid } = parseExtensionLanguageMap(".foo:bar,.txt:markdown"); + expect(map.size).toBe(0); + expect(invalid).toEqual([".foo:bar", ".txt:markdown"]); + }); + + it("rejects malformed entries (no colon, no extension, no language)", () => { + const { map, invalid } = parseExtensionLanguageMap("badentry,:php,.x:"); + expect(map.size).toBe(0); + expect(invalid).toEqual(["badentry", ":php", ".x:"]); + }); + + it("keeps valid entries while rejecting invalid ones in the same value", () => { + const { map, invalid } = parseExtensionLanguageMap(".inc:php, .foo:bar, .h:cpp"); + expect([...map.entries()]).toEqual([[".inc", ".php"], [".h", ".cpp"]]); + expect(invalid).toEqual([".foo:bar"]); + }); + + it("last value wins for a repeated extension", () => { + const { map } = parseExtensionLanguageMap(".inc:php,.inc:python"); + expect(map.get(".inc")).toBe(".py"); + }); }); describe("parseExtraExtensions", () => { diff --git a/tests/unit/indexer.test.ts b/tests/unit/indexer.test.ts index 005b4eb..6b57172 100644 --- a/tests/unit/indexer.test.ts +++ b/tests/unit/indexer.test.ts @@ -4,8 +4,8 @@ import fs from "node:fs"; import os from "node:os"; import path from "node:path"; import { afterAll, beforeAll, describe, expect, it, vi } 
from "vitest"; -import { CHUNK_SIZE, MAX_CHUNK_CHARS } from "../../src/constants.js"; -import { ensureDynamicLanguages } from "../../src/services/code-graph.js"; +import { CHUNK_SIZE, EXTENSION_LANGUAGE_MAP, MAX_CHUNK_CHARS } from "../../src/constants.js"; +import { ensureDynamicLanguages, getAstGrepLang } from "../../src/services/code-graph.js"; import { chunkFileContent, chunkId, getIndexableFiles, hashContent, isIndexableFile } from "../../src/services/indexer.js"; // Register dynamic language grammars once for AST-aware chunking tests @@ -127,6 +127,19 @@ describe("indexer utilities", () => { expect(isIndexableFile("image.png", extras)).toBe(false); }); + it("accepts extensions registered via EXTENSION_LANGUAGE_MAP", () => { + // isIndexableFile reads the global override map; mutate it directly + // (it is the same Map the function consults) and clean up after. + EXTENSION_LANGUAGE_MAP.set(".inc", ".php"); + try { + expect(isIndexableFile("foo.class.inc")).toBe(true); + } finally { + EXTENSION_LANGUAGE_MAP.delete(".inc"); + } + // Once removed, the extension is no longer indexable. + expect(isIndexableFile("foo.class.inc")).toBe(false); + }); + it("accepts .cfg and .ini extensions", () => { expect(isIndexableFile("config.cfg")).toBe(true); expect(isIndexableFile("settings.ini")).toBe(true); @@ -139,6 +152,24 @@ describe("indexer utilities", () => { }); }); + // ── getAstGrepLang (EXTENSION_LANGUAGE_MAP override) ─────────── + + describe("getAstGrepLang override", () => { + it("resolves a mapped extension to the target language's grammar", () => { + const override = new Map([[".inc", ".php"]]); + expect(getAstGrepLang(".inc", override)).toBe("php"); + // The vocabulary subtlety: a Lang-enum language resolves correctly too. 
+ const tsOverride = new Map([[".component", ".ts"]]); + expect(String(getAstGrepLang(".component", tsOverride))).toBe("TypeScript"); + }); + + it("returns null for an unmapped extension and leaves built-ins intact", () => { + const override = new Map([[".inc", ".php"]]); + expect(getAstGrepLang(".xyz", override)).toBeNull(); + expect(getAstGrepLang(".php", override)).toBe("php"); + }); + }); + // ── chunkFileContent ───────────────────────────────────────── describe("chunkFileContent", () => {