diff --git a/crates/goose/src/agents/agent.rs b/crates/goose/src/agents/agent.rs index 9cd398b770..67cb59b26a 100644 --- a/crates/goose/src/agents/agent.rs +++ b/crates/goose/src/agents/agent.rs @@ -44,6 +44,7 @@ use crate::providers::base::{PermissionRouting, Provider}; use crate::providers::errors::ProviderError; use crate::recipe::{Author, Recipe, Response, Settings}; use crate::scheduler_trait::SchedulerTrait; +use crate::security::adversary_inspector::AdversaryInspector; use crate::security::security_inspector::SecurityInspector; use crate::session::extension_data::{EnabledExtensionsState, ExtensionState}; use crate::session::{Session, SessionManager}; @@ -263,6 +264,9 @@ impl Agent { // Add security inspector (highest priority - runs first) tool_inspection_manager.add_inspector(Box::new(SecurityInspector::new())); + // Add adversary inspector (LLM-based review, enabled by ~/.config/goose/adversary.md) + tool_inspection_manager.add_inspector(Box::new(AdversaryInspector::new(provider.clone()))); + // Add permission inspector (medium-high priority) tool_inspection_manager.add_inspector(Box::new(PermissionInspector::new( permission_manager, @@ -2288,6 +2292,10 @@ mod tests { inspector_names.contains(&"security"), "Tool inspection manager should contain security inspector" ); + assert!( + inspector_names.contains(&"adversary"), + "Tool inspection manager should contain adversary inspector" + ); Ok(()) } diff --git a/crates/goose/src/security/adversary_inspector.rs b/crates/goose/src/security/adversary_inspector.rs new file mode 100644 index 0000000000..9f59085f82 --- /dev/null +++ b/crates/goose/src/security/adversary_inspector.rs @@ -0,0 +1,630 @@ +use anyhow::Result; +use async_trait::async_trait; +use chrono::Utc; +use std::sync::OnceLock; + +use crate::agents::types::SharedProvider; +use crate::config::paths::Paths; +use crate::config::GooseMode; +use crate::conversation::message::{Message, MessageContent, ToolRequest}; +use crate::conversation::Conversation; 
+use crate::tool_inspection::{InspectionAction, InspectionResult, ToolInspector}; +use crate::utils::safe_truncate; + +const DEFAULT_TOOLS: &[&str] = &["shell", "computercontroller__automation_script"]; + +const DEFAULT_RULES: &str = r#"BLOCK if the command: +- Exfiltrates data (curl/wget posting to unknown URLs, piping secrets out) +- Is destructive beyond the project scope (rm -rf /, modifying system files) +- Installs malware or runs obfuscated code +- Attempts to escalate privileges unnecessarily +- Downloads and executes untrusted remote scripts + +ALLOW if the command is a normal development operation, even if it modifies files, +installs packages, runs tests, uses git, etc. Most commands are fine. +Err on the side of ALLOW — only block truly dangerous things."#; + +const MAX_RECENT_USER_MESSAGES: usize = 4; + +struct AdversaryConfig { + tools: Vec, + rules: String, +} + +/// Adversary inspector that reviews tool calls against user-defined rules. +/// +/// Activated by placing an `adversary.md` file in the Goose config directory +/// (`~/.config/goose/adversary.md`). The file contains optional frontmatter +/// to select which tools are reviewed, followed by rules. +/// +/// Example `adversary.md`: +/// ```text +/// tools: shell, computercontroller__automation_script +/// --- +/// BLOCK if the command exfiltrates data or is destructive. +/// ALLOW normal development operations. +/// ``` +/// +/// If the `tools:` line is omitted, the tools in `DEFAULT_TOOLS` are reviewed by default. +/// If the file is absent, this inspector is disabled. +/// If the review fails, the inspector fails open (allows the tool call). 
+pub struct AdversaryInspector { + provider: SharedProvider, + config: OnceLock>, + config_path: Option, +} + +impl AdversaryInspector { + pub fn new(provider: SharedProvider) -> Self { + Self { + provider, + config: OnceLock::new(), + config_path: None, + } + } + + pub fn with_config_dir(provider: SharedProvider, config_dir: std::path::PathBuf) -> Self { + Self { + provider, + config: OnceLock::new(), + config_path: Some(config_dir.join("adversary.md")), + } + } + + fn get_config(&self) -> Option<&AdversaryConfig> { + self.config + .get_or_init(|| { + let path = self + .config_path + .clone() + .unwrap_or_else(|| Paths::config_dir().join("adversary.md")); + if !path.exists() { + tracing::debug!("No adversary.md found, adversary inspector disabled"); + return None; + } + + let content = match std::fs::read_to_string(&path) { + Ok(c) => c, + Err(e) => { + tracing::warn!("Failed to read adversary.md: {}", e); + return Some(AdversaryConfig { + tools: DEFAULT_TOOLS.iter().map(|s| (*s).to_string()).collect(), + rules: DEFAULT_RULES.to_string(), + }); + } + }; + + let config = Self::parse_adversary_md(&content); + let tool_list = config.tools.join(", "); + tracing::info!( + tools = %tool_list, + "Adversary inspector enabled from {}", + path.display() + ); + Some(config) + }) + .as_ref() + } + + /// Parse adversary.md content, extracting optional `tools:` frontmatter. + /// + /// Format: + /// ```text + /// tools: shell, computercontroller__automation_script + /// --- + /// BLOCK if ... + /// ``` + /// + /// If no `tools:` line or `---` separator, the entire content is rules + /// and tools defaults to `DEFAULT_TOOLS`. 
+ fn parse_adversary_md(content: &str) -> AdversaryConfig { + let trimmed = content.trim(); + if trimmed.is_empty() { + return AdversaryConfig { + tools: DEFAULT_TOOLS.iter().map(|s| (*s).to_string()).collect(), + rules: DEFAULT_RULES.to_string(), + }; + } + + // Look for frontmatter: lines before a `---` separator + if let Some((frontmatter, rest)) = trimmed.split_once("\n---") { + let rules = rest.trim(); + + let mut tools: Option> = None; + for line in frontmatter.lines() { + let line = line.trim(); + if let Some(value) = line.strip_prefix("tools:") { + tools = Some( + value + .split(',') + .map(|t| t.trim().to_string()) + .filter(|t| !t.is_empty()) + .collect(), + ); + } + } + + let rules = if rules.is_empty() { + DEFAULT_RULES.to_string() + } else { + rules.to_string() + }; + + AdversaryConfig { + tools: tools + .unwrap_or_else(|| DEFAULT_TOOLS.iter().map(|s| (*s).to_string()).collect()), + rules, + } + } else { + // No frontmatter — entire content is rules + AdversaryConfig { + tools: DEFAULT_TOOLS.iter().map(|s| (*s).to_string()).collect(), + rules: trimmed.to_string(), + } + } + } + + fn should_review(config: &AdversaryConfig, tool_request: &ToolRequest) -> bool { + let tool_name = match &tool_request.tool_call { + Ok(tc) => tc.name.as_ref(), + Err(_) => return false, + }; + config.tools.iter().any(|t| t == tool_name) + } + + fn format_tool_call(tool_request: &ToolRequest) -> String { + match &tool_request.tool_call { + Ok(tc) => { + let mut s = format!("Tool: {}", tc.name); + if let Some(args) = &tc.arguments { + if let Some(cmd) = args.get("command").and_then(|v| v.as_str()) { + s = format!("Tool: {} — command: {}", tc.name, cmd); + } else if let Ok(json) = serde_json::to_string_pretty(args) { + s.push_str("\nArguments: "); + s.push_str(&json); + } + } + s + } + Err(e) => format!("(malformed tool call: {})", e), + } + } + + fn extract_recent_user_messages(messages: &[Message], count: usize) -> Vec { + messages + .iter() + .rev() + .filter(|m| m.role == 
rmcp::model::Role::User) + .filter_map(|m| { + let text: String = m + .content + .iter() + .filter_map(|c| match c { + MessageContent::Text(t) => Some(t.text.clone()), + _ => None, + }) + .collect::>() + .join("\n"); + if text.is_empty() { + None + } else { + Some(text) + } + }) + .take(count) + .collect::>() + .into_iter() + .rev() + .collect() + } + + fn extract_original_task(messages: &[Message]) -> String { + for msg in messages { + if msg.role == rmcp::model::Role::User { + let text: String = msg + .content + .iter() + .filter_map(|c| match c { + MessageContent::Text(t) => Some(t.text.clone()), + _ => None, + }) + .collect::>() + .join("\n"); + if !text.is_empty() { + return safe_truncate(&text, 500); + } + } + } + "(unknown)".to_string() + } + + async fn consult_llm( + &self, + tool_description: &str, + original_task: &str, + recent_messages: &[String], + rules: &str, + ) -> Result<(bool, String)> { + let provider_guard = self.provider.lock().await; + let provider = match provider_guard.clone() { + Some(p) => p, + None => return Ok((true, "No provider available".to_string())), + }; + drop(provider_guard); + + let history_section = if !recent_messages.is_empty() { + let mut s = String::from("Recent user messages (oldest first):\n"); + for (i, msg) in recent_messages.iter().enumerate() { + let truncated = safe_truncate(msg, 200); + s.push_str(&format!("{}. {}\n", i + 1, truncated)); + } + s.push('\n'); + s + } else { + String::new() + }; + + let user_message = format!( + "The user's original task: {}\n\n\ + {}\ + The tool call to review:\n\ + ```\n\ + {}\n\ + ```\n\n\ + {}\n\n\ + Respond with a single word on the first line: ALLOW or BLOCK\n\ + Then on the next line, a brief reason.", + original_task, history_section, tool_description, rules + ); + + let system_prompt = "You are an adversarial security reviewer, protecting the user \ + in case the other agent is rogue. An AI coding agent is about to execute a tool \ + call. 
Your ONLY job: decide if this tool call is safe given the user's task and \ + rules. Respond with ALLOW or BLOCK on the first line, then a brief reason on \ + the next line."; + + let check_messages = vec![Message::new( + rmcp::model::Role::User, + Utc::now().timestamp(), + vec![MessageContent::text(user_message)], + )]; + let conversation = Conversation::new_unvalidated(check_messages); + + let model_config = provider.get_model_config(); + let (response, _usage) = provider + .complete( + &model_config, + "", + system_prompt, + conversation.messages(), + &[], + ) + .await + .map_err(|e| anyhow::anyhow!("Adversary LLM call failed: {}", e))?; + + let output: String = response + .content + .iter() + .filter_map(|c| match c { + MessageContent::Text(t) => Some(t.text.clone()), + _ => None, + }) + .collect::>() + .join("\n"); + + let output = output.trim(); + let upper = output.to_uppercase(); + + if upper.starts_with("BLOCK") || upper.contains("\nBLOCK") { + let reason = output + .lines() + .skip(1) + .collect::>() + .join(" ") + .trim() + .to_string(); + let reason = if reason.is_empty() { + "Blocked by adversary".to_string() + } else { + reason + }; + Ok((false, reason)) + } else { + let reason = output + .lines() + .skip(1) + .collect::>() + .join(" ") + .trim() + .to_string(); + Ok((true, reason)) + } + } +} + +#[async_trait] +impl ToolInspector for AdversaryInspector { + fn name(&self) -> &'static str { + "adversary" + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn is_enabled(&self) -> bool { + self.get_config().is_some() + } + + async fn inspect( + &self, + _session_id: &str, + tool_requests: &[ToolRequest], + messages: &[Message], + _goose_mode: GooseMode, + ) -> Result> { + let config = match self.get_config() { + Some(c) => c, + None => return Ok(vec![]), + }; + + let original_task = Self::extract_original_task(messages); + let recent_messages = + Self::extract_recent_user_messages(messages, MAX_RECENT_USER_MESSAGES); + + let mut results = 
Vec::new(); + + for request in tool_requests { + if !Self::should_review(config, request) { + continue; + } + + let tool_description = Self::format_tool_call(request); + + tracing::debug!( + tool_request_id = %request.id, + "Adversary inspector reviewing tool call" + ); + + match self + .consult_llm( + &tool_description, + &original_task, + &recent_messages, + &config.rules, + ) + .await + { + Ok((true, reason)) => { + tracing::debug!( + tool_request_id = %request.id, + reason = %reason, + "Adversary: ALLOW" + ); + results.push(InspectionResult { + tool_request_id: request.id.clone(), + action: InspectionAction::Allow, + reason: format!("Adversary: {}", reason), + confidence: 1.0, + inspector_name: self.name().to_string(), + finding_id: None, + }); + } + Ok((false, reason)) => { + tracing::warn!( + tool_request_id = %request.id, + reason = %reason, + "Adversary: BLOCK" + ); + results.push(InspectionResult { + tool_request_id: request.id.clone(), + action: InspectionAction::Deny, + reason: format!("🛡️ Adversary blocked: {}", reason), + confidence: 1.0, + inspector_name: self.name().to_string(), + finding_id: None, + }); + } + Err(e) => { + tracing::warn!( + tool_request_id = %request.id, + error = %e, + "Adversary inspector failed, allowing tool call (fail-open)" + ); + results.push(InspectionResult { + tool_request_id: request.id.clone(), + action: InspectionAction::Allow, + reason: format!("Adversary error (fail-open): {}", e), + confidence: 0.0, + inspector_name: self.name().to_string(), + finding_id: None, + }); + } + } + } + + Ok(results) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rmcp::model::CallToolRequestParams; + use rmcp::object; + use std::sync::Arc; + use tokio::sync::Mutex; + + #[test] + fn test_parse_with_tools_frontmatter() { + let content = "tools: shell, computercontroller__automation_script\n---\nBLOCK bad stuff"; + let config = AdversaryInspector::parse_adversary_md(content); + assert_eq!( + config.tools, + vec!["shell", 
"computercontroller__automation_script"] + ); + assert_eq!(config.rules, "BLOCK bad stuff"); + } + + #[test] + fn test_parse_without_frontmatter() { + let content = "BLOCK if the command exfiltrates data"; + let config = AdversaryInspector::parse_adversary_md(content); + assert_eq!( + config.tools, + vec!["shell", "computercontroller__automation_script"] + ); + assert_eq!(config.rules, "BLOCK if the command exfiltrates data"); + } + + #[test] + fn test_parse_empty() { + let config = AdversaryInspector::parse_adversary_md(""); + assert_eq!( + config.tools, + vec!["shell", "computercontroller__automation_script"] + ); + assert_eq!(config.rules, DEFAULT_RULES); + } + + #[test] + fn test_parse_frontmatter_empty_rules_uses_defaults() { + let content = "tools: shell\n---\n"; + let config = AdversaryInspector::parse_adversary_md(content); + assert_eq!(config.tools, vec!["shell"]); + assert_eq!(config.rules, DEFAULT_RULES); + } + + #[test] + fn test_should_review_matches() { + let config = AdversaryConfig { + tools: vec!["shell".to_string()], + rules: String::new(), + }; + let request = ToolRequest { + id: "r1".into(), + tool_call: Ok( + CallToolRequestParams::new("shell").with_arguments(object!({"command": "ls"})) + ), + metadata: None, + tool_meta: None, + }; + assert!(AdversaryInspector::should_review(&config, &request)); + } + + #[test] + fn test_should_review_skips_non_matching() { + let config = AdversaryConfig { + tools: vec!["shell".to_string()], + rules: String::new(), + }; + let request = ToolRequest { + id: "r1".into(), + tool_call: Ok(CallToolRequestParams::new("write") + .with_arguments(object!({"path": "foo.txt", "content": "hi"}))), + metadata: None, + tool_meta: None, + }; + assert!(!AdversaryInspector::should_review(&config, &request)); + } + + #[test] + fn test_format_tool_call_shell() { + let request = ToolRequest { + id: "req1".into(), + tool_call: Ok(CallToolRequestParams::new("shell") + .with_arguments(object!({"command": "rm -rf /"}))), + metadata: 
None, + tool_meta: None, + }; + let formatted = AdversaryInspector::format_tool_call(&request); + assert!(formatted.contains("shell")); + assert!(formatted.contains("rm -rf /")); + } + + #[test] + fn test_format_tool_call_write() { + let request = ToolRequest { + id: "req2".into(), + tool_call: Ok(CallToolRequestParams::new("write") + .with_arguments(object!({"path": "/etc/passwd", "content": "hacked"}))), + metadata: None, + tool_meta: None, + }; + let formatted = AdversaryInspector::format_tool_call(&request); + assert!(formatted.contains("write")); + assert!(formatted.contains("/etc/passwd")); + } + + #[test] + fn test_extract_original_task() { + let messages = vec![ + Message::new( + rmcp::model::Role::User, + Utc::now().timestamp(), + vec![MessageContent::text("Refactor the auth module")], + ), + Message::new( + rmcp::model::Role::Assistant, + Utc::now().timestamp(), + vec![MessageContent::text("Sure, I'll start by...")], + ), + ]; + let task = AdversaryInspector::extract_original_task(&messages); + assert_eq!(task, "Refactor the auth module"); + } + + #[test] + fn test_extract_recent_user_messages() { + let messages = vec![ + Message::new( + rmcp::model::Role::User, + Utc::now().timestamp(), + vec![MessageContent::text("First message")], + ), + Message::new( + rmcp::model::Role::Assistant, + Utc::now().timestamp(), + vec![MessageContent::text("Response")], + ), + Message::new( + rmcp::model::Role::User, + Utc::now().timestamp(), + vec![MessageContent::text("Second message")], + ), + Message::new( + rmcp::model::Role::User, + Utc::now().timestamp(), + vec![MessageContent::text("Third message")], + ), + ]; + let recent = AdversaryInspector::extract_recent_user_messages(&messages, 2); + assert_eq!(recent.len(), 2); + assert_eq!(recent[0], "Second message"); + assert_eq!(recent[1], "Third message"); + } + + #[tokio::test] + async fn test_disabled_when_no_adversary_md() { + let tmp = tempfile::tempdir().unwrap(); + + let provider: SharedProvider = 
Arc::new(Mutex::new(None)); + let inspector = AdversaryInspector::with_config_dir(provider, tmp.path().to_path_buf()); + assert!(!inspector.is_enabled()); + + let request = ToolRequest { + id: "req1".into(), + tool_call: Ok( + CallToolRequestParams::new("shell").with_arguments(object!({"command": "ls"})) + ), + metadata: None, + tool_meta: None, + }; + + let results = inspector + .inspect("test", &[request], &[], GooseMode::Auto) + .await + .unwrap(); + assert!(results.is_empty()); + } +} diff --git a/crates/goose/src/security/mod.rs b/crates/goose/src/security/mod.rs index 3c771f0326..098a4f6358 100644 --- a/crates/goose/src/security/mod.rs +++ b/crates/goose/src/security/mod.rs @@ -1,3 +1,4 @@ +pub mod adversary_inspector; pub mod classification_client; pub mod patterns; pub mod scanner; diff --git a/crates/goose/tests/adversary_inspector_tests.rs b/crates/goose/tests/adversary_inspector_tests.rs new file mode 100644 index 0000000000..cd11bd563a --- /dev/null +++ b/crates/goose/tests/adversary_inspector_tests.rs @@ -0,0 +1,172 @@ +use goose::config::GooseMode; +use goose::conversation::message::{Message, MessageContent, ToolRequest}; +use goose::security::adversary_inspector::AdversaryInspector; +use goose::tool_inspection::ToolInspector; +use rmcp::model::CallToolRequestParams; +use rmcp::object; +use std::sync::Arc; +use tokio::sync::Mutex; + +fn make_request( + id: &str, + tool: &str, + args: serde_json::Map, +) -> ToolRequest { + ToolRequest { + id: id.into(), + tool_call: Ok(CallToolRequestParams::new(tool.to_string()).with_arguments(args)), + metadata: None, + tool_meta: None, + } +} + +fn write_adversary_md(dir: &std::path::Path, content: &str) { + std::fs::create_dir_all(dir).unwrap(); + std::fs::write(dir.join("adversary.md"), content).unwrap(); +} + +#[tokio::test] +async fn test_adversary_disabled_without_config_file() { + let tmp = tempfile::tempdir().unwrap(); + + let provider = Arc::new(Mutex::new(None)); + let inspector = 
AdversaryInspector::with_config_dir(provider, tmp.path().to_path_buf()); + + assert_eq!(inspector.name(), "adversary"); + assert!(!inspector.is_enabled()); + + let results = inspector + .inspect( + "test-session", + &[make_request( + "r1", + "shell", + object!({"command": "rm -rf /"}), + )], + &[], + GooseMode::SmartApprove, + ) + .await + .unwrap(); + + assert!(results.is_empty()); +} + +#[tokio::test] +async fn test_adversary_enabled_default_tools() { + let tmp = tempfile::tempdir().unwrap(); + write_adversary_md(tmp.path(), "BLOCK everything for testing"); + + let provider = Arc::new(Mutex::new(None)); + let inspector = AdversaryInspector::with_config_dir(provider, tmp.path().to_path_buf()); + + assert!(inspector.is_enabled()); + + let messages = vec![Message::new( + rmcp::model::Role::User, + chrono::Utc::now().timestamp(), + vec![MessageContent::text("build the project")], + )]; + + // shell is reviewed by default — no provider means fail-open (Allow) + let results = inspector + .inspect( + "test-session", + &[make_request( + "r1", + "shell", + object!({"command": "cargo build"}), + )], + &messages, + GooseMode::SmartApprove, + ) + .await + .unwrap(); + + assert_eq!(results.len(), 1); + assert!(matches!( + results[0].action, + goose::tool_inspection::InspectionAction::Allow + )); + + // write is NOT reviewed by default — skipped entirely + let results = inspector + .inspect( + "test-session", + &[make_request( + "r1", + "write", + object!({"path": "foo.txt", "content": "hi"}), + )], + &messages, + GooseMode::SmartApprove, + ) + .await + .unwrap(); + + assert!(results.is_empty()); +} + +#[tokio::test] +async fn test_adversary_custom_tool_filter() { + let tmp = tempfile::tempdir().unwrap(); + write_adversary_md( + tmp.path(), + "tools: shell, computercontroller__automation_script\n---\nBLOCK bad stuff", + ); + + let provider = Arc::new(Mutex::new(None)); + let inspector = AdversaryInspector::with_config_dir(provider, tmp.path().to_path_buf()); + + 
assert!(inspector.is_enabled()); + + let messages = vec![Message::new( + rmcp::model::Role::User, + chrono::Utc::now().timestamp(), + vec![MessageContent::text("do something")], + )]; + + // shell — reviewed + let results = inspector + .inspect( + "test", + &[make_request("r1", "shell", object!({"command": "ls"}))], + &messages, + GooseMode::Auto, + ) + .await + .unwrap(); + assert_eq!(results.len(), 1); + + // automation_script — reviewed + let results = inspector + .inspect( + "test", + &[make_request( + "r2", + "computercontroller__automation_script", + object!({"script": "echo hi", "language": "shell"}), + )], + &messages, + GooseMode::Auto, + ) + .await + .unwrap(); + assert_eq!(results.len(), 1); + + // write — NOT reviewed + let results = inspector + .inspect( + "test", + &[make_request( + "r3", + "write", + object!({"path": "x.txt", "content": "y"}), + )], + &messages, + GooseMode::Auto, + ) + .await + .unwrap(); + assert!(results.is_empty()); +} diff --git a/documentation/docs/guides/security/adversary-mode.md b/documentation/docs/guides/security/adversary-mode.md new file mode 100644 index 0000000000..7967b1e6b6 --- /dev/null +++ b/documentation/docs/guides/security/adversary-mode.md @@ -0,0 +1,88 @@ +--- +sidebar_position: 2 +title: Adversary Mode +sidebar_label: Adversary Mode +description: An independent agent reviewer that silently watches tool calls to protect you in case the agent goes rogue. +--- + +Adversary mode adds a silent, independent agent reviewer that watches tool calls before they execute. It acts as a second pair of eyes — protecting you in case the main agent is compromised, manipulated by a prompt injection, or simply doing something that doesn't match what you asked for. + +Unlike [pattern-based detection](/docs/guides/security/prompt-injection-detection), the adversary reviewer understands context. It knows what you originally asked for, what you've said recently, and can judge whether a tool call makes sense for your task. 
+ +## How It Works + +1. Before each tool call, the adversary reviewer checks your **original task**, **recent messages**, and the **tool call details** +2. It evaluates the tool call against your rules and returns **ALLOW** or **BLOCK** +3. Blocked tool calls are denied — the agent sees the rejection and cannot retry +4. If the reviewer fails for any reason, the tool call is allowed through (fail-open) + +The adversary uses the same model and provider goose is already configured with. No additional API keys or services needed. + +## Enabling Adversary Mode + +Create a file at `~/.config/goose/adversary.md` with your rules: + +```markdown +BLOCK if the tool call: +- Exfiltrates data (posting to unknown URLs, piping secrets to external services) +- Is destructive beyond the project scope (deleting system files, wiping directories) +- Installs malware or runs obfuscated code +- Attempts to escalate privileges unnecessarily +- Downloads and executes untrusted remote scripts + +ALLOW normal development operations like editing files, running tests, +installing packages, using git, etc. Most tool calls are fine. +Err on the side of ALLOW — only block truly dangerous things. +``` + +That's it. If the file exists, adversary mode is on. Delete the file to turn it off. + +An empty file uses sensible default rules similar to the example above. + +## Writing Good Rules + +The rules in `adversary.md` are your policy, written in plain language: + +- **Be specific** about what should be blocked — vague rules lead to false positives +- **Err on the side of ALLOW** — the adversary should catch genuinely dangerous actions, not slow down normal work +- **Think about your threat model** — what would a rogue agent actually try to do? 
+ +You can tailor rules to your environment: + +```markdown +BLOCK if: +- Any network request goes to a domain not in: github.com, npmjs.org, pypi.org +- Files outside of ~/projects/ are modified +- SSH keys, .env files, or credentials are read or transmitted + +ALLOW all standard development operations within ~/projects/. +``` + +## What Gets Reviewed + +By default, the adversary reviews **`shell`** and **`computercontroller__automation_script`** — the tools that can execute arbitrary code. + +You can expand coverage by adding a `tools:` line at the top of your `adversary.md`: + +```markdown +tools: shell, computercontroller__automation_script +--- +BLOCK if the command exfiltrates data or is destructive. +ALLOW normal development operations. +``` + +The `tools:` line is a comma-separated list of tool names to review. Everything before the `---` separator is configuration; everything after is your rules. If you omit the `tools:` line, `shell` and `computercontroller__automation_script` are reviewed by default. 
+ +Some tool names you might want to add: + +| Tool name | What it does | +|-----------|-------------| +| `shell` | Executes shell commands (default) | +| `computercontroller__automation_script` | Runs shell, Ruby, AppleScript, or PowerShell scripts (default) | +| `computercontroller__computer_control` | UI automation — clicks, keystrokes, typing | +| `computercontroller__web_scrape` | Fetches arbitrary URLs | + +## See Also + +- [Prompt Injection Detection](/docs/guides/security/prompt-injection-detection) — pattern-based detection (complementary, always-on when enabled) +- [goose Permission Modes](/docs/guides/goose-permissions) — control goose's autonomy level diff --git a/documentation/docs/guides/security/index.mdx b/documentation/docs/guides/security/index.mdx index 13e83a0da1..917d49992e 100644 --- a/documentation/docs/guides/security/index.mdx +++ b/documentation/docs/guides/security/index.mdx @@ -15,6 +15,11 @@ import styles from '@site/src/components/Card/styles.module.css';

📚 Documentation & Guides

+