mirror of
https://github.com/block/goose.git
synced 2026-06-02 06:19:33 +02:00
fix: prevent tool-use marker leakage in toolshim output (#8310)
Signed-off-by: Eugenio La Cava <eugeniolcv@gmail.com> Signed-off-by: Michael Neale <michael.neale@gmail.com> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Co-authored-by: Michael Neale <michael.neale@gmail.com>
This commit is contained in:
@@ -64,6 +64,12 @@ do_not_version/
|
||||
|
||||
/working_dir
|
||||
|
||||
# Local build scripts and generated snapshot artifacts
|
||||
/build.bat
|
||||
/build.ps1
|
||||
/build_check.ps1
|
||||
/crates/goose/src/agents/snapshots/*.snap.new
|
||||
|
||||
# Error log artifacts from mcp replay tests
|
||||
crates/goose/tests/mcp_replays/*errors.txt
|
||||
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
use anyhow::Result;
|
||||
use goose_cli::cli::cli;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
async fn run() -> Result<()> {
|
||||
if let Err(e) = goose_cli::logging::setup_logging(None) {
|
||||
eprintln!("Warning: Failed to initialize logging: {}", e);
|
||||
}
|
||||
@@ -17,3 +16,21 @@ async fn main() -> Result<()> {
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let handle = std::thread::Builder::new()
|
||||
.name("goose-cli-main".to_string())
|
||||
.stack_size(8 * 1024 * 1024)
|
||||
.spawn(|| {
|
||||
let runtime = tokio::runtime::Builder::new_multi_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.expect("Failed to build Tokio runtime");
|
||||
runtime.block_on(run())
|
||||
})
|
||||
.map_err(|e| anyhow::anyhow!("Failed to spawn goose-cli main thread: {}", e))?;
|
||||
|
||||
handle
|
||||
.join()
|
||||
.map_err(|_| anyhow::anyhow!("goose-cli main thread panicked"))?
|
||||
}
|
||||
|
||||
@@ -17,10 +17,11 @@ use crate::providers::base::stream_from_single_message;
|
||||
use crate::providers::base::{MessageStream, Provider, ProviderUsage};
|
||||
use crate::providers::errors::ProviderError;
|
||||
use crate::providers::toolshim::{
|
||||
augment_message_with_tool_calls, convert_tool_messages_to_text,
|
||||
modify_system_prompt_for_tool_json, OllamaInterpreter,
|
||||
augment_message_with_selected_tool_interpreter, convert_tool_messages_to_text,
|
||||
modify_system_prompt_for_tool_json, sanitize_residual_markers,
|
||||
};
|
||||
use rmcp::model::Tool;
|
||||
use tracing::warn;
|
||||
|
||||
async fn enhance_model_error(error: ProviderError, provider: &Arc<dyn Provider>) -> ProviderError {
|
||||
let ProviderError::RequestFailed(ref msg) = error else {
|
||||
@@ -123,13 +124,16 @@ async fn toolshim_postprocess(
|
||||
response: Message,
|
||||
toolshim_tools: &[Tool],
|
||||
) -> Result<Message, ProviderError> {
|
||||
let interpreter = OllamaInterpreter::new().map_err(|e| {
|
||||
ProviderError::ExecutionError(format!("Failed to create OllamaInterpreter: {}", e))
|
||||
})?;
|
||||
|
||||
augment_message_with_tool_calls(&interpreter, response, toolshim_tools)
|
||||
.await
|
||||
.map_err(|e| ProviderError::ExecutionError(format!("Failed to augment message: {}", e)))
|
||||
match augment_message_with_selected_tool_interpreter(response.clone(), toolshim_tools).await {
|
||||
Ok(message) => Ok(message),
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"Toolshim augmentation failed, skipping tool augmentation: {}",
|
||||
e
|
||||
);
|
||||
Ok(sanitize_residual_markers(response))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Agent {
|
||||
@@ -302,20 +306,67 @@ impl Agent {
|
||||
};
|
||||
|
||||
Ok(Box::pin(try_stream! {
|
||||
while let Some(result) = stream.next().await {
|
||||
let (mut message, usage) = result?;
|
||||
if config.toolshim {
|
||||
// Toolshim mode: accumulate the full response before processing
|
||||
// so that tool-use markers spanning multiple chunks are detected
|
||||
// and stripped before any output reaches the UI.
|
||||
let mut accumulated_message: Option<Message> = None;
|
||||
let mut final_usage: Option<ProviderUsage> = None;
|
||||
|
||||
// Store the model information in the global store
|
||||
if let Some(usage) = usage.as_ref() {
|
||||
crate::providers::base::set_current_model(&usage.model);
|
||||
while let Some(result) = stream.next().await {
|
||||
let (msg_opt, usage_opt) = result?;
|
||||
|
||||
if let Some(usage) = usage_opt.as_ref() {
|
||||
crate::providers::base::set_current_model(&usage.model);
|
||||
}
|
||||
|
||||
if let Some(msg) = msg_opt {
|
||||
accumulated_message = Some(match accumulated_message {
|
||||
Some(mut prev) => {
|
||||
for new_content in msg.content {
|
||||
match (&mut prev.content.last_mut(), &new_content) {
|
||||
(
|
||||
Some(MessageContent::Text(last_text)),
|
||||
MessageContent::Text(new_text),
|
||||
) => {
|
||||
last_text.text.push_str(&new_text.text);
|
||||
}
|
||||
_ => {
|
||||
prev.content.push(new_content);
|
||||
}
|
||||
}
|
||||
}
|
||||
prev
|
||||
}
|
||||
None => msg,
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(usage) = usage_opt {
|
||||
final_usage = Some(usage);
|
||||
}
|
||||
|
||||
// Yield empty item so the agent loop can check cancellation
|
||||
yield (None, None);
|
||||
}
|
||||
|
||||
// Post-process / structure the response only if tool interpretation is enabled
|
||||
if message.is_some() && config.toolshim {
|
||||
message = Some(toolshim_postprocess(message.unwrap(), &toolshim_tools).await?);
|
||||
if let Some(msg) = accumulated_message {
|
||||
let processed = toolshim_postprocess(msg, &toolshim_tools).await?;
|
||||
yield (Some(processed), final_usage);
|
||||
} else if final_usage.is_some() {
|
||||
// Preserve usage-only responses (no message content)
|
||||
yield (None, final_usage);
|
||||
}
|
||||
} else {
|
||||
while let Some(result) = stream.next().await {
|
||||
let (message, usage) = result?;
|
||||
|
||||
yield (message, usage);
|
||||
if let Some(usage) = usage.as_ref() {
|
||||
crate::providers::base::set_current_model(&usage.model);
|
||||
}
|
||||
|
||||
yield (message, usage);
|
||||
}
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -479,7 +479,7 @@ impl Drop for RequestLog {
|
||||
|
||||
/// Safely parse a JSON string that may contain doubly-encoded or malformed JSON.
|
||||
/// This function first attempts to parse the input string as-is. If that fails,
|
||||
/// it applies control character escaping and tries again.
|
||||
/// it applies control character escaping and truncated JSON repair and tries again.
|
||||
///
|
||||
/// This approach preserves valid JSON like `{"key1": "value1",\n"key2": "value"}`
|
||||
/// (which contains a literal \n but is perfectly valid JSON) while still fixing
|
||||
@@ -490,13 +490,71 @@ pub fn safely_parse_json(s: &str) -> Result<serde_json::Value, serde_json::Error
|
||||
match serde_json::from_str(s) {
|
||||
Ok(value) => Ok(value),
|
||||
Err(_) => {
|
||||
// If that fails, try with control character escaping
|
||||
let escaped = json_escape_control_chars_in_string(s);
|
||||
serde_json::from_str(&escaped)
|
||||
for candidate in [
|
||||
repair_truncated_json(s),
|
||||
json_escape_control_chars_in_string(s),
|
||||
] {
|
||||
if let Ok(value) = serde_json::from_str(&candidate) {
|
||||
return Ok(value);
|
||||
}
|
||||
}
|
||||
|
||||
let repaired = repair_truncated_json(&json_escape_control_chars_in_string(s));
|
||||
serde_json::from_str(&repaired)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn repair_truncated_json(s: &str) -> String {
|
||||
let mut repaired = String::with_capacity(s.len() + 8);
|
||||
let mut in_string = false;
|
||||
let mut escape_next = false;
|
||||
let mut closers = Vec::new();
|
||||
|
||||
for c in s.chars() {
|
||||
repaired.push(c);
|
||||
|
||||
if in_string {
|
||||
if escape_next {
|
||||
escape_next = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
match c {
|
||||
'\\' => escape_next = true,
|
||||
'"' => in_string = false,
|
||||
_ => {}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
match c {
|
||||
'"' => in_string = true,
|
||||
'{' => closers.push('}'),
|
||||
'[' => closers.push(']'),
|
||||
'}' | ']' => {
|
||||
if closers.last() == Some(&c) {
|
||||
closers.pop();
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
if in_string {
|
||||
if escape_next {
|
||||
repaired.push('\\');
|
||||
}
|
||||
repaired.push('"');
|
||||
}
|
||||
|
||||
while let Some(closer) = closers.pop() {
|
||||
repaired.push(closer);
|
||||
}
|
||||
|
||||
repaired
|
||||
}
|
||||
|
||||
/// Helper to escape control characters in a string that is supposed to be a JSON document.
|
||||
/// This function iterates through the input string `s` and replaces any literal
|
||||
/// control characters (U+0000 to U+001F) with their JSON-escaped equivalents
|
||||
@@ -809,9 +867,16 @@ mod tests {
|
||||
let result = safely_parse_json(good_json).unwrap();
|
||||
assert_eq!(result["test"], "value");
|
||||
|
||||
// Test completely invalid JSON that can't be fixed
|
||||
let broken_json = r#"{"key": "unclosed_string"#;
|
||||
assert!(safely_parse_json(broken_json).is_err());
|
||||
// Test truncated JSON with unclosed string, object, and array
|
||||
let truncated_json = r#"{"key": "unclosed_string","nested": {"items": [1, 2, 3"#;
|
||||
let result = safely_parse_json(truncated_json).unwrap();
|
||||
assert_eq!(result["key"], "unclosed_string");
|
||||
assert_eq!(result["nested"]["items"], json!([1, 2, 3]));
|
||||
|
||||
// Test dangling backslash at end of a truncated string
|
||||
let dangling_escape_json = String::from(r#"{"path":"abc\"#);
|
||||
let result = safely_parse_json(&dangling_escape_json).unwrap();
|
||||
assert_eq!(result["path"], "abc\\");
|
||||
|
||||
// Test empty object
|
||||
let empty_json = "{}";
|
||||
|
||||
@@ -94,14 +94,24 @@ export function getTextAndImageContent(message: Message): {
|
||||
}
|
||||
}
|
||||
|
||||
// Strip <think> tags from assistant text — the thinking is surfaced via getThinkingContent
|
||||
// Strip assistant-only markup that shouldn't appear in rendered text
|
||||
if (message.role === 'assistant') {
|
||||
textContent = stripToolCallMarkers(textContent);
|
||||
textContent = textContent.replace(/<think>[\s\S]*?<\/think>/gi, '');
|
||||
}
|
||||
|
||||
return { textContent, imagePaths };
|
||||
}
|
||||
|
||||
function stripToolCallMarkers(text: string): string {
|
||||
// Remove all tool call XML markers and their content
|
||||
return text
|
||||
.replace(/<\|tool_calls_section_begin\|>[\s\S]*?<\|tool_calls_section_end\|>/g, '')
|
||||
.replace(/<\|tool_call_begin\|>[\s\S]*?<\|tool_call_end\|>/g, '')
|
||||
.replace(/<\|tool_call_argument_begin\|>[\s\S]*?<\|tool_call_argument_end\|>/g, '')
|
||||
.trim();
|
||||
}
|
||||
|
||||
export function getThinkingContent(message: Message): string | null {
|
||||
const parts: string[] = [];
|
||||
|
||||
|
||||
Reference in New Issue
Block a user