From d18555ca6290c8faff6801a8f48243f7e3a0f52c Mon Sep 17 00:00:00 2001 From: Dale Lakes <6843636+dale-lakes@users.noreply.github.com> Date: Thu, 2 Apr 2026 10:01:38 -0400 Subject: [PATCH] fix: reconsolidate split tool-call messages to follow OpenAI format (#7921) Signed-off-by: Dale Lakes <6843636+spitfire55@users.noreply.github.com> Signed-off-by: Dale Lakes Signed-off-by: Clyde Co-authored-by: Dale Lakes <6843636+spitfire55@users.noreply.github.com> Co-authored-by: Claude Opus 4.6 Co-authored-by: Clyde --- crates/goose/src/providers/formats/openai.rs | 178 +++++++++++++++++++ 1 file changed, 178 insertions(+) diff --git a/crates/goose/src/providers/formats/openai.rs b/crates/goose/src/providers/formats/openai.rs index 1dda9186a7..d0029c5caf 100644 --- a/crates/goose/src/providers/formats/openai.rs +++ b/crates/goose/src/providers/formats/openai.rs @@ -340,9 +340,117 @@ pub fn format_messages(messages: &[Message], image_format: &ImageFormat) -> Vec< messages_spec.extend(output); } + merge_split_tool_call_messages(&mut messages_spec); messages_spec } +/// The agent splits a single assistant response with N tool_calls into N +/// interleaved `asst(TC)/tool` pairs, cloning `reasoning_content` onto each. +/// This function merges them back into one assistant message with all tool_calls, +/// followed by the tool results — the standard OpenAI format. +/// +/// Only merges when `reasoning_content` is present and matches, since that is +/// the only signal that messages were split from the same turn. +fn merge_split_tool_call_messages(messages: &mut Vec) { + let mut i = 0; + while i < messages.len() { + let is_assistant_tool_call = messages[i].get("role") == Some(&json!("assistant")) + && messages[i] + .get("tool_calls") + .and_then(|tc| tc.as_array()) + .is_some_and(|a| !a.is_empty()); + let base_reasoning = messages[i].get("reasoning_content"); + + if !is_assistant_tool_call || base_reasoning.is_none() { + i += 1; + continue; + } + let base_reasoning = base_reasoning.unwrap().clone(); + + let mut extra_tool_calls: Vec = Vec::new(); + let mut collected: Vec = Vec::new(); + let mut scan = i + 1; + + loop { + if scan >= messages.len() || messages[scan].get("role") != Some(&json!("tool")) { + break; + } + + // Skip past tool result and any image-only user messages that + // format_messages inserts after tool results containing images. + let mut peek = scan + 1; + while peek < messages.len() && is_image_only_user_message(&messages[peek]) { + peek += 1; + } + + if peek >= messages.len() { + break; + } + let next = &messages[peek]; + let has_no_content = next.get("content").is_none_or(|c| { + c.is_null() + || c.as_str().is_some_and(|s| s.is_empty()) + || c.as_array().is_some_and(|a| a.is_empty()) + }); + let is_split = next.get("role") == Some(&json!("assistant")) + && next + .get("tool_calls") + .and_then(|tc| tc.as_array()) + .is_some_and(|a| !a.is_empty()) + && has_no_content + && next.get("reasoning_content") == Some(&base_reasoning); + + if !is_split { + break; + } + + collected.extend(messages[scan..peek].iter().cloned()); + if let Some(tc) = messages[peek] + .get("tool_calls") + .and_then(|tc| tc.as_array()) + { + extra_tool_calls.extend(tc.iter().cloned()); + } + scan = peek + 1; + } + + if extra_tool_calls.is_empty() { + i += 1; + continue; + } + + if let Some(base_tc) = messages[i] + .get_mut("tool_calls") + .and_then(|tc| tc.as_array_mut()) + { + base_tc.extend(extra_tool_calls); + } + + let insert_at = i + 1; + messages.drain(insert_at..scan); + let num_collected = collected.len(); + for (j, msg) in collected.into_iter().enumerate() { + messages.insert(insert_at + j, msg); + } + + i = insert_at + num_collected; + } +} + +/// True if `msg` is a synthetic image-only user message (content is exclusively image_url items). +fn is_image_only_user_message(msg: &Value) -> bool { + msg.get("role") == Some(&json!("user")) + && msg + .get("content") + .and_then(|c| c.as_array()) + .is_some_and(|arr| { + !arr.is_empty() + && arr + .iter() + .all(|item| item.get("type") == Some(&json!("image_url"))) + }) +} + pub fn format_tools(tools: &[Tool]) -> anyhow::Result> { let mut tool_names = std::collections::HashSet::new(); let mut result = Vec::new(); @@ -2110,4 +2218,74 @@ data: [DONE]"#; "expected an error but stream completed successfully" ); } + + #[test] + fn test_merge_split_tool_calls_with_reasoning() { + let mut messages = vec![ + json!({"role": "assistant", "tool_calls": [{"id": "tc1", "type": "function", "function": {"name": "read", "arguments": "{}"}}], "reasoning_content": "thinking..."}), + json!({"role": "tool", "tool_call_id": "tc1", "content": "result1"}), + json!({"role": "assistant", "tool_calls": [{"id": "tc2", "type": "function", "function": {"name": "write", "arguments": "{}"}}], "reasoning_content": "thinking..."}), + json!({"role": "tool", "tool_call_id": "tc2", "content": "result2"}), + ]; + merge_split_tool_call_messages(&mut messages); + + assert_eq!(messages.len(), 3); + assert_eq!(messages[0]["tool_calls"].as_array().unwrap().len(), 2); + assert_eq!(messages[1]["role"], "tool"); + assert_eq!(messages[2]["role"], "tool"); + } + + #[test] + fn test_no_merge_without_reasoning() { + let mut messages = vec![ + json!({"role": "assistant", "tool_calls": [{"id": "tc1", "type": "function", "function": {"name": "read", "arguments": "{}"}}]}), + json!({"role": "tool", "tool_call_id": "tc1", "content": "result1"}), + json!({"role": "assistant", "tool_calls": [{"id": "tc2", "type": "function", "function": {"name": "write", "arguments": "{}"}}]}), + json!({"role": "tool", "tool_call_id": "tc2", "content": "result2"}), + ]; + merge_split_tool_call_messages(&mut messages); + + assert_eq!(messages.len(), 4); + assert_eq!(messages[0]["tool_calls"].as_array().unwrap().len(), 1); + assert_eq!(messages[2]["tool_calls"].as_array().unwrap().len(), 1); + } + + #[test] + fn test_merge_split_tool_calls_with_image_gap() { + let mut messages = vec![ + json!({"role": "assistant", "tool_calls": [{"id": "tc1", "type": "function", "function": {"name": "screenshot", "arguments": "{}"}}], "reasoning_content": "thinking..."}), + json!({"role": "tool", "tool_call_id": "tc1", "content": "This tool result included an image that is uploaded in the next message."}), + json!({"role": "user", "content": [{"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}}]}), + json!({"role": "assistant", "tool_calls": [{"id": "tc2", "type": "function", "function": {"name": "click", "arguments": "{}"}}], "reasoning_content": "thinking..."}), + json!({"role": "tool", "tool_call_id": "tc2", "content": "clicked"}), + ]; + merge_split_tool_call_messages(&mut messages); + + assert_eq!(messages.len(), 4); + assert_eq!(messages[0]["tool_calls"].as_array().unwrap().len(), 2); + assert_eq!(messages[0]["role"], "assistant"); + assert_eq!(messages[1]["role"], "tool"); + assert_eq!(messages[1]["tool_call_id"], "tc1"); + assert_eq!(messages[2]["role"], "user"); + assert_eq!(messages[3]["role"], "tool"); + assert_eq!(messages[3]["tool_call_id"], "tc2"); + } + + #[test] + fn test_merge_does_not_skip_real_user_message() { + let mut messages = vec![ + json!({"role": "assistant", "tool_calls": [{"id": "tc1", "type": "function", "function": {"name": "read", "arguments": "{}"}}], "reasoning_content": "thinking..."}), + json!({"role": "tool", "tool_call_id": "tc1", "content": "result1"}), + json!({"role": "user", "content": "what happened?"}), + json!({"role": "assistant", "tool_calls": [{"id": "tc2", "type": "function", "function": {"name": "write", "arguments": "{}"}}], "reasoning_content": "thinking..."}), + json!({"role": "tool", "tool_call_id": "tc2", "content": "result2"}), + ]; + merge_split_tool_call_messages(&mut messages); + + assert_eq!(messages.len(), 5); + assert_eq!(messages[0]["tool_calls"].as_array().unwrap().len(), 1); + assert_eq!(messages[2]["role"], "user"); + assert_eq!(messages[2]["content"], "what happened?"); + assert_eq!(messages[3]["tool_calls"].as_array().unwrap().len(), 1); + } }