diff --git a/crates/goose-cli/src/commands/configure.rs b/crates/goose-cli/src/commands/configure.rs index 1c0df24aca..ce6cb63aca 100644 --- a/crates/goose-cli/src/commands/configure.rs +++ b/crates/goose-cli/src/commands/configure.rs @@ -23,8 +23,6 @@ use goose::model::ModelConfig; #[cfg(feature = "telemetry")] use goose::posthog::{get_telemetry_choice, TELEMETRY_ENABLED_KEY}; use goose::providers::base::ConfigKey; -use goose::providers::chatgpt_codex::reasoning_levels_for_model; -use goose::providers::formats::anthropic::supports_adaptive_thinking; use goose::providers::provider_test::test_provider_configuration; use goose::providers::{create, providers, retry_operation, RetryConfig}; use goose::session::SessionType; @@ -738,15 +736,13 @@ pub async fn configure_provider_dialog() -> anyhow::Result { let spin = spinner(); spin.start("Attempting to fetch supported models..."); - let models_res = { - let temp_model_config = - ModelConfig::new(&provider_meta.default_model)?.with_canonical_limits(provider_name); - let temp_provider = create(provider_name, temp_model_config, Vec::new()).await?; - retry_operation(&RetryConfig::default(), || async { - temp_provider.fetch_recommended_models().await - }) - .await - }; + let temp_model_config = + ModelConfig::new(&provider_meta.default_model)?.with_canonical_limits(provider_name); + let temp_provider = create(provider_name, temp_model_config, Vec::new()).await?; + let models_res = retry_operation(&RetryConfig::default(), || async { + temp_provider.fetch_recommended_models().await + }) + .await; spin.stop(style("Model fetch complete").green()); // Select a model: on fetch error show styled error and abort; if models available, show list; otherwise free-text input @@ -766,78 +762,24 @@ pub async fn configure_provider_dialog() -> anyhow::Result { } }; - if model.to_lowercase().starts_with("gemini-3") { - let thinking_level: &str = cliclack::select("Select thinking level for Gemini 3:") - .item("low", "Low - Better latency, 
lighter reasoning", "") - .item("high", "High - Deeper reasoning, higher latency", "") - .interact()?; - config.set_gemini3_thinking_level(thinking_level)?; - } + { + let supports_thinking = match temp_provider.fetch_model_info(&model).await { + Ok(model_info) => model_info.reasoning, + Err(_) => goose::model::ModelConfig::new(&model) + .map(|c| c.is_reasoning_model()) + .unwrap_or(false), + }; - if model.to_lowercase().starts_with("claude-") { - let supports_adaptive = supports_adaptive_thinking(&model); - - let mut thinking_select = cliclack::select("Select extended thinking mode for Claude:"); - if supports_adaptive { - thinking_select = thinking_select.item( - "adaptive", - "Adaptive - Claude decides when and how much to think (recommended)", - "", - ); - } - thinking_select = thinking_select - .item("enabled", "Enabled - Fixed token budget for thinking", "") - .item("disabled", "Disabled - No extended thinking", ""); - if supports_adaptive { - thinking_select = thinking_select.initial_value("adaptive"); - } else { - thinking_select = thinking_select.initial_value("disabled"); - } - let thinking_type: &str = thinking_select.interact()?; - config.set_claude_thinking_type(thinking_type)?; - - if thinking_type == "adaptive" { - let effort: &str = cliclack::select("Select adaptive thinking effort level:") - .item("low", "Low - Minimal thinking, fastest responses", "") + if supports_thinking { + let effort: &str = cliclack::select("Select thinking effort:") + .item("off", "Off - No extended thinking", "") + .item("low", "Low - Better latency, lighter reasoning", "") .item("medium", "Medium - Moderate thinking", "") - .item("high", "High - Deep reasoning (default)", "") - .item( - "max", - "Max - No constraints on thinking depth (Opus 4.6 only)", - "", - ) - .initial_value("high") + .item("high", "High - Deep reasoning", "") + .item("max", "Max - No constraints on thinking depth", "") + .initial_value("off") .interact()?; - config.set_claude_thinking_effort(effort)?; 
- } else if thinking_type == "enabled" { - let budget: String = cliclack::input("Enter thinking budget (tokens):") - .default_input("16000") - .validate(|input: &String| match input.parse::() { - Ok(n) if n > 0 => Ok(()), - _ => Err("Please enter a valid positive number"), - }) - .interact()?; - config.set_claude_thinking_budget(budget.parse::()?)?; - } - } - - if provider_name == "chatgpt_codex" { - let valid_levels = reasoning_levels_for_model(&model); - if !valid_levels.is_empty() { - let mut select = cliclack::select("Select reasoning effort level:"); - for &level in valid_levels { - let description = match level { - "low" => "Low - Fast responses with lighter reasoning", - "medium" => "Medium - Balances speed and reasoning depth for everyday tasks", - "high" => "High - Greater reasoning depth for complex problems", - "xhigh" => "Extra High - Extra high reasoning depth for complex problems", - _ => "", - }; - select = select.item(level, description, ""); - } - select = select.initial_value("medium"); - let effort: &str = select.interact()?; - config.set_chatgpt_codex_reasoning_effort(effort.to_string())?; + config.set_goose_thinking_effort(effort)?; } } diff --git a/crates/goose-cli/src/session/builder.rs b/crates/goose-cli/src/session/builder.rs index 0cd8e3a47e..a013952081 100644 --- a/crates/goose-cli/src/session/builder.rs +++ b/crates/goose-cli/src/session/builder.rs @@ -268,6 +268,7 @@ fn resolve_provider_and_model( .is_some_and(|mc| mc.model_name == model_name) { let mut config = saved_model_config.unwrap(); + config.normalize_effort_suffix(); if let Some(temp) = recipe_settings.and_then(|s| s.temperature) { config = config.with_temperature(Some(temp)); } diff --git a/crates/goose-server/src/openapi.rs b/crates/goose-server/src/openapi.rs index ae33e32b69..a526af9d22 100644 --- a/crates/goose-server/src/openapi.rs +++ b/crates/goose-server/src/openapi.rs @@ -5,7 +5,7 @@ use goose::config::permission::PermissionLevel; use goose::config::ExtensionEntry; 
use goose::conversation::Conversation; use goose::download_manager::{DownloadProgress, DownloadStatus}; -use goose::model::ModelConfig; +use goose::model::{ModelConfig, ThinkingEffort}; use goose::permission::permission_confirmation::{Permission, PrincipalType}; use goose::providers::base::{ConfigKey, ModelInfo, ProviderMetadata, ProviderType}; use goose::session::{Session, SessionInsights, SessionType, SystemInfo}; @@ -397,6 +397,7 @@ derive_utoipa!(IconTheme as IconThemeSchema); super::routes::config_management::read_all_config, super::routes::config_management::providers, super::routes::config_management::get_provider_models, + super::routes::config_management::get_provider_model_info, super::routes::config_management::get_slash_commands, super::routes::config_management::upsert_permissions, super::routes::config_management::create_custom_provider, @@ -573,6 +574,8 @@ derive_utoipa!(IconTheme as IconThemeSchema); PrincipalType, ModelInfo, ModelConfig, + ThinkingEffort, + super::routes::config_management::ProviderModelInfoQuery, Session, goose::config::goose_mode::GooseMode, SessionInsights, diff --git a/crates/goose-server/src/routes/agent.rs b/crates/goose-server/src/routes/agent.rs index 3576de0f11..1b9f83913b 100644 --- a/crates/goose-server/src/routes/agent.rs +++ b/crates/goose-server/src/routes/agent.rs @@ -1,3 +1,4 @@ +use crate::routes::config_management::resolve_provider_model_info; use crate::routes::errors::ErrorResponse; use crate::routes::recipe_utils::{ apply_recipe_to_agent, build_recipe_with_parameter_values, load_recipe_by_id, validate_recipe, @@ -595,7 +596,7 @@ async fn update_agent_provider( } }; - let model_config = ModelConfig::new(&model) + let mut model_config = ModelConfig::new(&model) .map_err(|e| { ( StatusCode::BAD_REQUEST, @@ -603,8 +604,15 @@ async fn update_agent_provider( ) })? 
.with_canonical_limits(&payload.provider) - .with_context_limit(payload.context_limit) - .with_request_params(payload.request_params); + .with_context_limit(payload.context_limit); + + if let Some(request_params) = payload.request_params { + model_config = model_config.with_merged_request_params(request_params); + } + let model_info = resolve_provider_model_info(&payload.provider, &model) + .await + .map_err(|e| (e.status, e.message))?; + model_config.reasoning = Some(model_info.reasoning); let extensions = EnabledExtensionsState::for_session(state.session_manager(), &payload.session_id, config) diff --git a/crates/goose-server/src/routes/config_management.rs b/crates/goose-server/src/routes/config_management.rs index 08646e939d..69ed487d64 100644 --- a/crates/goose-server/src/routes/config_management.rs +++ b/crates/goose-server/src/routes/config_management.rs @@ -13,7 +13,7 @@ use goose::config::ExtensionEntry; use goose::config::{Config, ConfigError}; use goose::custom_requests::SourceType; use goose::model::ModelConfig; -use goose::providers::base::{ProviderMetadata, ProviderType}; +use goose::providers::base::{ModelInfo, ProviderMetadata, ProviderType}; use goose::providers::canonical::maybe_get_canonical_model; use goose::providers::catalog::{ get_provider_template, get_providers_by_format, ProviderCatalogEntry, ProviderFormat, @@ -418,7 +418,7 @@ pub async fn providers() -> Result>, ErrorResponse> { ("name" = String, Path, description = "Provider name (e.g., openai)") ), responses( - (status = 200, description = "Models fetched successfully", body = [String]), + (status = 200, description = "Models fetched successfully", body = [ModelInfo]), (status = 400, description = "Unknown provider, provider not configured, or authentication error"), (status = 429, description = "Rate limit exceeded"), (status = 500, description = "Internal server error") @@ -426,7 +426,7 @@ pub async fn providers() -> Result>, ErrorResponse> { )] pub async fn get_provider_models( 
Path(name): Path, -) -> Result>, ErrorResponse> { +) -> Result>, ErrorResponse> { let all = get_providers().await.into_iter().collect::>(); let Some((metadata, provider_type)) = all.into_iter().find(|(m, _)| m.name == name) else { return Err(ErrorResponse::bad_request(format!( @@ -444,7 +444,7 @@ pub async fn get_provider_models( let model_config = ModelConfig::new(&metadata.default_model)?.with_canonical_limits(&name); let provider = goose::providers::create(&name, model_config, Vec::new()).await?; - let models_result = provider.fetch_recommended_models().await; + let models_result = provider.fetch_recommended_model_info().await; match models_result { Ok(models) => Ok(Json(models)), @@ -452,6 +452,70 @@ pub async fn get_provider_models( } } +#[derive(Deserialize, ToSchema)] +pub struct ProviderModelInfoQuery { + pub model: String, +} + +pub async fn resolve_provider_model_info( + name: &str, + model: &str, +) -> Result { + let all = get_providers().await.into_iter().collect::>(); + let Some((metadata, provider_type)) = all.into_iter().find(|(m, _)| m.name == name) else { + return Err(ErrorResponse::bad_request(format!( + "Unknown provider: {}", + name + ))); + }; + if !check_provider_configured(&metadata, provider_type) { + return Err(ErrorResponse::bad_request(format!( + "Provider '{}' is not configured", + name + ))); + } + + let model_config = ModelConfig::new(model)?.with_canonical_limits(name); + let provider = goose::providers::create(name, model_config.clone(), Vec::new()).await?; + match provider.fetch_model_info(model).await { + Ok(info) => Ok(info), + Err(error) => { + let mut info = ModelInfo::new(model, model_config.context_limit()); + info.reasoning = model_config.is_reasoning_model(); + tracing::debug!( + provider = name, + model, + error = %error, + "Falling back to local model metadata" + ); + Ok(info) + } + } +} + +#[utoipa::path( + post, + path = "/config/providers/{name}/model-info", + params( + ("name" = String, Path, description = "Provider 
name (e.g., openai)") + ), + request_body = ProviderModelInfoQuery, + responses( + (status = 200, description = "Model metadata fetched successfully", body = ModelInfo), + (status = 400, description = "Unknown provider, provider not configured, or authentication error"), + (status = 429, description = "Rate limit exceeded"), + (status = 500, description = "Internal server error") + ) +)] +pub async fn get_provider_model_info( + Path(name): Path, + Json(query): Json, +) -> Result, ErrorResponse> { + resolve_provider_model_info(&name, &query.model) + .await + .map(Json) +} + #[derive(Deserialize, utoipa::IntoParams)] pub struct SlashCommandsQuery { /// Optional working directory to discover local skills from @@ -523,6 +587,7 @@ pub struct ModelInfoData { pub model: String, pub context_limit: usize, pub max_output_tokens: Option, + pub reasoning: bool, pub input_token_cost: Option, pub output_token_cost: Option, pub cache_read_token_cost: Option, @@ -560,6 +625,9 @@ pub async fn get_canonical_model_info( model: query.model.clone(), context_limit: canonical_model.limit.context, max_output_tokens: canonical_model.limit.output, + reasoning: canonical_model + .reasoning + .unwrap_or_else(|| ModelConfig::new_or_fail(&query.model).is_reasoning_model()), // Costs are per million tokens - client handles division for display input_token_cost: canonical_model.cost.input, output_token_cost: canonical_model.cost.output, @@ -926,6 +994,10 @@ pub fn routes(state: Arc) -> Router { .route("/config/extensions/{name}", delete(remove_extension)) .route("/config/providers", get(providers)) .route("/config/providers/{name}/models", get(get_provider_models)) + .route( + "/config/providers/{name}/model-info", + post(get_provider_model_info), + ) .route("/config/provider-catalog", get(get_provider_catalog)) .route( "/config/provider-catalog/{id}", diff --git a/crates/goose/src/acp/server.rs b/crates/goose/src/acp/server.rs index ac99fe02a8..6d06a0b5f7 100644 --- 
a/crates/goose/src/acp/server.rs +++ b/crates/goose/src/acp/server.rs @@ -1039,6 +1039,34 @@ async fn resolve_provider_and_model_from_config( Ok((provider_name, model_config)) } +fn with_preserved_session_request_params( + mut model_config: crate::model::ModelConfig, + current_model_config: Option<&crate::model::ModelConfig>, + request_params: Option>, +) -> crate::model::ModelConfig { + let has_model_effort = model_config + .request_params + .as_ref() + .and_then(|params| params.get("thinking_effort")) + .is_some(); + if !has_model_effort { + if let Some(thinking_effort) = current_model_config + .and_then(|config| config.request_params.as_ref()) + .and_then(|params| params.get("thinking_effort")) + .cloned() + { + model_config = model_config.with_merged_request_params(HashMap::from([( + "thinking_effort".into(), + thinking_effort, + )])); + } + } + if let Some(request_params) = request_params { + model_config = model_config.with_merged_request_params(request_params); + } + model_config +} + /// Convenience wrapper: reads config from disk, then resolves provider + model. /// Cheap enough to call from `on_new_session` (file + registry reads, no network). async fn resolve_provider_and_model( @@ -3223,11 +3251,14 @@ impl GooseAcpAgent { .await .internal_err_ctx("Failed to get provider")?; let provider_name = current_provider.get_name().to_string(); + let current_model_config = current_provider.get_model_config(); let extensions = EnabledExtensionsState::for_session(&self.session_manager, session_id, &config).await; let model_config = crate::model::ModelConfig::new(model_id) .invalid_params_err_ctx("Invalid model config")? 
.with_canonical_limits(&provider_name); + let model_config = + with_preserved_session_request_params(model_config, Some(¤t_model_config), None); let session = self .session_manager .get_session(session_id, false) @@ -3333,7 +3364,8 @@ impl GooseAcpAgent { .await .internal_err_ctx("Failed to get provider")?; let current_provider_name = current_provider.get_name(); - let current_model = current_provider.get_model_config().model_name; + let current_model_config = current_provider.get_model_config(); + let current_model = current_model_config.model_name.clone(); let has_default_overrides = model_name.is_some() || context_limit.is_some() || request_params.is_some(); let use_default_provider = provider_name == DEFAULT_PROVIDER_ID; @@ -3357,11 +3389,15 @@ impl GooseAcpAgent { current_model }; let model = model_name.unwrap_or(&default_model); - let model_config = crate::model::ModelConfig::new(model) + let mut model_config = crate::model::ModelConfig::new(model) .invalid_params_err_ctx("Invalid model config")? 
.with_canonical_limits(&resolved_provider_name) - .with_context_limit(context_limit) - .with_request_params(request_params); + .with_context_limit(context_limit); + model_config = with_preserved_session_request_params( + model_config, + (!is_changing_provider).then_some(¤t_model_config), + request_params, + ); let extensions = EnabledExtensionsState::for_session(&self.session_manager, session_id, &config).await; diff --git a/crates/goose/src/config/base.rs b/crates/goose/src/config/base.rs index dbb5fb4c2b..f26d9bdab0 100644 --- a/crates/goose/src/config/base.rs +++ b/crates/goose/src/config/base.rs @@ -1098,7 +1098,6 @@ config_value!(CLAUDE_CODE_COMMAND, String, "claude"); config_value!(GEMINI_CLI_COMMAND, String, "gemini"); config_value!(CURSOR_AGENT_COMMAND, String, "cursor-agent"); config_value!(CODEX_COMMAND, String, "codex"); -config_value!(CODEX_REASONING_EFFORT, String, "high"); config_value!(CODEX_ENABLE_SKILLS, String, "true"); config_value!(CODEX_SKIP_GIT_CHECK, String, "false"); config_value!(CHATGPT_CODEX_REASONING_EFFORT, String, "medium"); @@ -1137,12 +1136,48 @@ config_value!(GOOSE_PROMPT_EDITOR_ALWAYS, Option); config_value!(GOOSE_MAX_ACTIVE_AGENTS, usize); config_value!(GOOSE_DISABLE_SESSION_NAMING, bool); config_value!(GOOSE_DISABLE_TOOL_CALL_SUMMARY, bool); -config_value!(GEMINI3_THINKING_LEVEL, String); -config_value!(CLAUDE_THINKING_TYPE, String); -config_value!(CLAUDE_THINKING_EFFORT, String); -config_value!(CLAUDE_THINKING_BUDGET, i32); +config_value!(GOOSE_THINKING_EFFORT, String); config_value!(GOOSE_DEFAULT_EXTENSION_TIMEOUT, u64); +fn find_workspace_or_exe_root() -> Option { + let exe = std::env::current_exe().ok()?; + let exe_dir = exe.parent()?.to_path_buf(); + + let mut path = exe; + while let Some(parent) = path.parent() { + let cargo_toml = parent.join("Cargo.toml"); + if cargo_toml.exists() { + if let Ok(content) = std::fs::read_to_string(&cargo_toml) { + if content.contains("[workspace]") { + return Some(parent.to_path_buf()); + } 
+ } + } + path = parent.to_path_buf(); + } + + Some(exe_dir) +} + +pub fn load_init_config_from_workspace() -> Result { + let root = find_workspace_or_exe_root().ok_or_else(|| { + ConfigError::FileError(std::io::Error::new( + std::io::ErrorKind::NotFound, + "Could not determine executable path", + )) + })?; + + let init_config_path = root.join("init-config.yaml"); + if !init_config_path.exists() { + return Err(ConfigError::NotFound( + "init-config.yaml not found".to_string(), + )); + } + + let init_content = std::fs::read_to_string(&init_config_path)?; + parse_yaml_content(&init_content) +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/goose/src/model.rs b/crates/goose/src/model.rs index 3a08722e78..eeb54562ff 100644 --- a/crates/goose/src/model.rs +++ b/crates/goose/src/model.rs @@ -1,12 +1,51 @@ use once_cell::sync::Lazy; +use serde::de::Deserializer; use serde::{Deserialize, Serialize}; use serde_json::Value; use std::collections::HashMap; +use std::fmt; +use std::str::FromStr; use thiserror::Error; use utoipa::ToSchema; pub const DEFAULT_CONTEXT_LIMIT: usize = 128_000; +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "lowercase")] +pub enum ThinkingEffort { + Off, + Low, + Medium, + High, + Max, +} + +impl FromStr for ThinkingEffort { + type Err = String; + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "off" | "disabled" | "none" => Ok(Self::Off), + "low" => Ok(Self::Low), + "medium" | "med" => Ok(Self::Medium), + "high" => Ok(Self::High), + "max" | "xhigh" => Ok(Self::Max), + other => Err(format!("unknown thinking effort: '{other}'")), + } + } +} + +impl fmt::Display for ThinkingEffort { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Off => write!(f, "off"), + Self::Low => write!(f, "low"), + Self::Medium => write!(f, "medium"), + Self::High => write!(f, "high"), + Self::Max => write!(f, "max"), + } + } +} + #[derive(Debug, Clone, 
Deserialize)] struct PredefinedModel { name: String, @@ -44,7 +83,7 @@ pub enum ConfigError { InvalidRange(String, String), } -#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)] +#[derive(Debug, Clone, Default, Serialize, ToSchema)] pub struct ModelConfig { pub model_name: String, pub context_limit: Option, @@ -61,6 +100,44 @@ pub struct ModelConfig { pub reasoning: Option, } +impl<'de> Deserialize<'de> for ModelConfig { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + #[derive(Deserialize)] + struct RawModelConfig { + model_name: String, + context_limit: Option, + temperature: Option, + max_tokens: Option, + toolshim: bool, + toolshim_model: Option, + #[serde(default)] + fast_model_config: Option>, + #[serde(default, skip_serializing_if = "Option::is_none")] + request_params: Option>, + #[serde(default, skip_serializing_if = "Option::is_none")] + reasoning: Option, + } + + let raw = RawModelConfig::deserialize(deserializer)?; + let mut config = Self { + model_name: raw.model_name, + context_limit: raw.context_limit, + temperature: raw.temperature, + max_tokens: raw.max_tokens, + toolshim: raw.toolshim, + toolshim_model: raw.toolshim_model, + fast_model_config: raw.fast_model_config, + request_params: raw.request_params, + reasoning: raw.reasoning, + }; + config.normalize_effort_suffix(); + Ok(config) + } +} + impl ModelConfig { pub fn new(model_name: &str) -> Result { Self::new_base(model_name.to_string(), None) @@ -114,13 +191,14 @@ impl ModelConfig { let toolshim = Self::parse_toolshim()?; let toolshim_model = Self::parse_toolshim_model()?; - // Pick up request_params from predefined models (always applies) + // Pick up predefined model settings before legacy suffix normalization. 
let predefined = find_predefined_model(&model_name); + let predefined_context_limit = predefined.as_ref().and_then(|pm| pm.context_limit); let request_params = predefined.and_then(|pm| pm.request_params); - Ok(Self { + let mut config = Self { model_name, - context_limit, + context_limit: context_limit.or(predefined_context_limit), temperature, max_tokens, toolshim, @@ -128,7 +206,9 @@ impl ModelConfig { fast_model_config: None, request_params, reasoning: None, - }) + }; + config.normalize_effort_suffix(); + Ok(config) } pub fn with_canonical_limits(mut self, provider_name: &str) -> Self { @@ -298,8 +378,17 @@ impl ModelConfig { Ok(self) } - pub fn with_request_params(mut self, params: Option>) -> Self { - self.request_params = params; + pub fn with_merged_request_params(mut self, params: HashMap) -> Self { + match self.request_params.as_mut() { + Some(existing) => { + for (k, v) in params { + existing.insert(k, v); + } + } + None => { + self.request_params = Some(params); + } + } self } @@ -319,6 +408,21 @@ impl ModelConfig { crate::providers::utils::is_openai_responses_model(&self.model_name) } + pub fn is_reasoning_model(&self) -> bool { + if let Some(reasoning) = self.reasoning { + return reasoning; + } + + self.is_openai_reasoning_model() + || self.model_name.to_lowercase().contains("claude") + || Self::is_gemini3_reasoning_model_name(&self.model_name) + } + + fn is_gemini3_reasoning_model_name(model_name: &str) -> bool { + let lower = model_name.to_lowercase(); + lower.starts_with("gemini-3") || lower.contains("/gemini-3") || lower.contains("-gemini-3") + } + pub fn max_output_tokens(&self) -> i32 { if let Some(tokens) = self.max_tokens { return tokens; @@ -327,6 +431,82 @@ impl ModelConfig { 4_096 } + pub fn normalize_effort_suffix(&mut self) { + if !self.is_openai_reasoning_model() { + return; + } + let parts: Vec<&str> = self.model_name.split('-').collect(); + let last = match parts.last() { + Some(l) => *l, + None => return, + }; + let effort = match last 
{ + "none" => ThinkingEffort::Off, + "low" => ThinkingEffort::Low, + "medium" => ThinkingEffort::Medium, + "high" => ThinkingEffort::High, + "xhigh" => ThinkingEffort::Max, + _ => return, + }; + self.model_name = parts[..parts.len() - 1].join("-"); + let has_explicit_effort = self + .request_params + .as_ref() + .and_then(|p| p.get("thinking_effort")) + .is_some(); + if !has_explicit_effort { + let params = self.request_params.get_or_insert_with(HashMap::new); + params.insert( + "thinking_effort".to_string(), + serde_json::json!(effort.to_string()), + ); + } + } + + pub fn thinking_effort(&self) -> Option { + self.get_config_param::("thinking_effort", "GOOSE_THINKING_EFFORT") + .and_then(|s| s.parse::().ok()) + .or_else(Self::legacy_thinking_effort) + } + + fn legacy_thinking_effort() -> Option { + let config = crate::config::Config::global(); + + if let Ok(value) = config.get_param::("CLAUDE_THINKING_TYPE") { + if let Some(effort) = match value.to_lowercase().as_str() { + "adaptive" | "enabled" => Some(ThinkingEffort::High), + "disabled" => Some(ThinkingEffort::Off), + _ => None, + } { + return Some(effort); + } + } + + if let Ok(enabled) = config.get_param::("CLAUDE_THINKING_ENABLED") { + return Some(if enabled { + ThinkingEffort::High + } else { + ThinkingEffort::Off + }); + } + + if let Ok(value) = config.get_param::("GEMINI3_THINKING_LEVEL") { + if let Some(effort) = Self::legacy_gemini3_thinking_effort(&value) { + return Some(effort); + } + } + + None + } + + fn legacy_gemini3_thinking_effort(value: &str) -> Option { + match value.to_lowercase().as_str() { + "low" => Some(ThinkingEffort::Low), + "high" => Some(ThinkingEffort::High), + _ => None, + } + } + pub fn get_config_param serde::Deserialize<'de>>( &self, request_key: &str, @@ -419,13 +599,10 @@ mod tests { #[test] fn test_get_config_param() { - let _guard = env_lock::lock_env([ - ("CLAUDE_THINKING_EFFORT", Some("high")), - ("CLAUDE_THINKING_TYPE", None::<&str>), - ]); + let _guard = 
env_lock::lock_env([("GOOSE_THINKING_EFFORT", Some("high"))]); let mut params = HashMap::new(); - params.insert("effort".to_string(), serde_json::json!("low")); + params.insert("thinking_effort".to_string(), serde_json::json!("low")); let config_with_params = ModelConfig { model_name: "test".to_string(), @@ -439,11 +616,13 @@ mod tests { }; assert_eq!( - config_with_params.get_config_param::("effort", "CLAUDE_THINKING_EFFORT"), + config_with_params + .get_config_param::("thinking_effort", "GOOSE_THINKING_EFFORT"), Some("low".to_string()) ); assert_eq!( - config_without_params.get_config_param::("effort", "CLAUDE_THINKING_EFFORT"), + config_without_params + .get_config_param::("thinking_effort", "GOOSE_THINKING_EFFORT"), Some("high".to_string()) ); assert_eq!( @@ -453,6 +632,236 @@ mod tests { ); } + #[test] + fn test_deserialize_preserves_fast_model_config() { + let config: ModelConfig = serde_json::from_value(serde_json::json!({ + "model_name": "primary-model", + "context_limit": null, + "temperature": null, + "max_tokens": null, + "toolshim": false, + "toolshim_model": null, + "fast_model_config": { + "model_name": "fast-model", + "context_limit": 4096, + "temperature": null, + "max_tokens": 1024, + "toolshim": false, + "toolshim_model": null + } + })) + .unwrap(); + + let fast_config = config.fast_model_config.as_ref().unwrap(); + assert_eq!(fast_config.model_name, "fast-model"); + assert_eq!(fast_config.context_limit, Some(4096)); + assert_eq!(fast_config.max_tokens, Some(1024)); + assert_eq!(config.use_fast_model().model_name, "fast-model"); + } + + mod thinking_effort_tests { + use super::*; + + #[test] + fn from_request_params() { + let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", None::<&str>)]); + let mut params = HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("medium")); + let config = ModelConfig { + model_name: "test".to_string(), + request_params: Some(params), + ..Default::default() + }; + 
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Medium)); + } + + #[test] + fn from_env_var() { + let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", Some("high"))]); + let config = ModelConfig { + model_name: "test".to_string(), + ..Default::default() + }; + assert_eq!(config.thinking_effort(), Some(ThinkingEffort::High)); + } + + #[test] + fn request_params_override_env() { + let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", Some("high"))]); + let mut params = HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("low")); + let config = ModelConfig { + model_name: "test".to_string(), + request_params: Some(params), + ..Default::default() + }; + assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Low)); + } + + #[test] + fn legacy_claude_thinking_type_fallback() { + for value in ["enabled", "adaptive"] { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", None::<&str>), + ("CLAUDE_THINKING_TYPE", Some(value)), + ("CLAUDE_THINKING_ENABLED", None::<&str>), + ("GEMINI3_THINKING_LEVEL", None::<&str>), + ("ANTHROPIC_THINKING_BUDGET", None::<&str>), + ("CLAUDE_THINKING_BUDGET", None::<&str>), + ("GEMINI25_THINKING_BUDGET", None::<&str>), + ]); + let config = ModelConfig { + model_name: "test".to_string(), + ..Default::default() + }; + assert_eq!(config.thinking_effort(), Some(ThinkingEffort::High)); + } + } + + #[test] + fn legacy_gemini3_thinking_level_mapping() { + assert_eq!( + ModelConfig::legacy_gemini3_thinking_effort("low"), + Some(ThinkingEffort::Low) + ); + assert_eq!( + ModelConfig::legacy_gemini3_thinking_effort("high"), + Some(ThinkingEffort::High) + ); + assert_eq!(ModelConfig::legacy_gemini3_thinking_effort("auto"), None); + } + + #[test] + fn legacy_gemini3_thinking_level_fallback() { + let temp_dir = tempfile::tempdir().unwrap(); + let temp_root = temp_dir.path().to_string_lossy().to_string(); + let _guard = env_lock::lock_env([ + ("GOOSE_PATH_ROOT", Some(temp_root.as_str())), + 
("GOOSE_THINKING_EFFORT", None::<&str>), + ("CLAUDE_THINKING_TYPE", None::<&str>), + ("CLAUDE_THINKING_ENABLED", None::<&str>), + ("GEMINI3_THINKING_LEVEL", Some("high")), + ("ANTHROPIC_THINKING_BUDGET", None::<&str>), + ("CLAUDE_THINKING_BUDGET", None::<&str>), + ("GEMINI25_THINKING_BUDGET", None::<&str>), + ]); + let config = ModelConfig { + model_name: "gemini-3-pro".to_string(), + ..Default::default() + }; + assert_eq!(config.thinking_effort(), Some(ThinkingEffort::High)); + } + + #[test] + fn effort_suffix_stripped_from_model_name() { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", None::<&str>), + ("GOOSE_MAX_TOKENS", None::<&str>), + ("GOOSE_TEMPERATURE", None::<&str>), + ("GOOSE_CONTEXT_LIMIT", None::<&str>), + ("GOOSE_TOOLSHIM", None::<&str>), + ("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>), + ]); + let config = ModelConfig::new("o3-mini-high").unwrap(); + assert_eq!(config.model_name, "o3-mini"); + assert_eq!(config.thinking_effort(), Some(ThinkingEffort::High)); + } + + #[test] + fn none_suffix_stripped_from_model_name() { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", Some("high")), + ("GOOSE_MAX_TOKENS", None::<&str>), + ("GOOSE_TEMPERATURE", None::<&str>), + ("GOOSE_CONTEXT_LIMIT", None::<&str>), + ("GOOSE_TOOLSHIM", None::<&str>), + ("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>), + ]); + let config = ModelConfig::new("o3-mini-none").unwrap(); + assert_eq!(config.model_name, "o3-mini"); + assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Off)); + } + + #[test] + fn xhigh_suffix_stripped_from_model_name() { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", Some("low")), + ("GOOSE_MAX_TOKENS", None::<&str>), + ("GOOSE_TEMPERATURE", None::<&str>), + ("GOOSE_CONTEXT_LIMIT", None::<&str>), + ("GOOSE_TOOLSHIM", None::<&str>), + ("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>), + ]); + let config = ModelConfig::new("gpt-5.4-xhigh").unwrap(); + assert_eq!(config.model_name, "gpt-5.4"); + 
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Max)); + } + + #[test] + fn effort_suffix_not_stripped_when_thinking_effort_set() { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", None::<&str>), + ("GOOSE_MAX_TOKENS", None::<&str>), + ("GOOSE_TEMPERATURE", None::<&str>), + ("GOOSE_CONTEXT_LIMIT", None::<&str>), + ("GOOSE_TOOLSHIM", None::<&str>), + ("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>), + ]); + let mut params = HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("low")); + let mut config = ModelConfig::new("o3-mini-high").unwrap(); + // Suffix was already normalized during new(), but if request_params + // were set before construction, the suffix would not be stripped. + // Verify the normalized state: + assert_eq!(config.model_name, "o3-mini"); + + // Now simulate setting explicit effort after construction + config.request_params = Some(params); + assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Low)); + } + + #[test] + fn no_suffix_no_change() { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", None::<&str>), + ("GOOSE_MAX_TOKENS", None::<&str>), + ("GOOSE_TEMPERATURE", None::<&str>), + ("GOOSE_CONTEXT_LIMIT", None::<&str>), + ("GOOSE_TOOLSHIM", None::<&str>), + ("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>), + ]); + let config = ModelConfig::new("o3-mini").unwrap(); + assert_eq!(config.model_name, "o3-mini"); + } + + #[test] + fn non_reasoning_model_suffix_not_stripped() { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", None::<&str>), + ("GOOSE_MAX_TOKENS", None::<&str>), + ("GOOSE_TEMPERATURE", None::<&str>), + ("GOOSE_CONTEXT_LIMIT", None::<&str>), + ("GOOSE_TOOLSHIM", None::<&str>), + ("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>), + ]); + let config = ModelConfig::new("claude-sonnet-4-high").unwrap(); + assert_eq!(config.model_name, "claude-sonnet-4-high"); + } + + #[test] + fn parse_aliases() { + assert_eq!("off".parse::(), Ok(ThinkingEffort::Off)); + 
assert_eq!( + "disabled".parse::(), + Ok(ThinkingEffort::Off) + ); + assert_eq!("med".parse::(), Ok(ThinkingEffort::Medium)); + assert_eq!("max".parse::(), Ok(ThinkingEffort::Max)); + assert_eq!("xhigh".parse::(), Ok(ThinkingEffort::Max)); + assert!("invalid".parse::().is_err()); + } + } + mod with_canonical_limits { use super::*; @@ -597,4 +1006,36 @@ mod tests { assert!(!ModelConfig::new_or_fail("llama-3-70b").is_openai_reasoning_model()); } } + + mod is_reasoning_model { + use super::*; + + const ENV_LOCK_KEYS: [(&str, Option<&str>); 5] = [ + ("GOOSE_MAX_TOKENS", None), + ("GOOSE_TEMPERATURE", None), + ("GOOSE_CONTEXT_LIMIT", None), + ("GOOSE_TOOLSHIM", None), + ("GOOSE_TOOLSHIM_OLLAMA_MODEL", None), + ]; + + #[test] + fn includes_reasoning_model_families() { + let _guard = env_lock::lock_env(ENV_LOCK_KEYS); + assert!(ModelConfig::new_or_fail("o3-mini").is_reasoning_model()); + assert!(ModelConfig::new_or_fail("claude-sonnet-4").is_reasoning_model()); + assert!(ModelConfig::new_or_fail("gemini-3-pro").is_reasoning_model()); + } + + #[test] + fn uses_explicit_metadata_first() { + let _guard = env_lock::lock_env(ENV_LOCK_KEYS); + let mut config = ModelConfig::new_or_fail("provider-alias"); + config.reasoning = Some(true); + assert!(config.is_reasoning_model()); + + let mut config = ModelConfig::new_or_fail("claude-sonnet-4"); + config.reasoning = Some(false); + assert!(!config.is_reasoning_model()); + } + } } diff --git a/crates/goose/src/providers/base.rs b/crates/goose/src/providers/base.rs index cfa0418721..2d6bdf3def 100644 --- a/crates/goose/src/providers/base.rs +++ b/crates/goose/src/providers/base.rs @@ -395,6 +395,9 @@ pub struct ModelInfo { pub currency: Option, /// Whether this model supports cache control pub supports_cache_control: Option, + /// Whether this model supports reasoning/thinking controls + #[serde(default)] + pub reasoning: bool, } impl ModelInfo { @@ -407,6 +410,7 @@ impl ModelInfo { output_token_cost: None, currency: None, 
supports_cache_control: None, + reasoning: false, } } @@ -424,10 +428,37 @@ impl ModelInfo { output_token_cost: Some(output_cost), currency: Some("$".to_string()), supports_cache_control: None, + reasoning: false, } } } +fn model_info_for_provider_model(provider_name: &str, model_name: &str) -> ModelInfo { + let registry = CanonicalModelRegistry::bundled().ok(); + let canonical = registry.as_ref().and_then(|registry| { + let canonical_id = map_to_canonical_model(provider_name, model_name, registry)?; + let (provider, model) = canonical_id.split_once('/')?; + registry.get(provider, model) + }); + + let reasoning = canonical + .as_ref() + .and_then(|model| model.reasoning) + .unwrap_or_else(|| ModelConfig::new_or_fail(model_name).is_reasoning_model()); + + ModelInfo { + name: model_name.to_string(), + context_limit: ModelConfig::new_or_fail(model_name) + .with_canonical_limits(provider_name) + .context_limit(), + input_token_cost: None, + output_token_cost: None, + currency: None, + supports_cache_control: None, + reasoning, + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)] pub enum ProviderType { Preferred, @@ -478,16 +509,7 @@ impl ProviderMetadata { default_model: default_model.to_string(), known_models: model_names .iter() - .map(|&model_name| ModelInfo { - name: model_name.to_string(), - context_limit: ModelConfig::new_or_fail(model_name) - .with_canonical_limits(name) - .context_limit(), - input_token_cost: None, - output_token_cost: None, - currency: None, - supports_cache_control: None, - }) + .map(|&model_name| model_info_for_provider_model(name, model_name)) .collect(), model_doc_link: model_doc_link.to_string(), config_keys, @@ -917,6 +939,19 @@ pub trait Provider: Send + Sync { Ok(vec![]) } + async fn fetch_supported_model_info(&self) -> Result, ProviderError> { + Ok(self + .fetch_supported_models() + .await? 
+ .iter() + .map(|model_name| model_info_for_provider_model(self.get_name(), model_name)) + .collect()) + } + + async fn fetch_model_info(&self, model_name: &str) -> Result { + Ok(model_info_for_provider_model(self.get_name(), model_name)) + } + fn skip_canonical_filtering(&self) -> bool { false } @@ -982,6 +1017,15 @@ pub trait Provider: Send + Sync { } } + async fn fetch_recommended_model_info(&self) -> Result, ProviderError> { + Ok(self + .fetch_recommended_models() + .await? + .iter() + .map(|model_name| model_info_for_provider_model(self.get_name(), model_name)) + .collect()) + } + async fn map_to_canonical_model( &self, provider_model: &str, @@ -1739,6 +1783,7 @@ mod tests { output_token_cost: None, currency: None, supports_cache_control: None, + reasoning: false, }; assert_eq!(info.context_limit, 1000); @@ -1750,6 +1795,7 @@ mod tests { output_token_cost: None, currency: None, supports_cache_control: None, + reasoning: false, }; assert_eq!(info, info2); @@ -1761,6 +1807,7 @@ mod tests { output_token_cost: None, currency: None, supports_cache_control: None, + reasoning: false, }; assert_ne!(info, info3); } diff --git a/crates/goose/src/providers/chatgpt_codex.rs b/crates/goose/src/providers/chatgpt_codex.rs index 3cbb68e31c..1aa29862b0 100644 --- a/crates/goose/src/providers/chatgpt_codex.rs +++ b/crates/goose/src/providers/chatgpt_codex.rs @@ -229,6 +229,29 @@ fn get_reasoning_effort(model_name: &str) -> String { } } +fn reasoning_effort_for_config(model_config: &ModelConfig) -> Option { + use crate::model::ThinkingEffort; + + model_config + .thinking_effort() + .map(|effort| { + let valid_levels = reasoning_levels_for_model(&model_config.model_name); + let preferred_levels: &[&str] = match effort { + ThinkingEffort::Off => return None, + ThinkingEffort::Low => &["low", "medium", "high", "xhigh"], + ThinkingEffort::Medium => &["medium", "high", "low", "xhigh"], + ThinkingEffort::High => &["high", "medium", "xhigh", "low"], + ThinkingEffort::Max => &["xhigh", 
"high", "medium", "low"], + }; + + preferred_levels + .iter() + .find(|level| valid_levels.contains(level)) + .map(|level| (*level).to_string()) + }) + .unwrap_or_else(|| Some(get_reasoning_effort(&model_config.model_name))) +} + fn create_codex_request( model_config: &ModelConfig, system: &str, @@ -236,7 +259,7 @@ fn create_codex_request( tools: &[Tool], ) -> Result { let input_items = build_input_items(messages)?; - let reasoning_effort = get_reasoning_effort(&model_config.model_name); + let reasoning_effort = reasoning_effort_for_config(model_config); let instructions = match model_config.model_name.as_str() { "gpt-5.3-codex" => format!("{GPT_53_CODEX_TOOL_PREAMBLE}\n\n{system}"), @@ -247,7 +270,6 @@ fn create_codex_request( "model": model_config.model_name, "input": input_items, "store": false, - "reasoning": {"effort": reasoning_effort}, "instructions": instructions, }); @@ -277,6 +299,13 @@ fn create_codex_request( payload_obj.insert("temperature".to_string(), json!(temp)); } + if let Some(reasoning_effort) = reasoning_effort { + payload_obj.insert( + "reasoning".to_string(), + json!({ "effort": reasoning_effort }), + ); + } + Ok(payload) } @@ -1177,6 +1206,42 @@ mod tests { ); } + #[test] + fn test_create_codex_request_reasoning_effort_from_unified_thinking() { + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), json!("max")); + let mut config = ModelConfig::new("gpt-5.3-codex").unwrap(); + config.request_params = Some(params); + + let payload = create_codex_request(&config, "sys", &[], &[]).unwrap(); + assert_eq!(payload["reasoning"]["effort"], "xhigh"); + assert!(payload.get("reasoning_effort").is_none()); + } + + #[test] + fn test_create_codex_request_caps_unified_thinking_to_supported_level() { + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), json!("max")); + let mut config = ModelConfig::new("unknown-model").unwrap(); + config.request_params = 
Some(params); + + let payload = create_codex_request(&config, "sys", &[], &[]).unwrap(); + assert_eq!(payload["reasoning"]["effort"], "high"); + assert!(payload.get("reasoning_effort").is_none()); + } + + #[test] + fn test_create_codex_request_off_omits_reasoning_for_codex_models() { + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), json!("off")); + let mut config = ModelConfig::new("gpt-5.2-codex").unwrap(); + config.request_params = Some(params); + + let payload = create_codex_request(&config, "sys", &[], &[]).unwrap(); + assert!(payload.get("reasoning").is_none()); + assert!(payload.get("reasoning_effort").is_none()); + } + #[test_case( JwtClaims { chatgpt_account_id: Some("account-1".to_string()), diff --git a/crates/goose/src/providers/codex.rs b/crates/goose/src/providers/codex.rs index a2075f52aa..8b420d4146 100644 --- a/crates/goose/src/providers/codex.rs +++ b/crates/goose/src/providers/codex.rs @@ -16,7 +16,7 @@ use super::base::{ }; use super::errors::ProviderError; use super::utils::{filter_extensions_from_system_prompt, RequestLog}; -use crate::config::base::{CodexCommand, CodexReasoningEffort, CodexSkipGitCheck}; +use crate::config::base::{CodexCommand, CodexSkipGitCheck}; use crate::config::paths::Paths; use crate::config::search_path::SearchPaths; use crate::config::{Config, ExtensionConfig, GooseMode}; @@ -50,7 +50,7 @@ pub struct CodexProvider { #[serde(skip)] name: String, /// Reasoning effort level (none, low, medium, high, xhigh) - reasoning_effort: String, + reasoning_effort: Option, /// Whether to skip git repo check skip_git_check: bool, /// CLI config overrides for MCP servers @@ -60,12 +60,40 @@ pub struct CodexProvider { } impl CodexProvider { - fn supports_reasoning_effort(model_name: &str, reasoning_effort: &str) -> bool { - if !CODEX_REASONING_LEVELS.contains(&reasoning_effort) { - return false; - } + fn legacy_reasoning_effort() -> Option { + Config::global() + 
.get_param::("CODEX_REASONING_EFFORT") + .ok() + .and_then(|effort| match effort.to_lowercase().as_str() { + "none" => Some(crate::model::ThinkingEffort::Off), + "low" => Some(crate::model::ThinkingEffort::Low), + "medium" => Some(crate::model::ThinkingEffort::Medium), + "high" => Some(crate::model::ThinkingEffort::High), + "xhigh" => Some(crate::model::ThinkingEffort::Max), + _ => None, + }) + } - if reasoning_effort == "none" && model_name.contains("codex") { + fn map_thinking_effort( + _model_name: &str, + effort: Option, + ) -> Option { + use crate::model::ThinkingEffort; + match effort + .or_else(Self::legacy_reasoning_effort) + .unwrap_or(ThinkingEffort::High) + { + ThinkingEffort::Off => Some("none".to_string()), + ThinkingEffort::Low => Some("low".to_string()), + ThinkingEffort::Medium => Some("medium".to_string()), + ThinkingEffort::High => Some("high".to_string()), + ThinkingEffort::Max => Some("xhigh".to_string()), + } + } + + #[cfg(test)] + fn supports_reasoning_effort(_model_name: &str, reasoning_effort: &str) -> bool { + if !CODEX_REASONING_LEVELS.contains(&reasoning_effort) { return false; } @@ -115,7 +143,7 @@ impl CodexProvider { println!("=== CODEX PROVIDER DEBUG ==="); println!("Command: {:?}", self.command); println!("Model: {}", self.model.model_name); - println!("Reasoning effort: {}", self.reasoning_effort); + println!("Reasoning effort: {:?}", self.reasoning_effort); println!("Skip git check: {}", self.skip_git_check); println!("Prompt length: {} chars", prompt.len()); println!("Prompt: {}", prompt); @@ -142,11 +170,10 @@ impl CodexProvider { cmd.arg("-m").arg(&self.model.model_name); } - // Reasoning effort configuration - cmd.arg("-c").arg(format!( - "model_reasoning_effort=\"{}\"", - self.reasoning_effort - )); + if let Some(reasoning_effort) = &self.reasoning_effort { + cmd.arg("-c") + .arg(format!("model_reasoning_effort=\"{}\"", reasoning_effort)); + } for override_config in &self.mcp_config_overrides { 
cmd.arg("-c").arg(override_config); @@ -604,7 +631,6 @@ impl ProviderDef for CodexProvider { CODEX_DOC_URL, vec![ ConfigKey::from_value_type::(true, false, true), - ConfigKey::from_value_type::(false, false, true), ConfigKey::from_value_type::(false, false, true), ], ) @@ -619,24 +645,8 @@ impl ProviderDef for CodexProvider { let command: String = config.get_codex_command().unwrap_or_default().into(); let resolved_command = SearchPaths::builder().with_npm().resolve(command)?; - // Get reasoning effort from config, default to "high" - let reasoning_effort = config - .get_codex_reasoning_effort() - .map(String::from) - .unwrap_or_else(|_| "high".to_string()); - - // Validate reasoning effort let reasoning_effort = - if Self::supports_reasoning_effort(&model.model_name, &reasoning_effort) { - reasoning_effort - } else { - tracing::warn!( - "Invalid CODEX_REASONING_EFFORT '{}' for model '{}', using 'high'", - reasoning_effort, - model.model_name - ); - "high".to_string() - }; + Self::map_thinking_effort(&model.model_name, model.thinking_effort()); // Get skip_git_check from config, default to false let skip_git_check = config @@ -925,7 +935,7 @@ mod tests { command: PathBuf::from("codex"), model: ModelConfig::new("gpt-5.2-codex").unwrap(), name: "codex".to_string(), - reasoning_effort: "high".to_string(), + reasoning_effort: Some("high".to_string()), skip_git_check: false, mcp_config_overrides: Vec::new(), mode_by_session: tokio::sync::RwLock::new(HashMap::new()), @@ -946,7 +956,7 @@ mod tests { command: PathBuf::from("codex"), model: ModelConfig::new("gpt-5.2-codex").unwrap(), name: "codex".to_string(), - reasoning_effort: "high".to_string(), + reasoning_effort: Some("high".to_string()), skip_git_check: false, mcp_config_overrides: Vec::new(), mode_by_session: tokio::sync::RwLock::new(HashMap::new()), @@ -980,7 +990,7 @@ mod tests { command: PathBuf::from("codex"), model: ModelConfig::new("gpt-5.2-codex").unwrap(), name: "codex".to_string(), - reasoning_effort: 
"high".to_string(), + reasoning_effort: Some("high".to_string()), skip_git_check: false, mcp_config_overrides: Vec::new(), mode_by_session: tokio::sync::RwLock::new(HashMap::new()), @@ -1005,7 +1015,7 @@ mod tests { #[test] fn test_reasoning_effort_support_by_model() { assert!(CodexProvider::supports_reasoning_effort("gpt-5.2", "none")); - assert!(!CodexProvider::supports_reasoning_effort( + assert!(CodexProvider::supports_reasoning_effort( "gpt-5.2-codex", "none" )); @@ -1029,7 +1039,7 @@ mod tests { command: PathBuf::from("codex"), model: ModelConfig::new("gpt-5.2-codex").unwrap(), name: "codex".to_string(), - reasoning_effort: "high".to_string(), + reasoning_effort: Some("high".to_string()), skip_git_check: false, mcp_config_overrides: Vec::new(), mode_by_session: tokio::sync::RwLock::new(HashMap::new()), @@ -1055,7 +1065,7 @@ mod tests { command: PathBuf::from("codex"), model: ModelConfig::new("gpt-5.2-codex").unwrap(), name: "codex".to_string(), - reasoning_effort: "high".to_string(), + reasoning_effort: Some("high".to_string()), skip_git_check: false, mcp_config_overrides: Vec::new(), mode_by_session: tokio::sync::RwLock::new(HashMap::new()), @@ -1128,7 +1138,7 @@ mod tests { command: PathBuf::from("codex"), model: ModelConfig::new("gpt-5.2-codex").unwrap(), name: "codex".to_string(), - reasoning_effort: "high".to_string(), + reasoning_effort: Some("high".to_string()), skip_git_check: false, mcp_config_overrides: Vec::new(), mode_by_session: tokio::sync::RwLock::new(HashMap::new()), @@ -1145,7 +1155,7 @@ mod tests { command: PathBuf::from("codex"), model: ModelConfig::new("gpt-5.2-codex").unwrap(), name: "codex".to_string(), - reasoning_effort: "high".to_string(), + reasoning_effort: Some("high".to_string()), skip_git_check: false, mcp_config_overrides: Vec::new(), mode_by_session: tokio::sync::RwLock::new(HashMap::new()), @@ -1214,20 +1224,56 @@ mod tests { #[test] fn test_config_keys() { let metadata = CodexProvider::metadata(); - 
assert_eq!(metadata.config_keys.len(), 3); + assert_eq!(metadata.config_keys.len(), 2); // First key should be CODEX_COMMAND (required) assert_eq!(metadata.config_keys[0].name, "CODEX_COMMAND"); assert!(metadata.config_keys[0].required); assert!(!metadata.config_keys[0].secret); - // Second key should be CODEX_REASONING_EFFORT (optional) - assert_eq!(metadata.config_keys[1].name, "CODEX_REASONING_EFFORT"); + // Second key should be CODEX_SKIP_GIT_CHECK (optional) + assert_eq!(metadata.config_keys[1].name, "CODEX_SKIP_GIT_CHECK"); assert!(!metadata.config_keys[1].required); + } - // Third key should be CODEX_SKIP_GIT_CHECK (optional) - assert_eq!(metadata.config_keys[2].name, "CODEX_SKIP_GIT_CHECK"); - assert!(!metadata.config_keys[2].required); + #[test] + fn test_map_thinking_effort() { + use crate::model::ThinkingEffort; + + let _guard = env_lock::lock_env([ + ("CODEX_REASONING_EFFORT", None::<&str>), + ("GOOSE_THINKING_EFFORT", None::<&str>), + ]); + + assert_eq!( + CodexProvider::map_thinking_effort("gpt-5.2-codex", Some(ThinkingEffort::Off)), + Some("none".to_string()) + ); + assert_eq!( + CodexProvider::map_thinking_effort("gpt-5.2", Some(ThinkingEffort::Off)), + Some("none".to_string()) + ); + assert_eq!( + CodexProvider::map_thinking_effort("gpt-5.2-codex", Some(ThinkingEffort::Max)), + Some("xhigh".to_string()) + ); + assert_eq!( + CodexProvider::map_thinking_effort("gpt-5.2-codex", None), + Some("high".to_string()) + ); + } + + #[test] + fn test_map_thinking_effort_uses_legacy_codex_env() { + let _guard = env_lock::lock_env([ + ("CODEX_REASONING_EFFORT", Some("low")), + ("GOOSE_THINKING_EFFORT", None::<&str>), + ]); + + assert_eq!( + CodexProvider::map_thinking_effort("gpt-5.2-codex", None), + Some("low".to_string()) + ); } #[test] @@ -1236,7 +1282,7 @@ mod tests { command: PathBuf::from("codex"), model: ModelConfig::new("gpt-5.2-codex").unwrap(), name: "codex".to_string(), - reasoning_effort: "high".to_string(), + reasoning_effort: 
Some("high".to_string()), skip_git_check: false, mcp_config_overrides: Vec::new(), mode_by_session: tokio::sync::RwLock::new(HashMap::new()), diff --git a/crates/goose/src/providers/databricks.rs b/crates/goose/src/providers/databricks.rs index 2695871beb..a7f512e16f 100644 --- a/crates/goose/src/providers/databricks.rs +++ b/crates/goose/src/providers/databricks.rs @@ -3,12 +3,14 @@ use async_trait::async_trait; use futures::future::BoxFuture; use serde::{Deserialize, Serialize}; use serde_json::Value; +use std::collections::HashSet; +use std::sync::LazyLock; use std::sync::{Arc, Mutex}; -use std::time::Duration; +use std::time::{Duration, Instant}; use super::api_client::{ApiClient, AuthMethod, AuthProvider}; use super::base::{ - ConfigKey, MessageStream, Provider, ProviderDef, ProviderMetadata, + ConfigKey, MessageStream, ModelInfo, Provider, ProviderDef, ProviderMetadata, DEFAULT_PROVIDER_TIMEOUT_SECS, }; use super::embedding::EmbeddingCapable; @@ -21,7 +23,7 @@ use super::openai_compatible::{ stream_openai_compat, stream_responses_compat, }; use super::retry::ProviderRetry; -use super::utils::{ImageFormat, RequestLog}; +use super::utils::{is_openai_responses_model, ImageFormat, RequestLog}; use crate::config::ConfigError; use crate::conversation::message::Message; use crate::instance_id::get_instance_id; @@ -33,11 +35,35 @@ use crate::providers::retry::{ use rmcp::model::Tool; use serde_json::json; +#[derive(Debug, Clone)] +struct DatabricksEndpointInfo { + name: String, + upstream_model_name: Option, + upstream_model_provider: Option, + reasoning: Option, +} + +#[derive(Debug, Clone)] +struct DatabricksUpstreamModel { + name: String, + provider: Option, +} + +#[derive(Debug, Clone)] +struct CachedDatabricksEndpointInfo { + info: DatabricksEndpointInfo, + fetched_at: Instant, +} + const DEFAULT_CLIENT_ID: &str = "databricks-cli"; const DEFAULT_REDIRECT_URL: &str = "http://localhost"; const DEFAULT_SCOPES: &[&str] = &["all-apis", "offline_access"]; const 
DATABRICKS_PROVIDER_NAME: &str = "databricks"; +const DATABRICKS_ENDPOINT_METADATA_TTL_SECS: u64 = 60; +static DATABRICKS_ENDPOINT_INFO_CACHE: LazyLock< + Mutex>, +> = LazyLock::new(|| Mutex::new(std::collections::HashMap::new())); pub const DATABRICKS_DEFAULT_MODEL: &str = "databricks-claude-sonnet-4"; const DATABRICKS_DEFAULT_FAST_MODEL: &str = "databricks-claude-haiku-4-5"; pub const DATABRICKS_KNOWN_MODELS: &[&str] = &[ @@ -116,6 +142,8 @@ impl AuthProvider for DatabricksAuthProvider { pub struct DatabricksProvider { #[serde(skip)] api_client: ApiClient, + #[serde(skip)] + host: String, auth: DatabricksAuth, model: ModelConfig, image_format: ImageFormat, @@ -172,13 +200,14 @@ impl DatabricksProvider { })); let api_client = ApiClient::with_timeout( - host, + host.clone(), auth_method, Duration::from_secs(DEFAULT_PROVIDER_TIMEOUT_SECS), )?; let mut provider = Self { api_client, + host, auth, model: model.clone(), image_format: ImageFormat::OpenAi, @@ -240,13 +269,14 @@ impl DatabricksProvider { })); let api_client = ApiClient::with_timeout( - host, + host.clone(), auth_method, Duration::from_secs(DEFAULT_PROVIDER_TIMEOUT_SECS), )?; Ok(Self { api_client, + host, auth, model, image_format: ImageFormat::OpenAi, @@ -270,7 +300,252 @@ impl DatabricksProvider { } fn is_responses_model(model_name: &str) -> bool { - super::utils::is_openai_responses_model(model_name) + is_openai_responses_model(model_name) + } + + fn is_claude_model(model_name: &str) -> bool { + model_name.to_lowercase().contains("claude") + } + + fn is_reasoning_capable_model_name(model_name: &str) -> bool { + Self::is_claude_model(model_name) || Self::is_responses_model(model_name) + } + + fn endpoint_model_candidates(value: &Value) -> Vec { + let mut candidates: Vec = Vec::new(); + + fn get_string_at(value: &Value, path: &[&str]) -> Option { + path.iter() + .try_fold(value, |current, key| current.get(*key)) + .and_then(|v| v.as_str()) + .filter(|s| !s.is_empty()) + .map(ToString::to_string) + } + + fn 
push_candidate( + name: Option, + provider: Option, + candidates: &mut Vec, + ) { + if let Some(name) = name { + if !candidates.iter().any(|candidate| candidate.name == name) { + candidates.push(DatabricksUpstreamModel { name, provider }); + } + } + } + + for config_key in ["config", "pending_config"] { + let Some(config) = value.get(config_key) else { + continue; + }; + + for collection_key in ["served_entities", "served_models"] { + let Some(entities) = config.get(collection_key).and_then(|v| v.as_array()) else { + continue; + }; + + for entity in entities { + push_candidate( + get_string_at(entity, &["external_model", "name"]), + get_string_at(entity, &["external_model", "provider"]), + &mut candidates, + ); + push_candidate( + get_string_at(entity, &["foundation_model", "name"]), + get_string_at(entity, &["foundation_model", "provider"]), + &mut candidates, + ); + push_candidate( + get_string_at(entity, &["entity_name"]), + None, + &mut candidates, + ); + } + } + } + + candidates + } + + fn endpoint_info_from_value(endpoint: &Value) -> Option { + let name = endpoint.get("name")?.as_str()?.to_string(); + let upstream_model = Self::endpoint_model_candidates(endpoint) + .into_iter() + .find(|candidate| candidate.name != name); + let upstream_model_name = upstream_model.as_ref().map(|model| model.name.clone()); + let upstream_model_provider = upstream_model.and_then(|model| model.provider); + + let reasoning = upstream_model_name + .as_deref() + .map(Self::is_reasoning_capable_model_name) + .or_else(|| Some(Self::is_reasoning_capable_model_name(&name))); + + Some(DatabricksEndpointInfo { + name, + upstream_model_name, + upstream_model_provider, + reasoning, + }) + } + + async fn fetch_endpoint_info( + &self, + endpoint_name: &str, + ) -> Result { + let response = self + .api_client + .request( + None, + &format!( + "api/2.0/serving-endpoints/{}", + urlencoding::encode(endpoint_name) + ), + ) + .response_get() + .await + .map_err(|e| { + 
ProviderError::RequestFailed(format!( + "Failed to fetch Databricks endpoint metadata: {}", + e + )) + })?; + + if !response.status().is_success() { + let status = response.status(); + let detail = response.text().await.unwrap_or_default(); + return Err(ProviderError::RequestFailed(format!( + "Failed to fetch Databricks endpoint metadata: {} {}", + status, detail + ))); + } + + let json: Value = response.json().await.map_err(|e| { + ProviderError::RequestFailed(format!( + "Failed to parse Databricks endpoint metadata: {}", + e + )) + })?; + + Self::endpoint_info_from_value(&json).ok_or_else(|| { + ProviderError::RequestFailed( + "Unexpected response format from Databricks endpoint metadata".to_string(), + ) + }) + } + + async fn resolve_endpoint_info( + &self, + endpoint_name: &str, + ) -> Result { + const MAX_MODEL_SERVING_HOPS: usize = 4; + + let original_endpoint_name = endpoint_name.to_string(); + let mut current_endpoint_name = endpoint_name.to_string(); + let mut visited = HashSet::new(); + let mut last_info: Option = None; + + for _ in 0..MAX_MODEL_SERVING_HOPS { + if !visited.insert(current_endpoint_name.clone()) { + break; + } + + let info = self.fetch_endpoint_info(&current_endpoint_name).await?; + let next_endpoint_name = match ( + info.upstream_model_provider.as_deref(), + info.upstream_model_name.as_deref(), + ) { + (Some("databricks-model-serving"), Some(next_endpoint_name)) + if !visited.contains(next_endpoint_name) => + { + Some(next_endpoint_name.to_string()) + } + _ => None, + }; + + if let Some(next_endpoint_name) = next_endpoint_name { + last_info = Some(info); + current_endpoint_name = next_endpoint_name; + continue; + } + + return Ok(if info.name == original_endpoint_name { + info + } else { + let upstream_model_name = info + .upstream_model_name + .clone() + .or_else(|| Some(info.name.clone())); + DatabricksEndpointInfo { + name: original_endpoint_name, + upstream_model_name, + upstream_model_provider: info.upstream_model_provider.clone(), + 
reasoning: info.reasoning, + } + }); + } + + last_info + .map(|info| DatabricksEndpointInfo { + name: original_endpoint_name, + upstream_model_name: info.upstream_model_name, + upstream_model_provider: info.upstream_model_provider, + reasoning: info.reasoning, + }) + .ok_or_else(|| { + ProviderError::RequestFailed( + "Failed to resolve Databricks endpoint metadata".to_string(), + ) + }) + } + + async fn resolve_endpoint_info_cached( + &self, + endpoint_name: &str, + ) -> Result { + let cache_key = format!("{}:{}", self.host, endpoint_name); + let cached = DATABRICKS_ENDPOINT_INFO_CACHE + .lock() + .unwrap() + .get(&cache_key) + .cloned(); + + if let Some(cached) = cached { + if cached.fetched_at.elapsed() + < Duration::from_secs(DATABRICKS_ENDPOINT_METADATA_TTL_SECS) + { + return Ok(cached.info); + } + } + + let info = self.resolve_endpoint_info(endpoint_name).await?; + DATABRICKS_ENDPOINT_INFO_CACHE.lock().unwrap().insert( + cache_key, + CachedDatabricksEndpointInfo { + info: info.clone(), + fetched_at: Instant::now(), + }, + ); + Ok(info) + } + + fn model_info_from_endpoint(info: DatabricksEndpointInfo) -> ModelInfo { + let context_model = info.upstream_model_name.as_deref().unwrap_or(&info.name); + let context_limit = ModelConfig::new_or_fail(context_model) + .with_canonical_limits(DATABRICKS_PROVIDER_NAME) + .context_limit(); + let reasoning = info + .reasoning + .unwrap_or_else(|| ModelConfig::new_or_fail(context_model).is_reasoning_model()); + + ModelInfo { + name: info.name, + context_limit, + input_token_cost: None, + output_token_cost: None, + currency: None, + supports_cache_control: None, + reasoning, + } } fn get_endpoint_path(&self, model_name: &str, is_embedding: bool) -> String { @@ -378,11 +653,49 @@ impl Provider for DatabricksProvider { messages: &[Message], tools: &[Tool], ) -> Result { - let path = self.get_endpoint_path(&model_config.model_name, false); + let (endpoint_name, _) = super::utils::extract_reasoning_effort(&model_config.model_name); 
+ let endpoint_info = self.resolve_endpoint_info_cached(&endpoint_name).await.ok(); + let effective_model_name = endpoint_info + .as_ref() + .and_then(|info| info.upstream_model_name.as_deref()) + .unwrap_or(&model_config.model_name); + let is_responses_model = Self::is_responses_model(&model_config.model_name) + || Self::is_responses_model(effective_model_name); + let path = if is_responses_model { + "serving-endpoints/responses".to_string() + } else { + self.get_endpoint_path(&model_config.model_name, false) + }; let client_request_id = self.build_client_request_id(session_id); - if Self::is_responses_model(&model_config.model_name) { - let mut payload = create_responses_request(model_config, system, messages, tools)?; + if is_responses_model { + let responses_model_config; + let request_model_config = if effective_model_name != model_config.model_name { + responses_model_config = { + let mut config = model_config.clone(); + config.model_name = effective_model_name.to_string(); + config + }; + &responses_model_config + } else { + model_config + }; + let mut payload = + create_responses_request(request_model_config, system, messages, tools)?; + payload["model"] = Value::String(endpoint_name.clone()); + if payload.get("reasoning").is_none() { + if let Some(effort) = model_config.thinking_effort().and_then(|effort| { + super::utils::openai_reasoning_effort_for_thinking(effective_model_name, effort) + }) { + payload.as_object_mut().unwrap().insert( + "reasoning".to_string(), + json!({ + "effort": effort, + "summary": "auto", + }), + ); + } + } payload["stream"] = Value::Bool(true); if let Some(ref client_request_id) = client_request_id { payload["client_request_id"] = Value::String(client_request_id.clone()); @@ -406,8 +719,27 @@ impl Provider for DatabricksProvider { stream_responses_compat(response, log) } else { - let mut payload = - create_request(model_config, system, messages, tools, &self.image_format)?; + let format_model_config; + let request_model_config = 
if Self::is_claude_model(effective_model_name) + && !Self::is_claude_model(&model_config.model_name) + { + format_model_config = { + let mut config = model_config.clone(); + config.model_name = effective_model_name.to_string(); + config + }; + &format_model_config + } else { + model_config + }; + + let mut payload = create_request( + request_model_config, + system, + messages, + tools, + &self.image_format, + )?; payload .as_object_mut() .expect("payload should have model key") @@ -498,6 +830,15 @@ impl Provider for DatabricksProvider { } async fn fetch_supported_models(&self) -> Result, ProviderError> { + Ok(self + .fetch_supported_model_info() + .await? + .into_iter() + .map(|model| model.name) + .collect()) + } + + async fn fetch_supported_model_info(&self) -> Result, ProviderError> { let response = self .api_client .request(None, "api/2.0/serving-endpoints") @@ -530,18 +871,25 @@ impl Provider for DatabricksProvider { ) })?; - let models: Vec = endpoints - .iter() - .filter_map(|endpoint| { - endpoint - .get("name") - .and_then(|v| v.as_str()) - .map(|name| name.to_string()) - }) - .collect(); + let mut models = Vec::new(); + for endpoint in endpoints { + if let Some(endpoint_info) = Self::endpoint_info_from_value(endpoint) { + models.push(Self::model_info_from_endpoint(endpoint_info)); + } + } Ok(models) } + + async fn fetch_model_info(&self, model_name: &str) -> Result { + let (endpoint_name, _) = super::utils::extract_reasoning_effort(model_name); + let endpoint_info = self.resolve_endpoint_info_cached(&endpoint_name).await?; + Ok(Self::model_info_from_endpoint(endpoint_info)) + } + + async fn fetch_recommended_model_info(&self) -> Result, ProviderError> { + self.fetch_supported_model_info().await + } } #[async_trait] @@ -596,6 +944,7 @@ mod tests { super::super::api_client::AuthMethod::NoAuth, ) .unwrap(), + host: "https://example.com".to_string(), auth: DatabricksAuth::Token("fake".into()), model: ModelConfig::new_or_fail("databricks-gpt-5.4"), 
image_format: ImageFormat::OpenAi, @@ -628,4 +977,91 @@ mod tests { ); } } + + #[test] + fn endpoint_metadata_marks_reasoning_alias_from_external_model() { + let endpoint = json!({ + "name": "goose", + "config": { + "served_entities": [{ + "name": "current", + "external_model": { + "name": "claude-opus-4.6", + "provider": "anthropic", + "task": "llm/v1/chat" + } + }] + } + }); + + let info = DatabricksProvider::endpoint_info_from_value(&endpoint).unwrap(); + + assert_eq!(info.name, "goose"); + assert_eq!(info.upstream_model_name.as_deref(), Some("claude-opus-4.6")); + assert_eq!(info.reasoning, Some(true)); + } + + #[test] + fn endpoint_metadata_captures_databricks_model_serving_hop() { + let endpoint = json!({ + "name": "goose", + "config": { + "served_entities": [{ + "external_model": { + "name": "databricks-claude-opus-4-6", + "provider": "databricks-model-serving", + "task": "llm/v1/chat" + } + }] + } + }); + + let info = DatabricksProvider::endpoint_info_from_value(&endpoint).unwrap(); + + assert_eq!(info.name, "goose"); + assert_eq!( + info.upstream_model_name.as_deref(), + Some("databricks-claude-opus-4-6") + ); + assert_eq!( + info.upstream_model_provider.as_deref(), + Some("databricks-model-serving") + ); + assert_eq!(info.reasoning, Some(true)); + } + + #[test] + fn endpoint_metadata_marks_reasoning_alias_from_pending_gpt_model() { + let endpoint = json!({ + "name": "goose", + "pending_config": { + "served_entities": [{ + "external_model": { + "name": "gpt-5.5", + "provider": "openai", + "task": "llm/v1/chat" + } + }] + } + }); + + let info = DatabricksProvider::endpoint_info_from_value(&endpoint).unwrap(); + + assert_eq!(info.name, "goose"); + assert_eq!(info.upstream_model_name.as_deref(), Some("gpt-5.5")); + assert_eq!(info.reasoning, Some(true)); + } + + #[test] + fn endpoint_metadata_uses_endpoint_name_when_no_upstream_model_exists() { + let endpoint = json!({ + "name": "goose-gpt-5-5" + }); + + let info = 
DatabricksProvider::endpoint_info_from_value(&endpoint).unwrap(); + + assert_eq!(info.name, "goose-gpt-5-5"); + assert_eq!(info.upstream_model_name, None); + assert_eq!(info.reasoning, Some(true)); + } } diff --git a/crates/goose/src/providers/formats/anthropic.rs b/crates/goose/src/providers/formats/anthropic.rs index 72450770bb..92803dea4b 100644 --- a/crates/goose/src/providers/formats/anthropic.rs +++ b/crates/goose/src/providers/formats/anthropic.rs @@ -1,6 +1,6 @@ use crate::conversation::message::{Message, MessageContent}; use crate::mcp_utils::extract_text_from_resource; -use crate::model::ModelConfig; +use crate::model::{ModelConfig, ThinkingEffort}; use crate::providers::base::Usage; use crate::providers::errors::ProviderError; use crate::providers::utils::{convert_image, ImageFormat}; @@ -37,7 +37,6 @@ macro_rules! string_enum { } string_enum!(ThinkingType { Adaptive => "adaptive", Enabled => "enabled", Disabled => "disabled" }); -string_enum!(ThinkingEffort { Low => "low", Medium => "medium", High => "high", Max => "max" }); #[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] pub struct AnthropicFormatOptions { @@ -80,33 +79,16 @@ pub fn thinking_type(model_config: &ModelConfig) -> ThinkingType { } let is_adaptive_model = supports_adaptive_thinking(&model_config.model_name); + let effort = model_config.thinking_effort(); - if let Some(s) = - model_config.get_config_param::("thinking_type", "CLAUDE_THINKING_TYPE") - { - let tt = s.parse::().unwrap_or_else(|e| { - tracing::warn!("{e}"); - ThinkingType::Disabled - }); - if tt == ThinkingType::Adaptive && !is_adaptive_model { - tracing::warn!( - "Adaptive thinking not supported for {}, disabling thinking", - model_config.model_name - ); - return ThinkingType::Disabled; - } - return tt; + if effort.is_none() && legacy_thinking_budget_tokens().is_some() { + return ThinkingType::Enabled; } - if is_adaptive_model { - ThinkingType::Adaptive - } else if std::env::var("CLAUDE_THINKING_ENABLED").is_ok() { - 
tracing::warn!( - "CLAUDE_THINKING_ENABLED is deprecated, use CLAUDE_THINKING_TYPE=enabled instead" - ); - ThinkingType::Enabled - } else { - ThinkingType::Disabled + match effort.unwrap_or(ThinkingEffort::Off) { + ThinkingEffort::Off => ThinkingType::Disabled, + _ if is_adaptive_model => ThinkingType::Adaptive, + _ => ThinkingType::Enabled, } } @@ -510,35 +492,45 @@ pub fn get_usage(data: &Value) -> Result { } pub fn thinking_effort(model_config: &ModelConfig) -> ThinkingEffort { - match model_config.get_config_param::("effort", "CLAUDE_THINKING_EFFORT") { - Some(s) => s.parse().unwrap_or_else(|e| { - tracing::warn!("{e}, defaulting to 'high'"); - ThinkingEffort::High - }), - None => ThinkingEffort::High, - } + model_config + .thinking_effort() + .unwrap_or(ThinkingEffort::High) } -fn thinking_budget_tokens(model_config: &ModelConfig) -> i32 { - let request_param = model_config +pub fn thinking_budget_tokens(model_config: &ModelConfig) -> i32 { + if let Some(request_param) = model_config .request_params .as_ref() .and_then(|params| params.get("budget_tokens")) - .and_then(|v| serde_json::from_value(v.clone()).ok()); + .and_then(|v| serde_json::from_value::(v.clone()).ok()) + { + return request_param.max(1024); + } - request_param - .or_else(|| { - crate::config::Config::global() - .get_param::("ANTHROPIC_THINKING_BUDGET") - .ok() - }) - .or_else(|| { - crate::config::Config::global() - .get_param::("CLAUDE_THINKING_BUDGET") - .ok() - }) - .unwrap_or(16000) - .max(1024) + if let Some(budget) = legacy_thinking_budget_tokens() { + return budget; + } + + let effort = model_config + .thinking_effort() + .unwrap_or(ThinkingEffort::High); + match effort { + ThinkingEffort::Off => 1024, + ThinkingEffort::Low => 4000, + ThinkingEffort::Medium => 10000, + ThinkingEffort::High => 16000, + ThinkingEffort::Max => 32000, + } +} + +fn legacy_thinking_budget_tokens() -> Option { + let config = crate::config::Config::global(); + for key in ["ANTHROPIC_THINKING_BUDGET", 
"CLAUDE_THINKING_BUDGET"] { + if let Ok(budget) = config.get_param::(key) { + return Some(budget.max(1024)); + } + } + None } fn apply_thinking_config( @@ -1181,14 +1173,14 @@ mod tests { #[test] fn test_create_request_adaptive_thinking_for_46_models() -> Result<()> { - let _guard = env_lock::lock_env([ - ("CLAUDE_THINKING_TYPE", Some("adaptive")), - ("CLAUDE_THINKING_EFFORT", Some("high")), - ("CLAUDE_THINKING_ENABLED", None::<&str>), - ]); + let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", None::<&str>)]); + + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), json!("high")); let mut config = cfg("claude-opus-4-6"); config.max_tokens = Some(4096); + config.request_params = Some(params); let messages = vec![Message::user().with_text("Hello")]; let payload = create_request(&config, "system", &messages, &[])?; @@ -1202,27 +1194,20 @@ mod tests { #[test] fn test_create_request_enabled_thinking_with_budget() -> Result<()> { let _guard = env_lock::lock_env([ - ("CLAUDE_THINKING_TYPE", None::<&str>), - ("CLAUDE_THINKING_EFFORT", None::<&str>), - ("CLAUDE_THINKING_ENABLED", None::<&str>), - ("ANTHROPIC_THINKING_BUDGET", None::<&str>), - ("CLAUDE_THINKING_BUDGET", None::<&str>), + ("GOOSE_THINKING_EFFORT", None::<&str>), + ("ANTHROPIC_PRESERVE_THINKING_CONTEXT", None::<&str>), ]); - let mut params = std::collections::HashMap::new(); - params.insert("thinking_type".to_string(), json!("enabled")); - params.insert("budget_tokens".to_string(), json!(10000)); - - let mut config = cfg("claude-3-7-sonnet-20250219"); + let mut config = cfg_with_effort("claude-3-7-sonnet-20250219", "high"); config.max_tokens = Some(4096); - config.request_params = Some(params); let messages = vec![Message::user().with_text("Hello")]; let payload = create_request(&config, "system", &messages, &[])?; assert_eq!(payload["thinking"]["type"], "enabled"); - assert_eq!(payload["thinking"]["budget_tokens"], 10000); - 
assert_eq!(payload["max_tokens"], 4096 + 10000); + let budget = payload["thinking"]["budget_tokens"].as_i64().unwrap(); + assert!(budget > 0); + assert_eq!(payload["max_tokens"], 4096 + budget); Ok(()) } @@ -1230,12 +1215,11 @@ mod tests { #[test] fn test_create_request_disabled_thinking_no_thinking_field() -> Result<()> { let _guard = env_lock::lock_env([ - ("CLAUDE_THINKING_TYPE", None::<&str>), - ("CLAUDE_THINKING_ENABLED", None::<&str>), + ("GOOSE_THINKING_EFFORT", None::<&str>), ("ANTHROPIC_PRESERVE_THINKING_CONTEXT", None::<&str>), ]); - let config = cfg("claude-sonnet-4-20250514"); + let config = cfg_with_effort("claude-sonnet-4-20250514", "off"); let messages = vec![Message::user().with_text("Hello")]; let payload = create_request(&config, "system", &messages, &[])?; @@ -1449,9 +1433,9 @@ mod tests { } } - fn cfg_with_thinking(name: &str, tt: &str) -> ModelConfig { + fn cfg_with_effort(name: &str, effort: &str) -> ModelConfig { let mut params = std::collections::HashMap::new(); - params.insert("thinking_type".to_string(), json!(tt)); + params.insert("thinking_effort".to_string(), json!(effort)); ModelConfig { model_name: name.to_string(), request_params: Some(params), @@ -1460,50 +1444,61 @@ mod tests { } #[test] - fn test_thinking_type_explicit_params() { + fn test_thinking_type_from_effort() { + let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", None::<&str>)]); + // Adaptive model with effort → adaptive assert_eq!( - thinking_type(&cfg_with_thinking("claude-opus-4-6", "adaptive")), + thinking_type(&cfg_with_effort("claude-opus-4-6", "high")), ThinkingType::Adaptive ); + // Adaptive model with off → disabled assert_eq!( - thinking_type(&cfg_with_thinking("claude-opus-4-6", "disabled")), + thinking_type(&cfg_with_effort("claude-opus-4-6", "off")), ThinkingType::Disabled ); + // Non-adaptive Claude with effort → enabled assert_eq!( - thinking_type(&cfg_with_thinking("claude-3-7-sonnet-20250219", "enabled")), + 
thinking_type(&cfg_with_effort("claude-3-7-sonnet-20250219", "high")), ThinkingType::Enabled ); + // Non-adaptive Claude with off → disabled assert_eq!( - thinking_type(&cfg_with_thinking("claude-3-7-sonnet-20250219", "adaptive")), - ThinkingType::Disabled - ); - assert_eq!( - thinking_type(&cfg_with_thinking("claude-opus-4-6", "adapttive")), + thinking_type(&cfg_with_effort("claude-3-7-sonnet-20250219", "off")), ThinkingType::Disabled ); } + #[test] + fn test_thinking_budget_uses_legacy_env() { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", None::<&str>), + ("ANTHROPIC_THINKING_BUDGET", Some("8192")), + ("CLAUDE_THINKING_BUDGET", None::<&str>), + ]); + let config = cfg_with_effort("claude-3-7-sonnet-20250219", "high"); + assert_eq!(thinking_budget_tokens(&config), 8192); + } + #[test] fn test_thinking_type_non_claude_always_disabled() { - assert_eq!(thinking_type(&cfg("gpt-4o")), ThinkingType::Disabled); assert_eq!( - thinking_type(&cfg_with_thinking("gpt-4o", "enabled")), + thinking_type(&cfg_with_effort("gpt-4o", "off")), + ThinkingType::Disabled + ); + assert_eq!( + thinking_type(&cfg_with_effort("gpt-4o", "high")), ThinkingType::Disabled ); } #[test] - fn test_thinking_type_env_var_override() { - let _guard = env_lock::lock_env([ - ("CLAUDE_THINKING_TYPE", Some("adaptive")), - ("CLAUDE_THINKING_ENABLED", None::<&str>), - ]); + fn test_thinking_type_off_means_disabled() { assert_eq!( - thinking_type(&cfg("claude-opus-4-6")), - ThinkingType::Adaptive + thinking_type(&cfg_with_effort("claude-opus-4-6", "off")), + ThinkingType::Disabled ); assert_eq!( - thinking_type(&cfg("claude-3-7-sonnet-20250219")), + thinking_type(&cfg_with_effort("claude-3-7-sonnet-20250219", "off")), ThinkingType::Disabled ); } diff --git a/crates/goose/src/providers/formats/databricks.rs b/crates/goose/src/providers/formats/databricks.rs index 584bbdf823..7918c75004 100644 --- a/crates/goose/src/providers/formats/databricks.rs +++ 
b/crates/goose/src/providers/formats/databricks.rs @@ -1,10 +1,12 @@ use crate::conversation::message::{Message, MessageContent}; use crate::model::ModelConfig; -use crate::providers::formats::anthropic::{thinking_effort, thinking_type, ThinkingType}; +use crate::providers::formats::anthropic::{ + thinking_budget_tokens, thinking_effort, thinking_type, ThinkingType, +}; use crate::providers::utils::{ convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model, - is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name, - ImageFormat, + is_valid_function_name, load_image_file, openai_reasoning_effort_for_thinking, + safely_parse_json, sanitize_function_name, ImageFormat, }; use anyhow::{anyhow, Error}; use rmcp::model::{ @@ -245,11 +247,7 @@ fn apply_claude_thinking_config(payload: &mut Value, model_config: &ModelConfig) ); } ThinkingType::Enabled => { - let budget_tokens = model_config - .get_config_param::("budget_tokens", "CLAUDE_THINKING_BUDGET") - .unwrap_or(16000) - .max(1024); - + let budget_tokens = thinking_budget_tokens(model_config); let max_tokens = model_config.max_output_tokens() + budget_tokens; obj.insert("max_tokens".to_string(), json!(max_tokens)); obj.insert( @@ -582,8 +580,17 @@ pub fn create_request( )); } - let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name); + let (model_name, legacy_reasoning_effort) = extract_reasoning_effort(&model_config.model_name); let is_openai_reasoning_model = is_openai_responses_model(&model_name); + let reasoning_effort = if is_openai_reasoning_model { + model_config + .thinking_effort() + .map_or(legacy_reasoning_effort, |effort| { + openai_reasoning_effort_for_thinking(&model_name, effort) + }) + } else { + None + }; let system_message = DatabricksMessage { role: "system".to_string(), @@ -652,6 +659,9 @@ pub fn create_request( if let Some(params) = &model_config.request_params { if let Some(obj) = payload.as_object_mut() { for 
(key, value) in params { + if key == "thinking_effort" { + continue; + } obj.insert(key.clone(), value.clone()); } } @@ -1042,15 +1052,17 @@ mod tests { #[test] fn test_create_request_reasoning_effort() -> anyhow::Result<()> { + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("high")); let model_config = ModelConfig { - model_name: "o3-mini-high".to_string(), + model_name: "o3-mini".to_string(), context_limit: Some(4096), temperature: None, max_tokens: Some(1024), toolshim: false, toolshim_model: None, fast_model_config: None, - request_params: None, + request_params: Some(params), reasoning: None, }; let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?; @@ -1058,6 +1070,48 @@ mod tests { Ok(()) } + #[test] + fn test_create_request_off_effort_preserves_none() -> anyhow::Result<()> { + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("off")); + let model_config = ModelConfig { + model_name: "databricks-o3-mini".to_string(), + context_limit: Some(4096), + temperature: None, + max_tokens: Some(1024), + toolshim: false, + toolshim_model: None, + fast_model_config: None, + request_params: Some(params), + reasoning: None, + }; + let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?; + assert_eq!(request["reasoning_effort"], "none"); + assert!(request.get("thinking_effort").is_none()); + Ok(()) + } + + #[test] + fn test_create_request_max_effort_uses_supported_level() -> anyhow::Result<()> { + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("max")); + let model_config = ModelConfig { + model_name: "databricks-gpt-5.2-pro".to_string(), + context_limit: Some(4096), + temperature: None, + max_tokens: Some(1024), + toolshim: false, + toolshim_model: None, + fast_model_config: None, + request_params: 
Some(params), + reasoning: None, + }; + let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?; + assert_eq!(request["reasoning_effort"], "high"); + assert!(request.get("thinking_effort").is_none()); + Ok(()) + } + #[test] fn test_create_request_reasoning_effort_xhigh() -> anyhow::Result<()> { let model_config = ModelConfig { @@ -1117,15 +1171,11 @@ mod tests { #[test] fn test_create_request_adaptive_thinking_for_46_models() -> anyhow::Result<()> { - let _guard = env_lock::lock_env([ - ("CLAUDE_THINKING_TYPE", Some("adaptive")), - ("CLAUDE_THINKING_EFFORT", Some("low")), - ("CLAUDE_THINKING_ENABLED", None::<&str>), - ("CLAUDE_THINKING_BUDGET", None::<&str>), - ]); - let mut model_config = ModelConfig::new_or_fail("databricks-claude-opus-4-6"); model_config.max_tokens = Some(4096); + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("low")); + model_config.request_params = Some(params); let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?; @@ -1140,30 +1190,47 @@ mod tests { #[test] fn test_create_request_enabled_thinking_with_budget() -> anyhow::Result<()> { - let _guard = env_lock::lock_env([ - ("CLAUDE_THINKING_TYPE", None::<&str>), - ("CLAUDE_THINKING_ENABLED", None::<&str>), - ("CLAUDE_THINKING_BUDGET", Some("10000")), - ]); - let mut model_config = ModelConfig::new_or_fail("databricks-claude-3-7-sonnet"); model_config.max_tokens = Some(4096); - model_config = model_config.with_request_params(Some(std::collections::HashMap::from([( - "thinking_type".to_string(), - json!("enabled"), - )]))); + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("high")); + model_config.request_params = Some(params); let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?; assert_eq!(request["thinking"]["type"], "enabled"); - 
assert_eq!(request["thinking"]["budget_tokens"], 10000); - assert_eq!(request["max_tokens"], 14096); + assert_eq!(request["thinking"]["budget_tokens"], 16000); + assert_eq!(request["max_tokens"], 20096); assert_eq!(request["temperature"], 2); assert!(request.get("max_completion_tokens").is_none()); Ok(()) } + #[test] + fn test_create_request_enabled_thinking_budget_tracks_effort() -> anyhow::Result<()> { + for (effort, expected_budget) in [ + ("low", 4000), + ("medium", 10000), + ("high", 16000), + ("max", 32000), + ] { + let mut model_config = ModelConfig::new_or_fail("databricks-claude-3-7-sonnet"); + model_config.max_tokens = Some(4096); + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!(effort)); + model_config.request_params = Some(params); + + let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?; + + assert_eq!(request["thinking"]["type"], "enabled"); + assert_eq!(request["thinking"]["budget_tokens"], expected_budget); + assert_eq!(request["max_tokens"], 4096 + expected_budget); + } + + Ok(()) + } + #[test] fn test_response_to_message_claude_thinking() -> anyhow::Result<()> { let response = json!({ diff --git a/crates/goose/src/providers/formats/google.rs b/crates/goose/src/providers/formats/google.rs index 298efb93fb..b35c2db504 100644 --- a/crates/goose/src/providers/formats/google.rs +++ b/crates/goose/src/providers/formats/google.rs @@ -542,22 +542,18 @@ fn get_thinking_config(model_config: &ModelConfig) -> Option { } if is_gemini_3 { - let thinking_level_str = model_config - .get_config_param::("thinking_level", "GEMINI3_THINKING_LEVEL") - .map(|s| s.to_lowercase()) - .unwrap_or_else(|| "low".to_string()); - - let thinking_level = match thinking_level_str.as_str() { - "high" => ThinkingLevel::High, - "low" => ThinkingLevel::Low, - invalid => { - tracing::warn!( - "Invalid thinking level '{}' for model '{}'. Valid levels: low, high. 
Using 'low'.", - invalid, - model_config.model_name, - ); + use crate::model::ThinkingEffort; + let effort = model_config + .thinking_effort() + .unwrap_or(ThinkingEffort::Off); + if effort == ThinkingEffort::Off { + return None; + } + let thinking_level = match effort { + ThinkingEffort::Off | ThinkingEffort::Low | ThinkingEffort::Medium => { ThinkingLevel::Low } + ThinkingEffort::High | ThinkingEffort::Max => ThinkingLevel::High, }; Some(ThinkingConfig { @@ -1378,7 +1374,11 @@ data: [DONE]"#; fn test_get_thinking_config() { use crate::model::ModelConfig; - let config = ModelConfig::new("gemini-3-pro").unwrap(); + // Test 1: Gemini 3 model with low thinking effort + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("low")); + let mut config = ModelConfig::new("gemini-3-pro").unwrap(); + config.request_params = Some(params); let result = get_thinking_config(&config); assert!(result.is_some()); let thinking_config = result.unwrap(); @@ -1386,9 +1386,18 @@ data: [DONE]"#; assert!(thinking_config.thinking_budget.is_none()); assert!(thinking_config.include_thoughts); - let config = ModelConfig::new("Gemini-3-Flash").unwrap(); + // Test 2: Gemini 3 model with high thinking effort + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("high")); + let mut config = ModelConfig::new("Gemini-3-Flash").unwrap(); + config.request_params = Some(params); let result = get_thinking_config(&config); assert!(result.is_some()); + let thinking_config = result.unwrap(); + assert!(matches!( + thinking_config.thinking_level, + Some(ThinkingLevel::High) + )); let config = ModelConfig::new("gemini-2.5-flash").unwrap(); let result = get_thinking_config(&config); @@ -1405,7 +1414,7 @@ data: [DONE]"#; params.insert("thinking_budget".to_string(), json!(4096)); let config = ModelConfig::new("gemini-2.5-flash") .unwrap() - .with_request_params(Some(params)); + 
.with_merged_request_params(params); let result = get_thinking_config(&config); assert!(result.is_some()); let thinking_config = result.unwrap(); @@ -1415,7 +1424,7 @@ data: [DONE]"#; params.insert("thinking_budget".to_string(), json!(-1)); let config = ModelConfig::new("gemini-2.5-flash") .unwrap() - .with_request_params(Some(params)); + .with_merged_request_params(params); let result = get_thinking_config(&config); assert!(result.is_some()); let thinking_config = result.unwrap(); diff --git a/crates/goose/src/providers/formats/openai.rs b/crates/goose/src/providers/formats/openai.rs index 66e9acaba5..1ec5aa1308 100644 --- a/crates/goose/src/providers/formats/openai.rs +++ b/crates/goose/src/providers/formats/openai.rs @@ -5,8 +5,8 @@ use crate::providers::base::{split_think_blocks, ProviderUsage, ThinkFilter, Usa use crate::providers::errors::ProviderError; use crate::providers::utils::{ convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model, - is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name, - ImageFormat, + is_valid_function_name, load_image_file, openai_reasoning_effort_for_thinking, + safely_parse_json, sanitize_function_name, ImageFormat, }; use anyhow::{anyhow, Error}; use async_stream::try_stream; @@ -1239,8 +1239,17 @@ pub fn create_request_with_options( )); } - let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name); + let (model_name, legacy_reasoning_effort) = extract_reasoning_effort(&model_config.model_name); let is_reasoning_model = is_openai_responses_model(&model_name); + let reasoning_effort = if is_reasoning_model { + model_config + .thinking_effort() + .map_or(legacy_reasoning_effort, |effort| { + openai_reasoning_effort_for_thinking(&model_name, effort) + }) + } else { + None + }; let system_message = json!({ "role": if is_reasoning_model { "developer" } else { "system" }, @@ -1299,7 +1308,7 @@ pub fn create_request_with_options( if let 
Some(params) = &model_config.request_params { if let Some(obj) = payload.as_object_mut() { for (key, value) in params { - if !is_reserved_request_param_key(key) { + if key != "thinking_effort" && !is_reserved_request_param_key(key) { obj.insert(key.clone(), value.clone()); } } @@ -2070,8 +2079,7 @@ mod tests { fn test_create_request_omits_max_tokens_when_unset() -> anyhow::Result<()> { // Unknown models on OpenAI-compatible local providers (llama_swap, // lmstudio) have no canonical record and no GOOSE_MAX_TOKENS, so the - // request must not pin the legacy 4096 default — the server should - // pick its own ceiling. See issue #9007. + // request must not pin the legacy 4096 default. See issue #9007. let model_config = ModelConfig { model_name: "some-unknown-local-model".to_string(), context_limit: None, @@ -2164,8 +2172,6 @@ mod tests { #[test] fn test_create_request_o1_default() -> anyhow::Result<()> { - // Without an explicit effort suffix the API picks its own default; - // we should omit reasoning_effort entirely but still use "developer" role. 
let model_config = ModelConfig { model_name: "o1".to_string(), context_limit: Some(4096), @@ -2209,17 +2215,111 @@ mod tests { } #[test] - fn test_create_request_o3_custom_reasoning_effort() -> anyhow::Result<()> { - // Test custom reasoning effort for O3 model + fn test_create_request_o1_medium_effort() -> anyhow::Result<()> { + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), json!("medium")); let model_config = ModelConfig { - model_name: "o3-mini-high".to_string(), + model_name: "o1".to_string(), context_limit: Some(4096), temperature: None, max_tokens: Some(1024), toolshim: false, toolshim_model: None, fast_model_config: None, - request_params: None, + request_params: Some(params), + reasoning: None, + }; + let request = create_request( + &model_config, + "system", + &[], + &[], + &ImageFormat::OpenAi, + false, + )?; + let obj = request.as_object().unwrap(); + + assert_eq!(obj.get("reasoning_effort"), Some(&json!("medium"))); + assert!(obj.get("thinking_effort").is_none()); + + Ok(()) + } + + #[test] + fn test_create_request_o3_off_effort_preserves_none() -> anyhow::Result<()> { + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), json!("off")); + let model_config = ModelConfig { + model_name: "o3".to_string(), + context_limit: Some(4096), + temperature: None, + max_tokens: Some(1024), + toolshim: false, + toolshim_model: None, + fast_model_config: None, + request_params: Some(params), + reasoning: None, + }; + let request = create_request( + &model_config, + "system", + &[], + &[], + &ImageFormat::OpenAi, + false, + )?; + let obj = request.as_object().unwrap(); + + assert_eq!(obj.get("reasoning_effort"), Some(&json!("none"))); + assert!(obj.get("thinking_effort").is_none()); + + Ok(()) + } + + #[test] + fn test_create_request_gpt5_pro_max_effort_uses_supported_level() -> anyhow::Result<()> { + let mut params = std::collections::HashMap::new(); + 
params.insert("thinking_effort".to_string(), json!("max")); + let model_config = ModelConfig { + model_name: "gpt-5.2-pro-2025-12-11".to_string(), + context_limit: Some(4096), + temperature: None, + max_tokens: Some(1024), + toolshim: false, + toolshim_model: None, + fast_model_config: None, + request_params: Some(params), + reasoning: None, + }; + let request = create_request( + &model_config, + "system", + &[], + &[], + &ImageFormat::OpenAi, + false, + )?; + let obj = request.as_object().unwrap(); + + assert_eq!(obj.get("reasoning_effort"), Some(&json!("high"))); + assert!(obj.get("thinking_effort").is_none()); + + Ok(()) + } + + #[test] + fn test_create_request_o3_custom_reasoning_effort() -> anyhow::Result<()> { + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), json!("high")); + let model_config = ModelConfig { + model_name: "o3-mini".to_string(), + context_limit: Some(4096), + temperature: None, + max_tokens: Some(1024), + toolshim: false, + toolshim_model: None, + fast_model_config: None, + request_params: Some(params), reasoning: None, }; let request = create_request( @@ -2246,6 +2346,7 @@ mod tests { for (key, value) in expected.as_object().unwrap() { assert_eq!(obj.get(key).unwrap(), value); } + assert!(obj.get("thinking_effort").is_none()); Ok(()) } diff --git a/crates/goose/src/providers/formats/openai_responses.rs b/crates/goose/src/providers/formats/openai_responses.rs index b328b0c9df..ef39fb8b20 100644 --- a/crates/goose/src/providers/formats/openai_responses.rs +++ b/crates/goose/src/providers/formats/openai_responses.rs @@ -2,7 +2,9 @@ use crate::conversation::message::{Message, MessageContent}; use crate::mcp_utils::extract_text_from_resource; use crate::model::ModelConfig; use crate::providers::base::{ProviderUsage, Usage}; -use crate::providers::utils::{extract_reasoning_effort, is_openai_responses_model}; +use crate::providers::utils::{ + extract_reasoning_effort, is_openai_responses_model, 
openai_reasoning_effort_for_thinking, +}; use anyhow::{anyhow, Error}; use async_stream::try_stream; use chrono; @@ -541,11 +543,26 @@ pub fn create_responses_request( add_message_items(&mut input_items, messages); - let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name); + let (model_name, legacy_reasoning_effort) = extract_reasoning_effort(&model_config.model_name); // All models routed here are responses-capable; temperature is rejected // by the API for reasoning models regardless of whether an explicit // effort suffix was provided. let is_reasoning_model = is_openai_responses_model(&model_name); + let reasoning_effort = if is_reasoning_model { + if let Some(effort) = legacy_reasoning_effort.as_deref() { + effort + .parse() + .ok() + .and_then(|effort| openai_reasoning_effort_for_thinking(&model_name, effort)) + .or(legacy_reasoning_effort) + } else { + model_config + .thinking_effort() + .and_then(|effort| openai_reasoning_effort_for_thinking(&model_name, effort)) + } + } else { + None + }; let mut payload = json!({ "model": model_name, @@ -1268,6 +1285,17 @@ mod tests { } } + #[test] + fn test_responses_request_with_normalized_effort_suffix() { + let model_config = ModelConfig::new("o3-mini-high").unwrap(); + + let result = create_responses_request(&model_config, "You are helpful.", &[], &[]).unwrap(); + + assert_eq!(result["model"], "o3-mini"); + assert_eq!(result["reasoning"]["effort"], "high"); + assert_eq!(result["reasoning"]["summary"], "auto"); + } + #[test] fn test_responses_request_without_effort_suffix_omits_reasoning() { for model_name in ["gpt-5.4", "o3", "gpt-5-nano"] { @@ -1294,6 +1322,30 @@ mod tests { } } + #[test] + fn test_responses_request_non_reasoning_model_ignores_global_thinking_effort() { + let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", Some("high"))]); + let model_config = ModelConfig { + model_name: "gpt-4o".to_string(), + context_limit: None, + temperature: None, + max_tokens: None, + 
toolshim: false, + toolshim_model: None, + fast_model_config: None, + request_params: None, + reasoning: None, + }; + + let result = create_responses_request(&model_config, "You are helpful.", &[], &[]).unwrap(); + + assert_eq!(result["model"], "gpt-4o"); + assert!( + result.get("reasoning").is_none(), + "non-reasoning models should not receive reasoning config" + ); + } + #[test] fn test_user_image_serialized_in_responses_request() { use crate::conversation::message::Message; diff --git a/crates/goose/src/providers/formats/openrouter.rs b/crates/goose/src/providers/formats/openrouter.rs index f20d613cc0..22ac7465b2 100644 --- a/crates/goose/src/providers/formats/openrouter.rs +++ b/crates/goose/src/providers/formats/openrouter.rs @@ -1,4 +1,5 @@ use crate::conversation::message::{Message, MessageContent, ProviderMetadata}; +use crate::model::{ModelConfig, ThinkingEffort}; use crate::providers::formats::openai; use rmcp::model::Role; use serde_json::{json, Value}; @@ -87,9 +88,40 @@ pub fn add_reasoning_details_to_request(payload: &mut Value, messages: &[Message } } +fn reasoning_effort_for_openrouter(effort: ThinkingEffort) -> &'static str { + match effort { + ThinkingEffort::Off => "none", + ThinkingEffort::Low => "low", + ThinkingEffort::Medium => "medium", + ThinkingEffort::High => "high", + ThinkingEffort::Max => "xhigh", + } +} + +pub fn apply_reasoning_config(payload: &mut Value, model_config: &ModelConfig) { + let Some(effort) = model_config.thinking_effort() else { + return; + }; + + if let Some(obj) = payload.as_object_mut() { + let clamped_effort = obj + .remove("reasoning_effort") + .and_then(|value| value.as_str().map(str::to_owned)); + if clamped_effort.is_none() && !model_config.is_reasoning_model() { + return; + } + + obj.insert( + "reasoning".to_string(), + json!({ "effort": clamped_effort.as_deref().unwrap_or_else(|| reasoning_effort_for_openrouter(effort)) }), + ); + } +} + #[cfg(test)] mod tests { use super::*; + use std::collections::HashMap; 
#[test] fn test_extract_reasoning_details() { @@ -149,4 +181,89 @@ mod tests { let details = get_reasoning_details(&tool_request.metadata).unwrap(); assert_eq!(details.len(), 1); } + + #[test] + fn test_apply_reasoning_config_uses_openrouter_reasoning_object() { + let mut payload = json!({ + "model": "openai/gpt-5", + "messages": [], + "reasoning_effort": "high" + }); + let mut model_config = ModelConfig::new_or_fail("openai/gpt-5"); + let mut params = HashMap::new(); + params.insert("thinking_effort".to_string(), json!("max")); + model_config.request_params = Some(params); + + apply_reasoning_config(&mut payload, &model_config); + + assert_eq!(payload["reasoning"], json!({ "effort": "high" })); + assert!(payload.get("reasoning_effort").is_none()); + } + + #[test] + fn test_apply_reasoning_config_uses_reasoning_metadata() { + let mut payload = json!({ + "model": "x-ai/grok-4", + "messages": [] + }); + let mut model_config = ModelConfig::new_or_fail("x-ai/grok-4"); + let mut params = HashMap::new(); + params.insert("thinking_effort".to_string(), json!("high")); + model_config.request_params = Some(params); + model_config.reasoning = Some(true); + + apply_reasoning_config(&mut payload, &model_config); + + assert_eq!(payload["reasoning"], json!({ "effort": "high" })); + } + + #[test] + fn test_apply_reasoning_config_uses_model_detection() { + let mut payload = json!({ + "model": "anthropic/claude-sonnet-4", + "messages": [] + }); + let mut model_config = ModelConfig::new_or_fail("anthropic/claude-sonnet-4"); + let mut params = HashMap::new(); + params.insert("thinking_effort".to_string(), json!("high")); + model_config.request_params = Some(params); + + apply_reasoning_config(&mut payload, &model_config); + + assert_eq!(payload["reasoning"], json!({ "effort": "high" })); + } + + #[test] + fn test_apply_reasoning_config_skips_non_reasoning_models() { + let mut payload = json!({ + "model": "openai/gpt-4o", + "messages": [] + }); + let mut model_config = 
ModelConfig::new_or_fail("openai/gpt-4o"); + let mut params = HashMap::new(); + params.insert("thinking_effort".to_string(), json!("high")); + model_config.request_params = Some(params); + model_config.reasoning = Some(false); + + apply_reasoning_config(&mut payload, &model_config); + + assert!(payload.get("reasoning").is_none()); + } + + #[test] + fn test_apply_reasoning_config_off_disables_reasoning() { + let mut payload = json!({ + "model": "x-ai/grok-4", + "messages": [] + }); + let mut model_config = ModelConfig::new_or_fail("x-ai/grok-4"); + let mut params = HashMap::new(); + params.insert("thinking_effort".to_string(), json!("off")); + model_config.request_params = Some(params); + model_config.reasoning = Some(true); + + apply_reasoning_config(&mut payload, &model_config); + + assert_eq!(payload["reasoning"], json!({ "effort": "none" })); + } } diff --git a/crates/goose/src/providers/openrouter.rs b/crates/goose/src/providers/openrouter.rs index 5e1cf5bf8e..59afb94a84 100644 --- a/crates/goose/src/providers/openrouter.rs +++ b/crates/goose/src/providers/openrouter.rs @@ -278,6 +278,7 @@ impl Provider for OpenRouterProvider { if is_gemini_model(&model_config.model_name) { openrouter_format::add_reasoning_details_to_request(&mut payload, messages); } + openrouter_format::apply_reasoning_config(&mut payload, model_config); if let Some(obj) = payload.as_object_mut() { obj.insert("transforms".to_string(), json!(["middle-out"])); diff --git a/crates/goose/src/providers/provider_registry.rs b/crates/goose/src/providers/provider_registry.rs index 7e2c14ce1f..ee4b6a1323 100644 --- a/crates/goose/src/providers/provider_registry.rs +++ b/crates/goose/src/providers/provider_registry.rs @@ -182,6 +182,7 @@ impl ProviderRegistry { output_token_cost: m.output_token_cost, currency: m.currency.clone(), supports_cache_control: Some(m.supports_cache_control.unwrap_or(false)), + reasoning: m.reasoning, }) .collect(); diff --git a/crates/goose/src/providers/utils.rs 
b/crates/goose/src/providers/utils.rs index 81d15f5fc0..87be4af751 100644 --- a/crates/goose/src/providers/utils.rs +++ b/crates/goose/src/providers/utils.rs @@ -1,7 +1,7 @@ use super::base::Usage; use super::errors::GoogleErrorCode; use crate::config::paths::Paths; -use crate::model::ModelConfig; +use crate::model::{ModelConfig, ThinkingEffort}; use crate::providers::errors::ProviderError; use anyhow::{anyhow, Result}; use base64::Engine; @@ -237,6 +237,49 @@ pub fn extract_reasoning_effort(model_name: &str) -> (String, Option) { (model_name.to_string(), None) } +pub fn openai_reasoning_effort_for_thinking( + model_name: &str, + effort: ThinkingEffort, +) -> Option { + if effort == ThinkingEffort::Off { + return Some("none".to_string()); + } + + let supported = openai_reasoning_efforts_for_model(model_name); + let preferred: &[&str] = match effort { + ThinkingEffort::Off => unreachable!(), + ThinkingEffort::Low => &["low", "medium", "high", "xhigh"], + ThinkingEffort::Medium => &["medium", "high", "low", "xhigh"], + ThinkingEffort::High => &["high", "medium", "xhigh", "low"], + ThinkingEffort::Max => &["xhigh", "high", "medium", "low"], + }; + + preferred + .iter() + .find(|level| supported.contains(level)) + .map(|level| (*level).to_string()) +} + +fn openai_reasoning_efforts_for_model(model_name: &str) -> &'static [&'static str] { + let normalized = model_name.to_ascii_lowercase(); + + if normalized.contains("gpt-5") { + if normalized.contains("-pro") || normalized.contains("/pro") { + &["high"] + } else if normalized.contains("gpt-5.4") + || normalized.contains("gpt-5-4") + || normalized.contains("gpt-5.5") + || normalized.contains("gpt-5-5") + { + &["low", "medium", "high", "xhigh"] + } else { + &["low", "medium", "high"] + } + } else { + &["low", "medium", "high"] + } +} + pub fn sanitize_function_name(name: &str) -> String { static RE: OnceLock = OnceLock::new(); let re = RE.get_or_init(|| Regex::new(r"[^a-zA-Z0-9_-]").unwrap()); diff --git 
a/documentation/docs/guides/cli-providers.md b/documentation/docs/guides/cli-providers.md index 366d582e84..f4eacd45bf 100644 --- a/documentation/docs/guides/cli-providers.md +++ b/documentation/docs/guides/cli-providers.md @@ -330,7 +330,7 @@ GOOSE_PROVIDER=claude-code GOOSE_MODE=approve goose session | `GOOSE_PROVIDER` | Set to `codex` to use this provider | None | | `GOOSE_MODEL` | Model to use (only known models are passed to CLI) | `gpt-5.2-codex` | | `CODEX_COMMAND` | Path to the Codex CLI command | `codex` | -| `GOOSE_THINKING_EFFORT` | Unified thinking effort (`off`, `low`, `medium`, `high`, `max`). Mapped to Codex CLI effort levels (`none/low/medium/high/xhigh`). | `high` | +| `CODEX_REASONING_EFFORT` | Reasoning effort level: `low`, `medium`, `high`, or `xhigh` (`none` is only supported on non-codex models like `gpt-5.2`) | `high` | | `CODEX_ENABLE_SKILLS` | Enable Codex skills: `true` or `false` | `true` | | `CODEX_SKIP_GIT_CHECK` | Skip git repository requirement: `true` or `false` | `false` | diff --git a/ui/desktop/openapi.json b/ui/desktop/openapi.json index 460ea4649f..98be0e380a 100644 --- a/ui/desktop/openapi.json +++ b/ui/desktop/openapi.json @@ -1369,6 +1369,56 @@ } } }, + "/config/providers/{name}/model-info": { + "post": { + "tags": [ + "super::routes::config_management" + ], + "operationId": "get_provider_model_info", + "parameters": [ + { + "name": "name", + "in": "path", + "description": "Provider name (e.g., openai)", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProviderModelInfoQuery" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Model metadata fetched successfully", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ModelInfo" + } + } + } + }, + "400": { + "description": "Unknown provider, provider not configured, or authentication error" + }, + 
"429": { + "description": "Rate limit exceeded" + }, + "500": { + "description": "Internal server error" + } + } + } + }, "/config/providers/{name}/models": { "get": { "tags": [ @@ -1394,7 +1444,7 @@ "schema": { "type": "array", "items": { - "type": "string" + "$ref": "#/components/schemas/ModelInfo" } } } @@ -6533,6 +6583,10 @@ "description": "Cost per token for output in USD (optional)", "nullable": true }, + "reasoning": { + "type": "boolean", + "description": "Whether this model supports reasoning/thinking controls" + }, "supports_cache_control": { "type": "boolean", "description": "Whether this model supports cache control", @@ -6546,6 +6600,7 @@ "provider", "model", "context_limit", + "reasoning", "currency" ], "properties": { @@ -6586,6 +6641,9 @@ }, "provider": { "type": "string" + }, + "reasoning": { + "type": "boolean" } } }, @@ -6973,6 +7031,17 @@ } } }, + "ProviderModelInfoQuery": { + "type": "object", + "required": [ + "model" + ], + "properties": { + "model": { + "type": "string" + } + } + }, "ProviderTemplate": { "type": "object", "required": [ @@ -8568,6 +8637,16 @@ } } }, + "ThinkingEffort": { + "type": "string", + "enum": [ + "off", + "low", + "medium", + "high", + "max" + ] + }, "TokenState": { "type": "object", "required": [ diff --git a/ui/desktop/src/api/index.ts b/ui/desktop/src/api/index.ts index fd1811a2c9..d8f51b7cc8 100644 --- a/ui/desktop/src/api/index.ts +++ b/ui/desktop/src/api/index.ts @@ -1,4 +1,4 @@ // This file is auto-generated by @hey-api/openapi-ts -export { addExtension, agentAddExtension, agentRemoveExtension, callTool, cancelDownload, cancelLocalModelDownload, checkProvider, cleanupProviderCache, configureProviderOauth, confirmToolAction, createCustomProvider, createRecipe, createSchedule, decodeRecipe, deleteLocalModel, deleteModel, deleteRecipe, deleteSchedule, deleteSession, diagnostics, downloadHfModel, downloadModel, encodeRecipe, exportApp, exportSession, forkSession, getCanonicalModelInfo, getCustomProvider, 
getDictationConfig, getDownloadProgress, getExtensions, getFeatures, getLocalModelDownloadProgress, getModelSettings, getPrompt, getPrompts, getProviderCatalog, getProviderCatalogTemplate, getProviderModels, getRepoFiles, getSession, getSessionExtensions, getSessionInsights, getSlashCommands, getTools, getTunnelStatus, importApp, importSession, importSessionNostr, inspectRunningJob, killRunningJob, listApps, listLocalModels, listModels, listRecipes, listSchedules, listSessions, mcpUiProxy, type Options, parseRecipe, pauseSchedule, providers, readAllConfig, readConfig, readResource, recipeToYaml, removeConfig, removeCustomProvider, removeExtension, reply, resetPrompt, restartAgent, resumeAgent, runNowHandler, savePrompt, saveRecipe, scanRecipe, scheduleRecipe, searchHfModels, searchSessions, sendTelemetryEvent, sessionCancel, sessionEvents, sessionReply, sessionsHandler, setConfigProvider, setRecipeSlashCommand, shareSessionNostr, startAgent, startNanogptSetup, startOpenrouterSetup, startTetrateSetup, startTunnel, status, stopAgent, stopTunnel, syncFeaturedModels, systemInfo, transcribeDictation, unpauseSchedule, updateAgentProvider, updateCustomProvider, updateFromSession, updateModelSettings, updateSchedule, updateSession, updateSessionName, updateSessionUserRecipeValues, updateWorkingDir, upsertConfig, upsertPermissions, validateConfig } from './sdk.gen'; -export type { ActionRequired, ActionRequiredData, AddExtensionData, AddExtensionErrors, AddExtensionRequest, AddExtensionResponse, AddExtensionResponses, AgentAddExtensionData, AgentAddExtensionErrors, AgentAddExtensionResponse, AgentAddExtensionResponses, AgentRemoveExtensionData, AgentRemoveExtensionErrors, AgentRemoveExtensionResponse, AgentRemoveExtensionResponses, Annotations, Author, AuthorRequest, CallToolData, CallToolError, CallToolErrors, CallToolRequest, CallToolResponse, CallToolResponse2, CallToolResponses, CancelDownloadData, CancelDownloadErrors, CancelDownloadResponses, 
CancelLocalModelDownloadData, CancelLocalModelDownloadErrors, CancelLocalModelDownloadResponses, CancelRequest, ChatRequest, CheckProviderData, CheckProviderRequest, CleanupProviderCacheData, CleanupProviderCacheErrors, CleanupProviderCacheResponse, CleanupProviderCacheResponses, ClientOptions, CommandType, ConfigKey, ConfigKeyQuery, ConfigResponse, ConfigureProviderOauthData, ConfigureProviderOauthErrors, ConfigureProviderOauthResponses, ConfirmToolActionData, ConfirmToolActionErrors, ConfirmToolActionRequest, ConfirmToolActionResponses, Content, ContentBlock, Conversation, CreateCustomProviderData, CreateCustomProviderErrors, CreateCustomProviderResponse, CreateCustomProviderResponse2, CreateCustomProviderResponses, CreateRecipeData, CreateRecipeErrors, CreateRecipeRequest, CreateRecipeResponse, CreateRecipeResponse2, CreateRecipeResponses, CreateScheduleData, CreateScheduleErrors, CreateScheduleRequest, CreateScheduleResponse, CreateScheduleResponses, CspMetadata, DeclarativeProviderConfig, DecodeRecipeData, DecodeRecipeErrors, DecodeRecipeRequest, DecodeRecipeResponse, DecodeRecipeResponse2, DecodeRecipeResponses, DeleteLocalModelData, DeleteLocalModelErrors, DeleteLocalModelResponses, DeleteModelData, DeleteModelErrors, DeleteModelResponses, DeleteRecipeData, DeleteRecipeErrors, DeleteRecipeRequest, DeleteRecipeResponse, DeleteRecipeResponses, DeleteScheduleData, DeleteScheduleErrors, DeleteScheduleResponse, DeleteScheduleResponses, DeleteSessionData, DeleteSessionErrors, DeleteSessionResponses, DiagnosticsData, DiagnosticsErrors, DiagnosticsResponse, DiagnosticsResponses, DictationProvider, DictationProviderStatus, DownloadHfModelData, DownloadHfModelErrors, DownloadHfModelResponse, DownloadHfModelResponses, DownloadModelData, DownloadModelErrors, DownloadModelRequest, DownloadModelResponses, DownloadProgress, DownloadStatus, EmbeddedResource, EncodeRecipeData, EncodeRecipeErrors, EncodeRecipeRequest, EncodeRecipeResponse, EncodeRecipeResponse2, 
EncodeRecipeResponses, Envs, EnvVarConfig, ErrorResponse, ExportAppData, ExportAppError, ExportAppErrors, ExportAppResponse, ExportAppResponses, ExportSessionData, ExportSessionErrors, ExportSessionResponse, ExportSessionResponses, ExtensionConfig, ExtensionData, ExtensionEntry, ExtensionLoadResult, ExtensionQuery, ExtensionResponse, FeaturesResponse, ForkRequest, ForkResponse, ForkSessionData, ForkSessionErrors, ForkSessionResponse, ForkSessionResponses, FrontendToolRequest, GetCanonicalModelInfoData, GetCanonicalModelInfoResponse, GetCanonicalModelInfoResponses, GetCustomProviderData, GetCustomProviderErrors, GetCustomProviderResponse, GetCustomProviderResponses, GetDictationConfigData, GetDictationConfigResponse, GetDictationConfigResponses, GetDownloadProgressData, GetDownloadProgressErrors, GetDownloadProgressResponse, GetDownloadProgressResponses, GetExtensionsData, GetExtensionsErrors, GetExtensionsResponse, GetExtensionsResponses, GetFeaturesData, GetFeaturesResponse, GetFeaturesResponses, GetLocalModelDownloadProgressData, GetLocalModelDownloadProgressErrors, GetLocalModelDownloadProgressResponse, GetLocalModelDownloadProgressResponses, GetModelSettingsData, GetModelSettingsErrors, GetModelSettingsResponse, GetModelSettingsResponses, GetPromptData, GetPromptErrors, GetPromptResponse, GetPromptResponses, GetPromptsData, GetPromptsResponse, GetPromptsResponses, GetProviderCatalogData, GetProviderCatalogErrors, GetProviderCatalogResponse, GetProviderCatalogResponses, GetProviderCatalogTemplateData, GetProviderCatalogTemplateErrors, GetProviderCatalogTemplateResponse, GetProviderCatalogTemplateResponses, GetProviderModelsData, GetProviderModelsErrors, GetProviderModelsResponse, GetProviderModelsResponses, GetRepoFilesData, GetRepoFilesResponse, GetRepoFilesResponses, GetSessionData, GetSessionErrors, GetSessionExtensionsData, GetSessionExtensionsErrors, GetSessionExtensionsResponse, GetSessionExtensionsResponses, GetSessionInsightsData, 
GetSessionInsightsErrors, GetSessionInsightsResponse, GetSessionInsightsResponses, GetSessionResponse, GetSessionResponses, GetSlashCommandsData, GetSlashCommandsResponse, GetSlashCommandsResponses, GetToolsData, GetToolsErrors, GetToolsQuery, GetToolsResponse, GetToolsResponses, GetTunnelStatusData, GetTunnelStatusResponse, GetTunnelStatusResponses, GooseApp, GooseMode, HfGgufFile, HfModelInfo, HfQuantVariant, Icon, IconTheme, ImageContent, ImportAppData, ImportAppError, ImportAppErrors, ImportAppRequest, ImportAppResponse, ImportAppResponse2, ImportAppResponses, ImportSessionData, ImportSessionErrors, ImportSessionNostrData, ImportSessionNostrErrors, ImportSessionNostrRequest, ImportSessionNostrResponse, ImportSessionNostrResponses, ImportSessionRequest, ImportSessionResponse, ImportSessionResponses, InspectJobResponse, InspectRunningJobData, InspectRunningJobErrors, InspectRunningJobResponse, InspectRunningJobResponses, JsonObject, KillJobResponse, KillRunningJobData, KillRunningJobResponses, ListAppsData, ListAppsError, ListAppsErrors, ListAppsRequest, ListAppsResponse, ListAppsResponse2, ListAppsResponses, ListLocalModelsData, ListLocalModelsResponse, ListLocalModelsResponses, ListModelsData, ListModelsResponse, ListModelsResponses, ListRecipeResponse, ListRecipesData, ListRecipesErrors, ListRecipesResponse, ListRecipesResponses, ListSchedulesData, ListSchedulesErrors, ListSchedulesResponse, ListSchedulesResponse2, ListSchedulesResponses, ListSessionsData, ListSessionsErrors, ListSessionsResponse, ListSessionsResponses, LoadedProvider, LocalModelResponse, McpAppResource, McpUiProxyData, McpUiProxyErrors, McpUiProxyResponses, Message, MessageContent, MessageEvent, MessageMetadata, ModelCapabilities, ModelConfig, ModelDownloadStatus, ModelInfo, ModelInfoData, ModelInfoQuery, ModelInfoResponse, ModelSettings, ModelTemplate, ParseRecipeData, ParseRecipeError, ParseRecipeErrors, ParseRecipeRequest, ParseRecipeResponse, ParseRecipeResponse2, ParseRecipeResponses, 
PauseScheduleData, PauseScheduleErrors, PauseScheduleResponse, PauseScheduleResponses, Permission, PermissionLevel, PermissionsMetadata, PrincipalType, PromptContentResponse, PromptsListResponse, ProviderCatalogEntry, ProviderDetails, ProviderEngine, ProviderMetadata, ProvidersData, ProvidersResponse, ProvidersResponse2, ProvidersResponses, ProviderTemplate, ProviderType, RawAudioContent, RawEmbeddedResource, RawImageContent, RawResource, RawTextContent, ReadAllConfigData, ReadAllConfigResponse, ReadAllConfigResponses, ReadConfigData, ReadConfigErrors, ReadConfigResponses, ReadResourceData, ReadResourceErrors, ReadResourceRequest, ReadResourceResponse, ReadResourceResponse2, ReadResourceResponses, Recipe, RecipeManifest, RecipeParameter, RecipeParameterInputType, RecipeParameterRequirement, RecipeToYamlData, RecipeToYamlError, RecipeToYamlErrors, RecipeToYamlRequest, RecipeToYamlResponse, RecipeToYamlResponse2, RecipeToYamlResponses, RedactedThinkingContent, RemoveConfigData, RemoveConfigErrors, RemoveConfigResponse, RemoveConfigResponses, RemoveCustomProviderData, RemoveCustomProviderErrors, RemoveCustomProviderResponse, RemoveCustomProviderResponses, RemoveExtensionData, RemoveExtensionErrors, RemoveExtensionRequest, RemoveExtensionResponse, RemoveExtensionResponses, ReplyData, ReplyErrors, ReplyResponse, ReplyResponses, RepoVariantsResponse, ResetPromptData, ResetPromptErrors, ResetPromptResponse, ResetPromptResponses, ResourceContents, ResourceMetadata, Response, RestartAgentData, RestartAgentErrors, RestartAgentRequest, RestartAgentResponse, RestartAgentResponse2, RestartAgentResponses, ResumeAgentData, ResumeAgentErrors, ResumeAgentRequest, ResumeAgentResponse, ResumeAgentResponse2, ResumeAgentResponses, RetryConfig, Role, RunNowHandlerData, RunNowHandlerErrors, RunNowHandlerResponse, RunNowHandlerResponses, RunNowResponse, SamplingConfig, SavePromptData, SavePromptErrors, SavePromptRequest, SavePromptResponse, SavePromptResponses, SaveRecipeData, 
SaveRecipeError, SaveRecipeErrors, SaveRecipeRequest, SaveRecipeResponse, SaveRecipeResponse2, SaveRecipeResponses, ScanRecipeData, ScanRecipeRequest, ScanRecipeResponse, ScanRecipeResponse2, ScanRecipeResponses, ScheduledJob, ScheduleRecipeData, ScheduleRecipeErrors, ScheduleRecipeRequest, ScheduleRecipeResponses, SearchHfModelsData, SearchHfModelsErrors, SearchHfModelsResponse, SearchHfModelsResponses, SearchSessionsData, SearchSessionsErrors, SearchSessionsResponse, SearchSessionsResponses, SendTelemetryEventData, SendTelemetryEventResponses, Session, SessionCancelData, SessionCancelResponses, SessionDisplayInfo, SessionEventsData, SessionEventsErrors, SessionEventsResponse, SessionEventsResponses, SessionExtensionsResponse, SessionInsights, SessionListResponse, SessionReplyData, SessionReplyErrors, SessionReplyRequest, SessionReplyResponse, SessionReplyResponse2, SessionReplyResponses, SessionsHandlerData, SessionsHandlerErrors, SessionsHandlerResponse, SessionsHandlerResponses, SessionsQuery, SessionType, SetConfigProviderData, SetProviderRequest, SetRecipeSlashCommandData, SetRecipeSlashCommandErrors, SetRecipeSlashCommandResponses, SetSlashCommandRequest, Settings, SetupResponse, ShareSessionNostrData, ShareSessionNostrErrors, ShareSessionNostrRequest, ShareSessionNostrResponse, ShareSessionNostrResponse2, ShareSessionNostrResponses, SlashCommand, SlashCommandsResponse, StartAgentData, StartAgentError, StartAgentErrors, StartAgentRequest, StartAgentResponse, StartAgentResponses, StartNanogptSetupData, StartNanogptSetupResponse, StartNanogptSetupResponses, StartOpenrouterSetupData, StartOpenrouterSetupResponse, StartOpenrouterSetupResponses, StartTetrateSetupData, StartTetrateSetupResponse, StartTetrateSetupResponses, StartTunnelData, StartTunnelError, StartTunnelErrors, StartTunnelResponse, StartTunnelResponses, StatusData, StatusResponse, StatusResponses, StopAgentData, StopAgentErrors, StopAgentRequest, StopAgentResponse, StopAgentResponses, 
StopTunnelData, StopTunnelError, StopTunnelErrors, StopTunnelResponses, SubRecipe, SuccessCheck, SyncFeaturedModelsData, SyncFeaturedModelsResponses, SystemInfo, SystemInfoData, SystemInfoResponse, SystemInfoResponses, SystemNotificationContent, SystemNotificationType, TaskSupport, TelemetryEventRequest, Template, TextContent, ThinkingContent, TokenState, Tool, ToolAnnotations, ToolConfirmationRequest, ToolExecution, ToolInfo, ToolPermission, ToolRequest, ToolResponse, TranscribeDictationData, TranscribeDictationErrors, TranscribeDictationResponse, TranscribeDictationResponses, TranscribeRequest, TranscribeResponse, TunnelInfo, TunnelState, UiMetadata, UnpauseScheduleData, UnpauseScheduleErrors, UnpauseScheduleResponse, UnpauseScheduleResponses, UpdateAgentProviderData, UpdateAgentProviderErrors, UpdateAgentProviderResponses, UpdateCustomProviderData, UpdateCustomProviderErrors, UpdateCustomProviderRequest, UpdateCustomProviderResponse, UpdateCustomProviderResponses, UpdateFromSessionData, UpdateFromSessionErrors, UpdateFromSessionRequest, UpdateFromSessionResponses, UpdateModelSettingsData, UpdateModelSettingsErrors, UpdateModelSettingsResponse, UpdateModelSettingsResponses, UpdateProviderRequest, UpdateScheduleData, UpdateScheduleErrors, UpdateScheduleRequest, UpdateScheduleResponse, UpdateScheduleResponses, UpdateSessionData, UpdateSessionErrors, UpdateSessionNameData, UpdateSessionNameErrors, UpdateSessionNameRequest, UpdateSessionNameResponses, UpdateSessionRequest, UpdateSessionResponses, UpdateSessionUserRecipeValuesData, UpdateSessionUserRecipeValuesError, UpdateSessionUserRecipeValuesErrors, UpdateSessionUserRecipeValuesRequest, UpdateSessionUserRecipeValuesResponse, UpdateSessionUserRecipeValuesResponse2, UpdateSessionUserRecipeValuesResponses, UpdateWorkingDirData, UpdateWorkingDirErrors, UpdateWorkingDirRequest, UpdateWorkingDirResponses, UpsertConfigData, UpsertConfigErrors, UpsertConfigQuery, UpsertConfigResponse, UpsertConfigResponses, 
UpsertPermissionsData, UpsertPermissionsErrors, UpsertPermissionsQuery, UpsertPermissionsResponse, UpsertPermissionsResponses, ValidateConfigData, ValidateConfigErrors, ValidateConfigResponse, ValidateConfigResponses, WhisperModelResponse, WindowProps } from './types.gen'; +export { addExtension, agentAddExtension, agentRemoveExtension, callTool, cancelDownload, cancelLocalModelDownload, checkProvider, cleanupProviderCache, configureProviderOauth, confirmToolAction, createCustomProvider, createRecipe, createSchedule, decodeRecipe, deleteLocalModel, deleteModel, deleteRecipe, deleteSchedule, deleteSession, diagnostics, downloadHfModel, downloadModel, encodeRecipe, exportApp, exportSession, forkSession, getCanonicalModelInfo, getCustomProvider, getDictationConfig, getDownloadProgress, getExtensions, getFeatures, getLocalModelDownloadProgress, getModelSettings, getPrompt, getPrompts, getProviderCatalog, getProviderCatalogTemplate, getProviderModelInfo, getProviderModels, getRepoFiles, getSession, getSessionExtensions, getSessionInsights, getSlashCommands, getTools, getTunnelStatus, importApp, importSession, importSessionNostr, inspectRunningJob, killRunningJob, listApps, listLocalModels, listModels, listRecipes, listSchedules, listSessions, mcpUiProxy, type Options, parseRecipe, pauseSchedule, providers, readAllConfig, readConfig, readResource, recipeToYaml, removeConfig, removeCustomProvider, removeExtension, reply, resetPrompt, restartAgent, resumeAgent, runNowHandler, savePrompt, saveRecipe, scanRecipe, scheduleRecipe, searchHfModels, searchSessions, sendTelemetryEvent, sessionCancel, sessionEvents, sessionReply, sessionsHandler, setConfigProvider, setRecipeSlashCommand, shareSessionNostr, startAgent, startNanogptSetup, startOpenrouterSetup, startTetrateSetup, startTunnel, status, stopAgent, stopTunnel, syncFeaturedModels, systemInfo, transcribeDictation, unpauseSchedule, updateAgentProvider, updateCustomProvider, updateFromSession, updateModelSettings, 
updateSchedule, updateSession, updateSessionName, updateSessionUserRecipeValues, updateWorkingDir, upsertConfig, upsertPermissions, validateConfig } from './sdk.gen'; +export type { ActionRequired, ActionRequiredData, AddExtensionData, AddExtensionErrors, AddExtensionRequest, AddExtensionResponse, AddExtensionResponses, AgentAddExtensionData, AgentAddExtensionErrors, AgentAddExtensionResponse, AgentAddExtensionResponses, AgentRemoveExtensionData, AgentRemoveExtensionErrors, AgentRemoveExtensionResponse, AgentRemoveExtensionResponses, Annotations, Author, AuthorRequest, CallToolData, CallToolError, CallToolErrors, CallToolRequest, CallToolResponse, CallToolResponse2, CallToolResponses, CancelDownloadData, CancelDownloadErrors, CancelDownloadResponses, CancelLocalModelDownloadData, CancelLocalModelDownloadErrors, CancelLocalModelDownloadResponses, CancelRequest, ChatRequest, CheckProviderData, CheckProviderRequest, CleanupProviderCacheData, CleanupProviderCacheErrors, CleanupProviderCacheResponse, CleanupProviderCacheResponses, ClientOptions, CommandType, ConfigKey, ConfigKeyQuery, ConfigResponse, ConfigureProviderOauthData, ConfigureProviderOauthErrors, ConfigureProviderOauthResponses, ConfirmToolActionData, ConfirmToolActionErrors, ConfirmToolActionRequest, ConfirmToolActionResponses, Content, ContentBlock, Conversation, CreateCustomProviderData, CreateCustomProviderErrors, CreateCustomProviderResponse, CreateCustomProviderResponse2, CreateCustomProviderResponses, CreateRecipeData, CreateRecipeErrors, CreateRecipeRequest, CreateRecipeResponse, CreateRecipeResponse2, CreateRecipeResponses, CreateScheduleData, CreateScheduleErrors, CreateScheduleRequest, CreateScheduleResponse, CreateScheduleResponses, CspMetadata, DeclarativeProviderConfig, DecodeRecipeData, DecodeRecipeErrors, DecodeRecipeRequest, DecodeRecipeResponse, DecodeRecipeResponse2, DecodeRecipeResponses, DeleteLocalModelData, DeleteLocalModelErrors, DeleteLocalModelResponses, DeleteModelData, 
DeleteModelErrors, DeleteModelResponses, DeleteRecipeData, DeleteRecipeErrors, DeleteRecipeRequest, DeleteRecipeResponse, DeleteRecipeResponses, DeleteScheduleData, DeleteScheduleErrors, DeleteScheduleResponse, DeleteScheduleResponses, DeleteSessionData, DeleteSessionErrors, DeleteSessionResponses, DiagnosticsData, DiagnosticsErrors, DiagnosticsResponse, DiagnosticsResponses, DictationProvider, DictationProviderStatus, DownloadHfModelData, DownloadHfModelErrors, DownloadHfModelResponse, DownloadHfModelResponses, DownloadModelData, DownloadModelErrors, DownloadModelRequest, DownloadModelResponses, DownloadProgress, DownloadStatus, EmbeddedResource, EncodeRecipeData, EncodeRecipeErrors, EncodeRecipeRequest, EncodeRecipeResponse, EncodeRecipeResponse2, EncodeRecipeResponses, Envs, EnvVarConfig, ErrorResponse, ExportAppData, ExportAppError, ExportAppErrors, ExportAppResponse, ExportAppResponses, ExportSessionData, ExportSessionErrors, ExportSessionResponse, ExportSessionResponses, ExtensionConfig, ExtensionData, ExtensionEntry, ExtensionLoadResult, ExtensionQuery, ExtensionResponse, FeaturesResponse, ForkRequest, ForkResponse, ForkSessionData, ForkSessionErrors, ForkSessionResponse, ForkSessionResponses, FrontendToolRequest, GetCanonicalModelInfoData, GetCanonicalModelInfoResponse, GetCanonicalModelInfoResponses, GetCustomProviderData, GetCustomProviderErrors, GetCustomProviderResponse, GetCustomProviderResponses, GetDictationConfigData, GetDictationConfigResponse, GetDictationConfigResponses, GetDownloadProgressData, GetDownloadProgressErrors, GetDownloadProgressResponse, GetDownloadProgressResponses, GetExtensionsData, GetExtensionsErrors, GetExtensionsResponse, GetExtensionsResponses, GetFeaturesData, GetFeaturesResponse, GetFeaturesResponses, GetLocalModelDownloadProgressData, GetLocalModelDownloadProgressErrors, GetLocalModelDownloadProgressResponse, GetLocalModelDownloadProgressResponses, GetModelSettingsData, GetModelSettingsErrors, GetModelSettingsResponse, 
GetModelSettingsResponses, GetPromptData, GetPromptErrors, GetPromptResponse, GetPromptResponses, GetPromptsData, GetPromptsResponse, GetPromptsResponses, GetProviderCatalogData, GetProviderCatalogErrors, GetProviderCatalogResponse, GetProviderCatalogResponses, GetProviderCatalogTemplateData, GetProviderCatalogTemplateErrors, GetProviderCatalogTemplateResponse, GetProviderCatalogTemplateResponses, GetProviderModelInfoData, GetProviderModelInfoErrors, GetProviderModelInfoResponse, GetProviderModelInfoResponses, GetProviderModelsData, GetProviderModelsErrors, GetProviderModelsResponse, GetProviderModelsResponses, GetRepoFilesData, GetRepoFilesResponse, GetRepoFilesResponses, GetSessionData, GetSessionErrors, GetSessionExtensionsData, GetSessionExtensionsErrors, GetSessionExtensionsResponse, GetSessionExtensionsResponses, GetSessionInsightsData, GetSessionInsightsErrors, GetSessionInsightsResponse, GetSessionInsightsResponses, GetSessionResponse, GetSessionResponses, GetSlashCommandsData, GetSlashCommandsResponse, GetSlashCommandsResponses, GetToolsData, GetToolsErrors, GetToolsQuery, GetToolsResponse, GetToolsResponses, GetTunnelStatusData, GetTunnelStatusResponse, GetTunnelStatusResponses, GooseApp, GooseMode, HfGgufFile, HfModelInfo, HfQuantVariant, Icon, IconTheme, ImageContent, ImportAppData, ImportAppError, ImportAppErrors, ImportAppRequest, ImportAppResponse, ImportAppResponse2, ImportAppResponses, ImportSessionData, ImportSessionErrors, ImportSessionNostrData, ImportSessionNostrErrors, ImportSessionNostrRequest, ImportSessionNostrResponse, ImportSessionNostrResponses, ImportSessionRequest, ImportSessionResponse, ImportSessionResponses, InspectJobResponse, InspectRunningJobData, InspectRunningJobErrors, InspectRunningJobResponse, InspectRunningJobResponses, JsonObject, KillJobResponse, KillRunningJobData, KillRunningJobResponses, ListAppsData, ListAppsError, ListAppsErrors, ListAppsRequest, ListAppsResponse, ListAppsResponse2, ListAppsResponses, 
ListLocalModelsData, ListLocalModelsResponse, ListLocalModelsResponses, ListModelsData, ListModelsResponse, ListModelsResponses, ListRecipeResponse, ListRecipesData, ListRecipesErrors, ListRecipesResponse, ListRecipesResponses, ListSchedulesData, ListSchedulesErrors, ListSchedulesResponse, ListSchedulesResponse2, ListSchedulesResponses, ListSessionsData, ListSessionsErrors, ListSessionsResponse, ListSessionsResponses, LoadedProvider, LocalModelResponse, McpAppResource, McpUiProxyData, McpUiProxyErrors, McpUiProxyResponses, Message, MessageContent, MessageEvent, MessageMetadata, ModelCapabilities, ModelConfig, ModelDownloadStatus, ModelInfo, ModelInfoData, ModelInfoQuery, ModelInfoResponse, ModelSettings, ModelTemplate, ParseRecipeData, ParseRecipeError, ParseRecipeErrors, ParseRecipeRequest, ParseRecipeResponse, ParseRecipeResponse2, ParseRecipeResponses, PauseScheduleData, PauseScheduleErrors, PauseScheduleResponse, PauseScheduleResponses, Permission, PermissionLevel, PermissionsMetadata, PrincipalType, PromptContentResponse, PromptsListResponse, ProviderCatalogEntry, ProviderDetails, ProviderEngine, ProviderMetadata, ProviderModelInfoQuery, ProvidersData, ProvidersResponse, ProvidersResponse2, ProvidersResponses, ProviderTemplate, ProviderType, RawAudioContent, RawEmbeddedResource, RawImageContent, RawResource, RawTextContent, ReadAllConfigData, ReadAllConfigResponse, ReadAllConfigResponses, ReadConfigData, ReadConfigErrors, ReadConfigResponses, ReadResourceData, ReadResourceErrors, ReadResourceRequest, ReadResourceResponse, ReadResourceResponse2, ReadResourceResponses, Recipe, RecipeManifest, RecipeParameter, RecipeParameterInputType, RecipeParameterRequirement, RecipeToYamlData, RecipeToYamlError, RecipeToYamlErrors, RecipeToYamlRequest, RecipeToYamlResponse, RecipeToYamlResponse2, RecipeToYamlResponses, RedactedThinkingContent, RemoveConfigData, RemoveConfigErrors, RemoveConfigResponse, RemoveConfigResponses, RemoveCustomProviderData, 
RemoveCustomProviderErrors, RemoveCustomProviderResponse, RemoveCustomProviderResponses, RemoveExtensionData, RemoveExtensionErrors, RemoveExtensionRequest, RemoveExtensionResponse, RemoveExtensionResponses, ReplyData, ReplyErrors, ReplyResponse, ReplyResponses, RepoVariantsResponse, ResetPromptData, ResetPromptErrors, ResetPromptResponse, ResetPromptResponses, ResourceContents, ResourceMetadata, Response, RestartAgentData, RestartAgentErrors, RestartAgentRequest, RestartAgentResponse, RestartAgentResponse2, RestartAgentResponses, ResumeAgentData, ResumeAgentErrors, ResumeAgentRequest, ResumeAgentResponse, ResumeAgentResponse2, ResumeAgentResponses, RetryConfig, Role, RunNowHandlerData, RunNowHandlerErrors, RunNowHandlerResponse, RunNowHandlerResponses, RunNowResponse, SamplingConfig, SavePromptData, SavePromptErrors, SavePromptRequest, SavePromptResponse, SavePromptResponses, SaveRecipeData, SaveRecipeError, SaveRecipeErrors, SaveRecipeRequest, SaveRecipeResponse, SaveRecipeResponse2, SaveRecipeResponses, ScanRecipeData, ScanRecipeRequest, ScanRecipeResponse, ScanRecipeResponse2, ScanRecipeResponses, ScheduledJob, ScheduleRecipeData, ScheduleRecipeErrors, ScheduleRecipeRequest, ScheduleRecipeResponses, SearchHfModelsData, SearchHfModelsErrors, SearchHfModelsResponse, SearchHfModelsResponses, SearchSessionsData, SearchSessionsErrors, SearchSessionsResponse, SearchSessionsResponses, SendTelemetryEventData, SendTelemetryEventResponses, Session, SessionCancelData, SessionCancelResponses, SessionDisplayInfo, SessionEventsData, SessionEventsErrors, SessionEventsResponse, SessionEventsResponses, SessionExtensionsResponse, SessionInsights, SessionListResponse, SessionReplyData, SessionReplyErrors, SessionReplyRequest, SessionReplyResponse, SessionReplyResponse2, SessionReplyResponses, SessionsHandlerData, SessionsHandlerErrors, SessionsHandlerResponse, SessionsHandlerResponses, SessionsQuery, SessionType, SetConfigProviderData, SetProviderRequest, 
SetRecipeSlashCommandData, SetRecipeSlashCommandErrors, SetRecipeSlashCommandResponses, SetSlashCommandRequest, Settings, SetupResponse, ShareSessionNostrData, ShareSessionNostrErrors, ShareSessionNostrRequest, ShareSessionNostrResponse, ShareSessionNostrResponse2, ShareSessionNostrResponses, SlashCommand, SlashCommandsResponse, StartAgentData, StartAgentError, StartAgentErrors, StartAgentRequest, StartAgentResponse, StartAgentResponses, StartNanogptSetupData, StartNanogptSetupResponse, StartNanogptSetupResponses, StartOpenrouterSetupData, StartOpenrouterSetupResponse, StartOpenrouterSetupResponses, StartTetrateSetupData, StartTetrateSetupResponse, StartTetrateSetupResponses, StartTunnelData, StartTunnelError, StartTunnelErrors, StartTunnelResponse, StartTunnelResponses, StatusData, StatusResponse, StatusResponses, StopAgentData, StopAgentErrors, StopAgentRequest, StopAgentResponse, StopAgentResponses, StopTunnelData, StopTunnelError, StopTunnelErrors, StopTunnelResponses, SubRecipe, SuccessCheck, SyncFeaturedModelsData, SyncFeaturedModelsResponses, SystemInfo, SystemInfoData, SystemInfoResponse, SystemInfoResponses, SystemNotificationContent, SystemNotificationType, TaskSupport, TelemetryEventRequest, Template, TextContent, ThinkingContent, ThinkingEffort, TokenState, Tool, ToolAnnotations, ToolConfirmationRequest, ToolExecution, ToolInfo, ToolPermission, ToolRequest, ToolResponse, TranscribeDictationData, TranscribeDictationErrors, TranscribeDictationResponse, TranscribeDictationResponses, TranscribeRequest, TranscribeResponse, TunnelInfo, TunnelState, UiMetadata, UnpauseScheduleData, UnpauseScheduleErrors, UnpauseScheduleResponse, UnpauseScheduleResponses, UpdateAgentProviderData, UpdateAgentProviderErrors, UpdateAgentProviderResponses, UpdateCustomProviderData, UpdateCustomProviderErrors, UpdateCustomProviderRequest, UpdateCustomProviderResponse, UpdateCustomProviderResponses, UpdateFromSessionData, UpdateFromSessionErrors, UpdateFromSessionRequest, 
UpdateFromSessionResponses, UpdateModelSettingsData, UpdateModelSettingsErrors, UpdateModelSettingsResponse, UpdateModelSettingsResponses, UpdateProviderRequest, UpdateScheduleData, UpdateScheduleErrors, UpdateScheduleRequest, UpdateScheduleResponse, UpdateScheduleResponses, UpdateSessionData, UpdateSessionErrors, UpdateSessionNameData, UpdateSessionNameErrors, UpdateSessionNameRequest, UpdateSessionNameResponses, UpdateSessionRequest, UpdateSessionResponses, UpdateSessionUserRecipeValuesData, UpdateSessionUserRecipeValuesError, UpdateSessionUserRecipeValuesErrors, UpdateSessionUserRecipeValuesRequest, UpdateSessionUserRecipeValuesResponse, UpdateSessionUserRecipeValuesResponse2, UpdateSessionUserRecipeValuesResponses, UpdateWorkingDirData, UpdateWorkingDirErrors, UpdateWorkingDirRequest, UpdateWorkingDirResponses, UpsertConfigData, UpsertConfigErrors, UpsertConfigQuery, UpsertConfigResponse, UpsertConfigResponses, UpsertPermissionsData, UpsertPermissionsErrors, UpsertPermissionsQuery, UpsertPermissionsResponse, UpsertPermissionsResponses, ValidateConfigData, ValidateConfigErrors, ValidateConfigResponse, ValidateConfigResponses, WhisperModelResponse, WindowProps } from './types.gen'; diff --git a/ui/desktop/src/api/sdk.gen.ts b/ui/desktop/src/api/sdk.gen.ts index 2870da539d..c98c916408 100644 --- a/ui/desktop/src/api/sdk.gen.ts +++ b/ui/desktop/src/api/sdk.gen.ts @@ -2,7 +2,7 @@ import type { Client, Options as Options2, TDataShape } from './client'; import { client } from './client.gen'; -import type { AddExtensionData, AddExtensionErrors, AddExtensionResponses, AgentAddExtensionData, AgentAddExtensionErrors, AgentAddExtensionResponses, AgentRemoveExtensionData, AgentRemoveExtensionErrors, AgentRemoveExtensionResponses, CallToolData, CallToolErrors, CallToolResponses, CancelDownloadData, CancelDownloadErrors, CancelDownloadResponses, CancelLocalModelDownloadData, CancelLocalModelDownloadErrors, CancelLocalModelDownloadResponses, CheckProviderData, 
CleanupProviderCacheData, CleanupProviderCacheErrors, CleanupProviderCacheResponses, ConfigureProviderOauthData, ConfigureProviderOauthErrors, ConfigureProviderOauthResponses, ConfirmToolActionData, ConfirmToolActionErrors, ConfirmToolActionResponses, CreateCustomProviderData, CreateCustomProviderErrors, CreateCustomProviderResponses, CreateRecipeData, CreateRecipeErrors, CreateRecipeResponses, CreateScheduleData, CreateScheduleErrors, CreateScheduleResponses, DecodeRecipeData, DecodeRecipeErrors, DecodeRecipeResponses, DeleteLocalModelData, DeleteLocalModelErrors, DeleteLocalModelResponses, DeleteModelData, DeleteModelErrors, DeleteModelResponses, DeleteRecipeData, DeleteRecipeErrors, DeleteRecipeResponses, DeleteScheduleData, DeleteScheduleErrors, DeleteScheduleResponses, DeleteSessionData, DeleteSessionErrors, DeleteSessionResponses, DiagnosticsData, DiagnosticsErrors, DiagnosticsResponses, DownloadHfModelData, DownloadHfModelErrors, DownloadHfModelResponses, DownloadModelData, DownloadModelErrors, DownloadModelResponses, EncodeRecipeData, EncodeRecipeErrors, EncodeRecipeResponses, ExportAppData, ExportAppErrors, ExportAppResponses, ExportSessionData, ExportSessionErrors, ExportSessionResponses, ForkSessionData, ForkSessionErrors, ForkSessionResponses, GetCanonicalModelInfoData, GetCanonicalModelInfoResponses, GetCustomProviderData, GetCustomProviderErrors, GetCustomProviderResponses, GetDictationConfigData, GetDictationConfigResponses, GetDownloadProgressData, GetDownloadProgressErrors, GetDownloadProgressResponses, GetExtensionsData, GetExtensionsErrors, GetExtensionsResponses, GetFeaturesData, GetFeaturesResponses, GetLocalModelDownloadProgressData, GetLocalModelDownloadProgressErrors, GetLocalModelDownloadProgressResponses, GetModelSettingsData, GetModelSettingsErrors, GetModelSettingsResponses, GetPromptData, GetPromptErrors, GetPromptResponses, GetPromptsData, GetPromptsResponses, GetProviderCatalogData, GetProviderCatalogErrors, 
GetProviderCatalogResponses, GetProviderCatalogTemplateData, GetProviderCatalogTemplateErrors, GetProviderCatalogTemplateResponses, GetProviderModelsData, GetProviderModelsErrors, GetProviderModelsResponses, GetRepoFilesData, GetRepoFilesResponses, GetSessionData, GetSessionErrors, GetSessionExtensionsData, GetSessionExtensionsErrors, GetSessionExtensionsResponses, GetSessionInsightsData, GetSessionInsightsErrors, GetSessionInsightsResponses, GetSessionResponses, GetSlashCommandsData, GetSlashCommandsResponses, GetToolsData, GetToolsErrors, GetToolsResponses, GetTunnelStatusData, GetTunnelStatusResponses, ImportAppData, ImportAppErrors, ImportAppResponses, ImportSessionData, ImportSessionErrors, ImportSessionNostrData, ImportSessionNostrErrors, ImportSessionNostrResponses, ImportSessionResponses, InspectRunningJobData, InspectRunningJobErrors, InspectRunningJobResponses, KillRunningJobData, KillRunningJobResponses, ListAppsData, ListAppsErrors, ListAppsResponses, ListLocalModelsData, ListLocalModelsResponses, ListModelsData, ListModelsResponses, ListRecipesData, ListRecipesErrors, ListRecipesResponses, ListSchedulesData, ListSchedulesErrors, ListSchedulesResponses, ListSessionsData, ListSessionsErrors, ListSessionsResponses, McpUiProxyData, McpUiProxyErrors, McpUiProxyResponses, ParseRecipeData, ParseRecipeErrors, ParseRecipeResponses, PauseScheduleData, PauseScheduleErrors, PauseScheduleResponses, ProvidersData, ProvidersResponses, ReadAllConfigData, ReadAllConfigResponses, ReadConfigData, ReadConfigErrors, ReadConfigResponses, ReadResourceData, ReadResourceErrors, ReadResourceResponses, RecipeToYamlData, RecipeToYamlErrors, RecipeToYamlResponses, RemoveConfigData, RemoveConfigErrors, RemoveConfigResponses, RemoveCustomProviderData, RemoveCustomProviderErrors, RemoveCustomProviderResponses, RemoveExtensionData, RemoveExtensionErrors, RemoveExtensionResponses, ReplyData, ReplyErrors, ReplyResponses, ResetPromptData, ResetPromptErrors, ResetPromptResponses, 
RestartAgentData, RestartAgentErrors, RestartAgentResponses, ResumeAgentData, ResumeAgentErrors, ResumeAgentResponses, RunNowHandlerData, RunNowHandlerErrors, RunNowHandlerResponses, SavePromptData, SavePromptErrors, SavePromptResponses, SaveRecipeData, SaveRecipeErrors, SaveRecipeResponses, ScanRecipeData, ScanRecipeResponses, ScheduleRecipeData, ScheduleRecipeErrors, ScheduleRecipeResponses, SearchHfModelsData, SearchHfModelsErrors, SearchHfModelsResponses, SearchSessionsData, SearchSessionsErrors, SearchSessionsResponses, SendTelemetryEventData, SendTelemetryEventResponses, SessionCancelData, SessionCancelResponses, SessionEventsData, SessionEventsErrors, SessionEventsResponses, SessionReplyData, SessionReplyErrors, SessionReplyResponses, SessionsHandlerData, SessionsHandlerErrors, SessionsHandlerResponses, SetConfigProviderData, SetRecipeSlashCommandData, SetRecipeSlashCommandErrors, SetRecipeSlashCommandResponses, ShareSessionNostrData, ShareSessionNostrErrors, ShareSessionNostrResponses, StartAgentData, StartAgentErrors, StartAgentResponses, StartNanogptSetupData, StartNanogptSetupResponses, StartOpenrouterSetupData, StartOpenrouterSetupResponses, StartTetrateSetupData, StartTetrateSetupResponses, StartTunnelData, StartTunnelErrors, StartTunnelResponses, StatusData, StatusResponses, StopAgentData, StopAgentErrors, StopAgentResponses, StopTunnelData, StopTunnelErrors, StopTunnelResponses, SyncFeaturedModelsData, SyncFeaturedModelsResponses, SystemInfoData, SystemInfoResponses, TranscribeDictationData, TranscribeDictationErrors, TranscribeDictationResponses, UnpauseScheduleData, UnpauseScheduleErrors, UnpauseScheduleResponses, UpdateAgentProviderData, UpdateAgentProviderErrors, UpdateAgentProviderResponses, UpdateCustomProviderData, UpdateCustomProviderErrors, UpdateCustomProviderResponses, UpdateFromSessionData, UpdateFromSessionErrors, UpdateFromSessionResponses, UpdateModelSettingsData, UpdateModelSettingsErrors, UpdateModelSettingsResponses, 
UpdateScheduleData, UpdateScheduleErrors, UpdateScheduleResponses, UpdateSessionData, UpdateSessionErrors, UpdateSessionNameData, UpdateSessionNameErrors, UpdateSessionNameResponses, UpdateSessionResponses, UpdateSessionUserRecipeValuesData, UpdateSessionUserRecipeValuesErrors, UpdateSessionUserRecipeValuesResponses, UpdateWorkingDirData, UpdateWorkingDirErrors, UpdateWorkingDirResponses, UpsertConfigData, UpsertConfigErrors, UpsertConfigResponses, UpsertPermissionsData, UpsertPermissionsErrors, UpsertPermissionsResponses, ValidateConfigData, ValidateConfigErrors, ValidateConfigResponses } from './types.gen'; +import type { AddExtensionData, AddExtensionErrors, AddExtensionResponses, AgentAddExtensionData, AgentAddExtensionErrors, AgentAddExtensionResponses, AgentRemoveExtensionData, AgentRemoveExtensionErrors, AgentRemoveExtensionResponses, CallToolData, CallToolErrors, CallToolResponses, CancelDownloadData, CancelDownloadErrors, CancelDownloadResponses, CancelLocalModelDownloadData, CancelLocalModelDownloadErrors, CancelLocalModelDownloadResponses, CheckProviderData, CleanupProviderCacheData, CleanupProviderCacheErrors, CleanupProviderCacheResponses, ConfigureProviderOauthData, ConfigureProviderOauthErrors, ConfigureProviderOauthResponses, ConfirmToolActionData, ConfirmToolActionErrors, ConfirmToolActionResponses, CreateCustomProviderData, CreateCustomProviderErrors, CreateCustomProviderResponses, CreateRecipeData, CreateRecipeErrors, CreateRecipeResponses, CreateScheduleData, CreateScheduleErrors, CreateScheduleResponses, DecodeRecipeData, DecodeRecipeErrors, DecodeRecipeResponses, DeleteLocalModelData, DeleteLocalModelErrors, DeleteLocalModelResponses, DeleteModelData, DeleteModelErrors, DeleteModelResponses, DeleteRecipeData, DeleteRecipeErrors, DeleteRecipeResponses, DeleteScheduleData, DeleteScheduleErrors, DeleteScheduleResponses, DeleteSessionData, DeleteSessionErrors, DeleteSessionResponses, DiagnosticsData, DiagnosticsErrors, DiagnosticsResponses, 
DownloadHfModelData, DownloadHfModelErrors, DownloadHfModelResponses, DownloadModelData, DownloadModelErrors, DownloadModelResponses, EncodeRecipeData, EncodeRecipeErrors, EncodeRecipeResponses, ExportAppData, ExportAppErrors, ExportAppResponses, ExportSessionData, ExportSessionErrors, ExportSessionResponses, ForkSessionData, ForkSessionErrors, ForkSessionResponses, GetCanonicalModelInfoData, GetCanonicalModelInfoResponses, GetCustomProviderData, GetCustomProviderErrors, GetCustomProviderResponses, GetDictationConfigData, GetDictationConfigResponses, GetDownloadProgressData, GetDownloadProgressErrors, GetDownloadProgressResponses, GetExtensionsData, GetExtensionsErrors, GetExtensionsResponses, GetFeaturesData, GetFeaturesResponses, GetLocalModelDownloadProgressData, GetLocalModelDownloadProgressErrors, GetLocalModelDownloadProgressResponses, GetModelSettingsData, GetModelSettingsErrors, GetModelSettingsResponses, GetPromptData, GetPromptErrors, GetPromptResponses, GetPromptsData, GetPromptsResponses, GetProviderCatalogData, GetProviderCatalogErrors, GetProviderCatalogResponses, GetProviderCatalogTemplateData, GetProviderCatalogTemplateErrors, GetProviderCatalogTemplateResponses, GetProviderModelInfoData, GetProviderModelInfoErrors, GetProviderModelInfoResponses, GetProviderModelsData, GetProviderModelsErrors, GetProviderModelsResponses, GetRepoFilesData, GetRepoFilesResponses, GetSessionData, GetSessionErrors, GetSessionExtensionsData, GetSessionExtensionsErrors, GetSessionExtensionsResponses, GetSessionInsightsData, GetSessionInsightsErrors, GetSessionInsightsResponses, GetSessionResponses, GetSlashCommandsData, GetSlashCommandsResponses, GetToolsData, GetToolsErrors, GetToolsResponses, GetTunnelStatusData, GetTunnelStatusResponses, ImportAppData, ImportAppErrors, ImportAppResponses, ImportSessionData, ImportSessionErrors, ImportSessionNostrData, ImportSessionNostrErrors, ImportSessionNostrResponses, ImportSessionResponses, InspectRunningJobData, 
InspectRunningJobErrors, InspectRunningJobResponses, KillRunningJobData, KillRunningJobResponses, ListAppsData, ListAppsErrors, ListAppsResponses, ListLocalModelsData, ListLocalModelsResponses, ListModelsData, ListModelsResponses, ListRecipesData, ListRecipesErrors, ListRecipesResponses, ListSchedulesData, ListSchedulesErrors, ListSchedulesResponses, ListSessionsData, ListSessionsErrors, ListSessionsResponses, McpUiProxyData, McpUiProxyErrors, McpUiProxyResponses, ParseRecipeData, ParseRecipeErrors, ParseRecipeResponses, PauseScheduleData, PauseScheduleErrors, PauseScheduleResponses, ProvidersData, ProvidersResponses, ReadAllConfigData, ReadAllConfigResponses, ReadConfigData, ReadConfigErrors, ReadConfigResponses, ReadResourceData, ReadResourceErrors, ReadResourceResponses, RecipeToYamlData, RecipeToYamlErrors, RecipeToYamlResponses, RemoveConfigData, RemoveConfigErrors, RemoveConfigResponses, RemoveCustomProviderData, RemoveCustomProviderErrors, RemoveCustomProviderResponses, RemoveExtensionData, RemoveExtensionErrors, RemoveExtensionResponses, ReplyData, ReplyErrors, ReplyResponses, ResetPromptData, ResetPromptErrors, ResetPromptResponses, RestartAgentData, RestartAgentErrors, RestartAgentResponses, ResumeAgentData, ResumeAgentErrors, ResumeAgentResponses, RunNowHandlerData, RunNowHandlerErrors, RunNowHandlerResponses, SavePromptData, SavePromptErrors, SavePromptResponses, SaveRecipeData, SaveRecipeErrors, SaveRecipeResponses, ScanRecipeData, ScanRecipeResponses, ScheduleRecipeData, ScheduleRecipeErrors, ScheduleRecipeResponses, SearchHfModelsData, SearchHfModelsErrors, SearchHfModelsResponses, SearchSessionsData, SearchSessionsErrors, SearchSessionsResponses, SendTelemetryEventData, SendTelemetryEventResponses, SessionCancelData, SessionCancelResponses, SessionEventsData, SessionEventsErrors, SessionEventsResponses, SessionReplyData, SessionReplyErrors, SessionReplyResponses, SessionsHandlerData, SessionsHandlerErrors, SessionsHandlerResponses, 
SetConfigProviderData, SetRecipeSlashCommandData, SetRecipeSlashCommandErrors, SetRecipeSlashCommandResponses, ShareSessionNostrData, ShareSessionNostrErrors, ShareSessionNostrResponses, StartAgentData, StartAgentErrors, StartAgentResponses, StartNanogptSetupData, StartNanogptSetupResponses, StartOpenrouterSetupData, StartOpenrouterSetupResponses, StartTetrateSetupData, StartTetrateSetupResponses, StartTunnelData, StartTunnelErrors, StartTunnelResponses, StatusData, StatusResponses, StopAgentData, StopAgentErrors, StopAgentResponses, StopTunnelData, StopTunnelErrors, StopTunnelResponses, SyncFeaturedModelsData, SyncFeaturedModelsResponses, SystemInfoData, SystemInfoResponses, TranscribeDictationData, TranscribeDictationErrors, TranscribeDictationResponses, UnpauseScheduleData, UnpauseScheduleErrors, UnpauseScheduleResponses, UpdateAgentProviderData, UpdateAgentProviderErrors, UpdateAgentProviderResponses, UpdateCustomProviderData, UpdateCustomProviderErrors, UpdateCustomProviderResponses, UpdateFromSessionData, UpdateFromSessionErrors, UpdateFromSessionResponses, UpdateModelSettingsData, UpdateModelSettingsErrors, UpdateModelSettingsResponses, UpdateScheduleData, UpdateScheduleErrors, UpdateScheduleResponses, UpdateSessionData, UpdateSessionErrors, UpdateSessionNameData, UpdateSessionNameErrors, UpdateSessionNameResponses, UpdateSessionResponses, UpdateSessionUserRecipeValuesData, UpdateSessionUserRecipeValuesErrors, UpdateSessionUserRecipeValuesResponses, UpdateWorkingDirData, UpdateWorkingDirErrors, UpdateWorkingDirResponses, UpsertConfigData, UpsertConfigErrors, UpsertConfigResponses, UpsertPermissionsData, UpsertPermissionsErrors, UpsertPermissionsResponses, ValidateConfigData, ValidateConfigErrors, ValidateConfigResponses } from './types.gen'; export type Options = Options2 & { /** @@ -237,6 +237,15 @@ export const providers = (options?: Option export const cleanupProviderCache = (options: Options) => (options.client ?? 
client).post({ url: '/config/providers/{name}/cleanup', ...options }); +export const getProviderModelInfo = (options: Options) => (options.client ?? client).post({ + url: '/config/providers/{name}/model-info', + ...options, + headers: { + 'Content-Type': 'application/json', + ...options.headers + } +}); + export const getProviderModels = (options: Options) => (options.client ?? client).get({ url: '/config/providers/{name}/models', ...options }); export const configureProviderOauth = (options: Options) => (options.client ?? client).post({ url: '/config/providers/{name}/oauth', ...options }); diff --git a/ui/desktop/src/api/types.gen.ts b/ui/desktop/src/api/types.gen.ts index e1e5c83e05..f9ad4a5061 100644 --- a/ui/desktop/src/api/types.gen.ts +++ b/ui/desktop/src/api/types.gen.ts @@ -818,6 +818,10 @@ export type ModelInfo = { * Cost per token for output in USD (optional) */ output_token_cost?: number | null; + /** + * Whether this model supports reasoning/thinking controls + */ + reasoning?: boolean; /** * Whether this model supports cache control */ @@ -834,6 +838,7 @@ export type ModelInfoData = { model: string; output_token_cost?: number | null; provider: string; + reasoning: boolean; }; export type ModelInfoQuery = { @@ -1000,6 +1005,10 @@ export type ProviderMetadata = { setup_steps?: Array; }; +export type ProviderModelInfoQuery = { + model: string; +}; + export type ProviderTemplate = { api_url: string; doc_url: string; @@ -1482,6 +1491,8 @@ export type ThinkingContent = { thinking: string; }; +export type ThinkingEffort = 'off' | 'low' | 'medium' | 'high' | 'max'; + export type TokenState = { accumulatedCost?: number | null; accumulatedInputTokens: number; @@ -2728,6 +2739,42 @@ export type CleanupProviderCacheResponses = { export type CleanupProviderCacheResponse = CleanupProviderCacheResponses[keyof CleanupProviderCacheResponses]; +export type GetProviderModelInfoData = { + body: ProviderModelInfoQuery; + path: { + /** + * Provider name (e.g., openai) + */ 
+ name: string; + }; + query?: never; + url: '/config/providers/{name}/model-info'; +}; + +export type GetProviderModelInfoErrors = { + /** + * Unknown provider, provider not configured, or authentication error + */ + 400: unknown; + /** + * Rate limit exceeded + */ + 429: unknown; + /** + * Internal server error + */ + 500: unknown; +}; + +export type GetProviderModelInfoResponses = { + /** + * Model metadata fetched successfully + */ + 200: ModelInfo; +}; + +export type GetProviderModelInfoResponse = GetProviderModelInfoResponses[keyof GetProviderModelInfoResponses]; + export type GetProviderModelsData = { body?: never; path: { @@ -2759,7 +2806,7 @@ export type GetProviderModelsResponses = { /** * Models fetched successfully */ - 200: Array; + 200: Array; }; export type GetProviderModelsResponse = GetProviderModelsResponses[keyof GetProviderModelsResponses]; diff --git a/ui/desktop/src/components/recipes/shared/RecipeModelSelector.tsx b/ui/desktop/src/components/recipes/shared/RecipeModelSelector.tsx index 2a0c310ef6..ad616bfdb3 100644 --- a/ui/desktop/src/components/recipes/shared/RecipeModelSelector.tsx +++ b/ui/desktop/src/components/recipes/shared/RecipeModelSelector.tsx @@ -108,8 +108,8 @@ export const RecipeModelSelector = ({ const modelList = models || []; const options = modelList.map((m) => ({ - value: m, - label: m, + value: m.name, + label: m.name, provider: p.name, })); diff --git a/ui/desktop/src/components/settings/models/modelInterface.ts b/ui/desktop/src/components/settings/models/modelInterface.ts index a8ee79bec6..5d7e442981 100644 --- a/ui/desktop/src/components/settings/models/modelInterface.ts +++ b/ui/desktop/src/components/settings/models/modelInterface.ts @@ -1,4 +1,10 @@ -import { ProviderDetails, getProviderModels, listLocalModels } from '../../../api'; +import { + ProviderDetails, + ThinkingEffort, + getProviderModelInfo, + getProviderModels, + listLocalModels, +} from '../../../api'; import { errorMessage as getErrorMessage } from 
'../../../utils/conversionUtils'; export default interface Model { @@ -9,7 +15,8 @@ export default interface Model { alias?: string; // optional model display name subtext?: string; // goes below model name if not the provider context_limit?: number; // optional context limit override - request_params?: Record; // provider-specific request parameters + reasoning?: boolean; // optional reasoning/thinking support metadata + request_params?: Record & { thinking_effort?: ThinkingEffort }; // provider-specific request parameters } export function createModelStruct( @@ -45,7 +52,7 @@ export async function getProviderMetadata( export interface ProviderModelsResult { provider: ProviderDetails; - models: string[] | null; + models: Model[] | null; error: string | null; warning: string | null; } @@ -61,7 +68,7 @@ export async function fetchModelsForProviders( const allModels = response.data || []; const downloadedModels = allModels .filter((m) => m.status.state === 'Downloaded') - .map((m) => m.id); + .map((m) => ({ name: m.id, provider: p.name }) as Model); return { provider: p, models: downloadedModels, error: null, warning: null }; } @@ -69,12 +76,28 @@ export async function fetchModelsForProviders( path: { name: p.name }, throwOnError: true, }); - const models = response.data || []; + const models = (response.data || []).map( + (m) => + ({ + name: m.name, + provider: p.name, + context_limit: m.context_limit, + reasoning: m.reasoning ?? undefined, + }) as Model + ); return { provider: p, models, error: null, warning: null }; } catch (e: unknown) { // For custom providers, fall back to the configured model list if (p.provider_type === 'Custom') { - const fallbackModels = p.metadata.known_models.map((m) => m.name); + const fallbackModels = p.metadata.known_models.map( + (m) => + ({ + name: m.name, + provider: p.name, + context_limit: m.context_limit, + reasoning: m.reasoning ?? 
undefined, + }) as Model + ); if (fallbackModels.length > 0) { console.warn(`Failed to fetch models for ${p.name}:`, getErrorMessage(e)); return { @@ -99,3 +122,19 @@ export async function fetchModelsForProviders( return await Promise.all(modelPromises); } + +export async function fetchModelReasoning( + provider: string, + model: string, + fallback?: boolean +): Promise { + try { + const response = await getProviderModelInfo({ + path: { name: provider }, + body: { model }, + }); + return response.data?.reasoning ?? fallback ?? null; + } catch { + return fallback ?? null; + } +} diff --git a/ui/desktop/src/components/settings/models/subcomponents/SwitchModelModal.tsx b/ui/desktop/src/components/settings/models/subcomponents/SwitchModelModal.tsx index c1f0fb33d7..e127fd5e92 100644 --- a/ui/desktop/src/components/settings/models/subcomponents/SwitchModelModal.tsx +++ b/ui/desktop/src/components/settings/models/subcomponents/SwitchModelModal.tsx @@ -17,12 +17,20 @@ import { Select } from '../../../ui/Select'; import { useConfig } from '../../../ConfigContext'; import { useModelAndProvider } from '../../../ModelAndProviderContext'; import type { View } from '../../../../utils/navigationUtils'; -import Model, { getProviderMetadata, fetchModelsForProviders } from '../modelInterface'; +import Model, { + fetchModelReasoning, + fetchModelsForProviders, + getProviderMetadata, +} from '../modelInterface'; import { getPredefinedModelsFromEnv, shouldShowPredefinedModels } from '../predefinedModelsUtils'; -import { ProviderType } from '../../../../api'; +import type { ProviderType, ThinkingEffort } from '../../../../api'; import { trackModelChanged } from '../../../../utils/analytics'; const i18n = defineMessages({ + thinkingEffortOff: { + id: 'switchModelModal.thinkingEffortOff', + defaultMessage: 'Off - No extended thinking', + }, thinkingLevelLow: { id: 'switchModelModal.thinkingLevelLow', defaultMessage: 'Low - Better latency, lighter reasoning', @@ -185,16 +193,7 @@ const 
i18n = defineMessages({ }, }); -// THINKING_LEVEL_OPTIONS and CLAUDE_THINKING_EFFORT_OPTIONS are created inside the component to support i18n. - -function isClaudeModel(name: string | null | undefined): boolean { - return !!name && name.toLowerCase().startsWith('claude-'); -} - -function supportsAdaptiveThinking(name: string): boolean { - const lower = name.toLowerCase(); - return lower.includes('claude-opus-4-6') || lower.includes('claude-sonnet-4-6'); -} +// Thinking effort options are created inside the component to support i18n. const PREFERRED_MODEL_PATTERNS = [ /claude-sonnet-4/i, @@ -256,12 +255,8 @@ export const SwitchModelModal = ({ }: SwitchModelModalProps) => { const intl = useIntl(); - const THINKING_LEVEL_OPTIONS = [ - { value: 'low', label: intl.formatMessage(i18n.thinkingLevelLow) }, - { value: 'high', label: intl.formatMessage(i18n.thinkingLevelHigh) }, - ]; - - const CLAUDE_THINKING_EFFORT_OPTIONS = [ + const THINKING_EFFORT_OPTIONS: { value: ThinkingEffort; label: string }[] = [ + { value: 'off', label: intl.formatMessage(i18n.thinkingEffortOff) }, { value: 'low', label: intl.formatMessage(i18n.claudeEffortLow) }, { value: 'medium', label: intl.formatMessage(i18n.claudeEffortMedium) }, { value: 'high', label: intl.formatMessage(i18n.claudeEffortHigh) }, @@ -278,7 +273,13 @@ export const SwitchModelModal = ({ const currentModel = sessionModel ?? configModel; const currentProvider = sessionProvider ?? 
configProvider; const [providerOptions, setProviderOptions] = useState<{ value: string; label: string }[]>([]); - type ModelOption = { value: string; label: string; provider: string; isDisabled?: boolean }; + type ModelOption = { + value: string; + label: string; + provider: string; + isDisabled?: boolean; + reasoning?: boolean; + }; const [modelOptions, setModelOptions] = useState<{ options: ModelOption[] }[]>([]); const [provider, setProvider] = useState( initialProvider || currentProvider || null @@ -304,43 +305,56 @@ export const SwitchModelModal = ({ import('../../../../api').ProviderDetails[] >([]); const fetchedProviders = useRef>(new Set()); - const [thinkingLevel, setThinkingLevel] = useState('low'); - const [claudeThinkingType, setClaudeThinkingType] = useState('disabled'); - const [claudeThinkingEffort, setClaudeThinkingEffort] = useState('high'); - const [claudeThinkingBudget, setClaudeThinkingBudget] = useState('16000'); + const reasoningRequestId = useRef(0); + const [thinkingEffort, setThinkingEffort] = useState(null); + const [selectedModelReasoning, setSelectedModelReasoning] = useState(null); - const modelName = usePredefinedModels ? selectedPredefinedModel?.name : model; - const isGemini3Model = modelName?.toLowerCase().startsWith('gemini-3') ?? false; - const showClaudeThinking = isClaudeModel(modelName); - const modelSupportsAdaptive = modelName ? supportsAdaptiveThinking(modelName) : false; + const modelReasoning = selectedModelReasoning ?? selectedPredefinedModel?.reasoning; + const showThinkingControl = modelReasoning === true; + const resolveSelectedModelReasoning = useCallback( + (providerName: string, modelName: string, fallback?: boolean) => { + const requestId = ++reasoningRequestId.current; + setSelectedModelReasoning(fallback ?? 
null); + fetchModelReasoning(providerName, modelName, fallback).then((reasoning) => { + if (requestId === reasoningRequestId.current) { + setSelectedModelReasoning(reasoning); + } + }); + }, + [] + ); useEffect(() => { - if (!showClaudeThinking) return; - if (claudeThinkingType === 'adaptive' && !modelSupportsAdaptive) { - setClaudeThinkingType('disabled'); - } - }, [modelName, showClaudeThinking, modelSupportsAdaptive, claudeThinkingType]); - - useEffect(() => { - const readConfig = async (key: string): Promise => { - try { - const val = (await read(key, false)) as string; - return val || null; - } catch (e) { - console.warn(`Could not read ${key}, using default:`, e); - return null; - } - }; (async () => { - const tt = await readConfig('CLAUDE_THINKING_TYPE'); - if (tt) setClaudeThinkingType(tt); - const effort = await readConfig('CLAUDE_THINKING_EFFORT'); - if (effort) setClaudeThinkingEffort(effort); - const budget = await readConfig('CLAUDE_THINKING_BUDGET'); - if (budget) setClaudeThinkingBudget(budget); + try { + const effort = (await read('GOOSE_THINKING_EFFORT', false)) as ThinkingEffort; + if (effort) setThinkingEffort(effort); + } catch (e) { + console.warn('Could not read GOOSE_THINKING_EFFORT, using default:', e); + } })(); }, [read]); + useEffect(() => { + if (!provider || !model) return; + + const selectedOption = modelOptions + .flatMap((group) => group.options) + .find((option) => option.provider === provider && option.value === model); + + if (selectedOption) { + resolveSelectedModelReasoning(provider, model, selectedOption.reasoning); + return; + } + + setSelectedModelReasoning(null); + const timeout = setTimeout(() => { + resolveSelectedModelReasoning(provider, model); + }, 400); + + return () => clearTimeout(timeout); + }, [model, provider, modelOptions, resolveSelectedModelReasoning]); + // Validate form data const validateForm = useCallback(() => { const errors = { @@ -393,36 +407,18 @@ export const SwitchModelModal = ({ subtext: 
providerDisplayName, } as Model; } + modelObj = { + ...modelObj, + reasoning: selectedModelReasoning ?? modelObj.reasoning, + }; - if (isGemini3Model) { + if (showThinkingControl) { + const effort = thinkingEffort ?? modelObj.request_params?.thinking_effort ?? 'off'; modelObj = { ...modelObj, - request_params: { ...modelObj.request_params, thinking_level: thinkingLevel }, + request_params: { ...modelObj.request_params, thinking_effort: effort }, }; - } - - if (showClaudeThinking) { - const params: Record = { - ...modelObj.request_params, - thinking_type: claudeThinkingType, - }; - if (claudeThinkingType === 'adaptive') { - params.effort = claudeThinkingEffort; - } else if (claudeThinkingType === 'enabled') { - params.budget_tokens = parseInt(claudeThinkingBudget, 10) || 16000; - } - modelObj = { ...modelObj, request_params: params }; - - upsert('CLAUDE_THINKING_TYPE', claudeThinkingType, false).catch(console.warn); - if (claudeThinkingType === 'adaptive') { - upsert('CLAUDE_THINKING_EFFORT', claudeThinkingEffort, false).catch(console.warn); - } else if (claudeThinkingType === 'enabled') { - upsert( - 'CLAUDE_THINKING_BUDGET', - parseInt(claudeThinkingBudget, 10) || 16000, - false - ).catch(console.warn); - } + upsert('GOOSE_THINKING_EFFORT', effort, false).catch(console.warn); } const success = await changeModel(sessionId, modelObj); @@ -450,8 +446,13 @@ export const SwitchModelModal = ({ const matchingModel = models.find((m) => m.name === currentModel); if (matchingModel) { setSelectedPredefinedModel(matchingModel); + resolveSelectedModelReasoning( + matchingModel.provider, + matchingModel.name, + matchingModel.reasoning + ); } - }, [usePredefinedModels, currentModel]); + }, [usePredefinedModels, currentModel, resolveSelectedModelReasoning]); // For manual mode: one-time sync of provider/model when session data // arrives after the modal has already mounted. 
Uses a ref so it only @@ -515,7 +516,7 @@ export const SwitchModelModal = ({ if (cancelled) return; const newGroupedOptions: { - options: { value: string; label: string; provider: string; providerType: ProviderType }[]; + options: (ModelOption & { providerType: ProviderType })[]; }[] = []; const newErrors: Record = {}; const newWarnings: Record = {}; @@ -536,11 +537,13 @@ export const SwitchModelModal = ({ label: string; provider: string; providerType: ProviderType; + reasoning?: boolean; }[] = modelList.map((m) => ({ - value: m, - label: m, + value: m.name, + label: m.name, provider: p.name, providerType: p.provider_type, + reasoning: m.reasoning, })); if (p.provider_type !== 'Custom') { @@ -613,30 +616,51 @@ export const SwitchModelModal = ({ } }, [provider, modelOptions, loadingModels, model, isCustomModel, userClearedModel, activeProvidersList]); + const handlePredefinedModelChange = (model: Model) => { + setSelectedPredefinedModel(model); + resolveSelectedModelReasoning(model.provider, model.name, model.reasoning); + }; + // Handle model selection change const handleModelChange = (newValue: unknown) => { - const selectedOption = newValue as { value: string; label: string; provider: string } | null; + const selectedOption = newValue as { + value: string; + label: string; + provider: string; + reasoning?: boolean; + } | null; if (selectedOption?.value === 'custom') { setIsCustomModel(true); setModel(''); setProvider(selectedOption.provider); + setSelectedModelReasoning(null); setUserClearedModel(false); } else if (selectedOption === null) { // User cleared the selection setIsCustomModel(false); setModel(''); + setSelectedModelReasoning(null); setUserClearedModel(true); } else { setIsCustomModel(false); setModel(selectedOption?.value || ''); setProvider(selectedOption?.provider || ''); + if (selectedOption?.provider && selectedOption.value) { + resolveSelectedModelReasoning( + selectedOption.provider, + selectedOption.value, + selectedOption.reasoning + ); + } 
else { + setSelectedModelReasoning(selectedOption?.reasoning ?? null); + } setUserClearedModel(false); } }; // Store the original model options in state, initialized from modelOptions const [originalModelOptions, setOriginalModelOptions] = - useState<{ options: { value: string; label: string; provider: string }[] }[]>(modelOptions); + useState<{ options: ModelOption[] }[]>(modelOptions); const handleInputChange = (inputValue: string) => { if (!provider) return; @@ -680,54 +704,20 @@ export const SwitchModelModal = ({ } }; - const claudeThinkingTypeOptions = [ - ...(modelSupportsAdaptive - ? [{ value: 'adaptive', label: intl.formatMessage(i18n.claudeAdaptive) }] - : []), - { value: 'enabled', label: intl.formatMessage(i18n.claudeEnabled) }, - { value: 'disabled', label: intl.formatMessage(i18n.claudeDisabled) }, - ]; - - const claudeThinkingControls = showClaudeThinking && ( -
-
- - o.value === claudeThinkingEffort)} - onChange={(newValue: unknown) => { - const option = newValue as { value: string; label: string } | null; - setClaudeThinkingEffort(option?.value || 'high'); - }} - placeholder={intl.formatMessage(i18n.selectEffortLevel)} - /> -
- )} - {claudeThinkingType === 'enabled' && ( -
- - setClaudeThinkingBudget(e.target.value)} - /> -
- )} + const thinkingEffortControl = showThinkingControl && ( +
+ + o.value === thinkingLevel)} - onChange={(newValue: unknown) => { - const option = newValue as { value: string; label: string } | null; - setThinkingLevel(option?.value || 'low'); - }} - placeholder={intl.formatMessage(i18n.selectThinkingLevel)} - /> -
- )} - - {claudeThinkingControls} + {thinkingEffortControl}
) : ( /* Manual Provider/Model Selection */ @@ -970,25 +942,7 @@ export const SwitchModelModal = ({ )} - {isGemini3Model && ( -
- -