Merge pull request #3236 from ultraworkers/fix/ollama-qwen-reasoning-field

fix: parse Ollama reasoning fields
This commit is contained in:
YeonGyu-Kim
2026-06-08 10:11:52 +09:00
committed by GitHub
4 changed files with 166 additions and 0 deletions
+5
View File
@@ -40,6 +40,11 @@ Or provide an OAuth bearer token directly:
export ANTHROPIC_AUTH_TOKEN="anthropic-oauth-or-proxy-bearer-token"
```
For local OpenAI-compatible servers such as Ollama, including Qwen reasoning
models, see [`../docs/local-openai-compatible-providers.md`](../docs/local-openai-compatible-providers.md).
Use the exact model tag exposed by the server, for example `qwen3:latest`, and
prefer `OLLAMA_HOST` for Ollama-specific local routing.
## Mock parity harness
The workspace now includes a deterministic Anthropic-compatible mock service and a clean-environment CLI harness for end-to-end parity checks.
@@ -572,6 +572,7 @@ impl StreamState {
.delta
.reasoning_content
.filter(|value| !value.is_empty())
.or(choice.delta.reasoning.filter(|value| !value.is_empty()))
.or(choice
.delta
.thinking
@@ -827,6 +828,8 @@ struct ChatMessage {
#[serde(default)]
reasoning_content: Option<String>,
#[serde(default)]
reasoning: Option<String>,
#[serde(default)]
tool_calls: Vec<ResponseToolCall>,
}
@@ -901,6 +904,8 @@ struct ChunkDelta {
#[serde(default)]
reasoning_content: Option<String>,
#[serde(default)]
reasoning: Option<String>,
#[serde(default)]
thinking: Option<ThinkingDelta>,
#[serde(default, deserialize_with = "deserialize_null_as_empty_vec")]
tool_calls: Vec<DeltaToolCall>,
@@ -1510,6 +1515,7 @@ fn normalize_response(
.message
.reasoning_content
.filter(|value| !value.is_empty())
.or(choice.message.reasoning.filter(|value| !value.is_empty()))
{
content.push(OutputContentBlock::Thinking {
thinking,
@@ -1992,6 +1998,7 @@ mod tests {
role: "assistant".to_string(),
content: Some("final answer".to_string()),
reasoning_content: Some("hidden thought".to_string()),
reasoning: None,
tool_calls: Vec::new(),
},
finish_reason: Some("stop".to_string()),
@@ -2029,6 +2036,7 @@ mod tests {
delta: super::ChunkDelta {
content: None,
reasoning_content: Some("think".to_string()),
reasoning: None,
thinking: None,
tool_calls: Vec::new(),
},
@@ -2046,6 +2054,7 @@ mod tests {
delta: super::ChunkDelta {
content: Some(" answer".to_string()),
reasoning_content: None,
reasoning: None,
thinking: None,
tool_calls: Vec::new(),
},
@@ -166,6 +166,55 @@ async fn send_message_preserves_deepseek_reasoning_content_before_text() {
assert_eq!(body["thinking"], json!({"type": "enabled"}));
}
#[tokio::test]
async fn send_message_preserves_ollama_reasoning_before_text() {
let state = Arc::new(Mutex::new(Vec::<CapturedRequest>::new()));
let body = concat!(
"{",
"\"id\":\"chatcmpl_ollama_reasoning\",",
"\"model\":\"qwen3:latest\",",
"\"choices\":[{",
"\"message\":{\"role\":\"assistant\",\"reasoning\":\"Think locally\",\"content\":\"Answer locally\",\"tool_calls\":[]},",
"\"finish_reason\":\"stop\"",
"}],",
"\"usage\":{\"prompt_tokens\":11,\"completion_tokens\":5}",
"}"
);
let server = spawn_server(
state.clone(),
vec![http_response("200 OK", "application/json", body)],
)
.await;
let client = OpenAiCompatClient::new("ollama-test-key", OpenAiCompatConfig::openai())
.with_base_url(server.base_url());
let response = client
.send_message(&MessageRequest {
model: "openai/qwen3:latest".to_string(),
..sample_request(false)
})
.await
.expect("request should succeed");
assert_eq!(
response.content,
vec![
OutputContentBlock::Thinking {
thinking: "Think locally".to_string(),
signature: None,
},
OutputContentBlock::Text {
text: "Answer locally".to_string(),
},
]
);
let captured = state.lock().await;
let request = captured.first().expect("server should capture request");
let body: serde_json::Value = serde_json::from_str(&request.body).expect("json body");
assert_eq!(body["model"], json!("qwen3:latest"));
}
#[tokio::test]
async fn local_openai_gateway_strips_routing_prefix_and_preserves_extra_body_params() {
let state = Arc::new(Mutex::new(Vec::<CapturedRequest>::new()));
@@ -389,6 +438,83 @@ async fn stream_message_normalizes_text_and_multiple_tool_calls() {
assert!(request.body.contains("\"stream\":true"));
}
#[tokio::test]
async fn stream_message_preserves_ollama_reasoning_before_text() {
let state = Arc::new(Mutex::new(Vec::<CapturedRequest>::new()));
let sse = concat!(
"data: {\"id\":\"chatcmpl_stream_ollama_reasoning\",\"model\":\"qwen3:latest\",\"choices\":[{\"delta\":{\"reasoning\":\"Think\"}}]}\n\n",
"data: {\"id\":\"chatcmpl_stream_ollama_reasoning\",\"choices\":[{\"delta\":{\"content\":\" answer\"},\"finish_reason\":\"stop\"}]}\n\n",
"data: [DONE]\n\n"
);
let server = spawn_server(
state.clone(),
vec![http_response_with_headers(
"200 OK",
"text/event-stream",
sse,
&[("x-request-id", "req_ollama_reasoning_stream")],
)],
)
.await;
let client = OpenAiCompatClient::new("ollama-test-key", OpenAiCompatConfig::openai())
.with_base_url(server.base_url());
let mut stream = client
.stream_message(&MessageRequest {
model: "openai/qwen3:latest".to_string(),
..sample_request(false)
})
.await
.expect("stream should start");
assert_eq!(stream.request_id(), Some("req_ollama_reasoning_stream"));
let mut events = Vec::new();
while let Some(event) = stream.next_event().await.expect("event should parse") {
events.push(event);
}
assert!(matches!(events[0], StreamEvent::MessageStart(_)));
assert!(matches!(
events[1],
StreamEvent::ContentBlockStart(ContentBlockStartEvent {
index: 0,
content_block: OutputContentBlock::Thinking { .. },
})
));
assert!(matches!(
events[2],
StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent {
index: 0,
delta: ContentBlockDelta::ThinkingDelta { .. },
})
));
assert!(matches!(
events[3],
StreamEvent::ContentBlockStop(ContentBlockStopEvent { index: 0 })
));
assert!(matches!(
events[4],
StreamEvent::ContentBlockStart(ContentBlockStartEvent {
index: 1,
content_block: OutputContentBlock::Text { .. },
})
));
assert!(matches!(
events[5],
StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent {
index: 1,
delta: ContentBlockDelta::TextDelta { .. },
})
));
let captured = state.lock().await;
let request = captured.first().expect("captured request");
let body: serde_json::Value = serde_json::from_str(&request.body).expect("json body");
assert_eq!(body["model"], json!("qwen3:latest"));
assert_eq!(body["stream"], json!(true));
}
#[allow(clippy::await_holding_lock)]
#[tokio::test]
async fn stream_message_retries_retryable_sse_handshake_failures() {
+26
View File
@@ -2939,6 +2939,10 @@ fn validate_model_syntax(model: &str) -> Result<(), String> {
err_msg.push_str("\nDid you mean `openai/");
err_msg.push_str(trimmed);
err_msg.push_str("`? (Requires OPENAI_API_KEY env var)");
} else if trimmed.starts_with("qwen") && trimmed.contains(':') {
err_msg.push_str("\nFor a local Ollama model, set `OPENAI_BASE_URL=http://127.0.0.1:11434/v1` before using tagged names like `");
err_msg.push_str(trimmed);
err_msg.push_str("`.");
} else if trimmed.starts_with("qwen") {
err_msg.push_str("\nDid you mean `qwen/");
err_msg.push_str(trimmed);
@@ -19743,6 +19747,28 @@ mod alias_resolution_tests {
assert!(result.unwrap_err().contains("invalid model syntax"));
}
#[test]
fn qwen_invalid_model_hint_mentions_local_ollama_openai_base_url() {
let _guard = ollama_env_lock();
let _ollama_env = EnvVarGuard::unset("OLLAMA_HOST");
let _openai_env = EnvVarGuard::unset("OPENAI_BASE_URL");
let result = validate_model_syntax("qwen3:8b");
let error = result.expect_err("Ollama tag without local base URL should fail");
assert!(
error.contains("Ollama"),
"Qwen Ollama tag error should mention Ollama: {error}"
);
assert!(
error.contains("OPENAI_BASE_URL"),
"Qwen Ollama tag error should mention OPENAI_BASE_URL: {error}"
);
assert!(
error.contains("http://127.0.0.1:11434/v1"),
"Qwen Ollama tag error should show local Ollama OpenAI URL: {error}"
);
}
#[test]
fn test_direct_provider_model_passes() {
// Direct provider/model strings should remain unchanged and pass