Compare commits

...

2 Commits

Author SHA1 Message Date
6f4f864d28 fix: increase max_tokens to 8192 for R1 reasoning overhead
R1 models use 500-2000 tokens for <think> blocks before the final
response. 4096 was too tight — the model would exhaust the budget
mid-thought and never emit the JSON.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-09 18:17:48 +02:00
185cb4586e fix: strip R1 think blocks before JSON extraction
DeepSeek-R1 models emit <think>...</think> before their actual response.
The brace-counting extractor would grab the first { inside the thinking
block (which contains partial JSON fragments) rather than the final
strategy JSON.

strip_think_blocks() removes all <think>...</think> sections including
unterminated blocks (truncated responses), leaving only the final output
for extract_json to process.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-09 18:17:06 +02:00

View File

@@ -63,7 +63,7 @@ impl ClaudeClient {
) -> Result<(String, Option<Usage>)> { ) -> Result<(String, Option<Usage>)> {
let body = MessagesRequest { let body = MessagesRequest {
model: self.model.clone(), model: self.model.clone(),
max_tokens: 4096, max_tokens: 8192,
system: system.to_string(), system: system.to_string(),
messages: messages.to_vec(), messages: messages.to_vec(),
}; };
@@ -98,9 +98,13 @@ impl ClaudeClient {
} }
} }
/// Extract a JSON object from Claude's response text. /// Extract a JSON object from a model response text.
/// Looks for the first `{` ... `}` block, handling markdown code fences. /// Handles markdown code fences and R1-style `<think>...</think>` blocks.
pub fn extract_json(text: &str) -> Result<Value> { pub fn extract_json(text: &str) -> Result<Value> {
// Strip R1-style thinking blocks before looking for JSON
let text = strip_think_blocks(text);
let text = text.as_ref();
// Strip markdown fences if present // Strip markdown fences if present
let cleaned = text let cleaned = text
.replace("```json", "") .replace("```json", "")
@@ -137,3 +141,25 @@ pub fn extract_json(text: &str) -> Result<Value> {
serde_json::from_str(&cleaned[s..e]).context("parse extracted JSON") serde_json::from_str(&cleaned[s..e]).context("parse extracted JSON")
} }
/// Remove `<think>...</think>` blocks emitted by R1-family reasoning models.
///
/// Returns the input borrowed and untouched when it contains no `<think>` tag;
/// otherwise returns an owned copy with every thinking block cut out.
///
/// Handling of irregular input:
/// * Nested blocks: tag depth is tracked, so
///   `<think>a<think>b</think>c</think>` is removed as a single unit.
/// * Unterminated block with no `</think>` at all (truncated response):
///   everything after the opening tag is discarded.
/// * Unbalanced tags (more `<think>` than `</think>` inside a block, e.g. the
///   reasoning mentions the tag literally): the block is considered to end at
///   the last `</think>` that was seen, so output following the reasoning is
///   kept rather than thrown away.
fn strip_think_blocks(text: &str) -> std::borrow::Cow<'_, str> {
    const OPEN: &str = "<think>";
    const CLOSE: &str = "</think>";

    if !text.contains(OPEN) {
        return std::borrow::Cow::Borrowed(text);
    }

    let mut out = String::with_capacity(text.len());
    let mut rest = text;
    while let Some(start) = rest.find(OPEN) {
        // Keep everything before the block, then step past the opening tag.
        out.push_str(&rest[..start]);
        rest = &rest[start + OPEN.len()..];

        // Walk forward until the opening tag above is balanced.
        let mut depth = 1usize;
        // Remainder of the input just after the most recent `</think>`
        // consumed inside this block; used when the tags never balance.
        let mut after_last_close: Option<&str> = None;
        while depth > 0 {
            match rest.find(CLOSE) {
                None => {
                    // No closing tag left. Resume after the last one we saw,
                    // or drop the remainder if the block was never closed.
                    rest = after_last_close.unwrap_or("");
                    break;
                }
                Some(close) => match rest[..close].find(OPEN) {
                    // Another block opens before the next close: go deeper.
                    Some(open) => {
                        depth += 1;
                        rest = &rest[open + OPEN.len()..];
                    }
                    None => {
                        depth -= 1;
                        rest = &rest[close + CLOSE.len()..];
                        after_last_close = Some(rest);
                    }
                },
            }
        }
    }
    out.push_str(rest);
    std::borrow::Cow::Owned(out)
}