fix: increase max_tokens to 8192 for R1 reasoning overhead

R1 models use 500-2000 tokens for <think> blocks before the final response. 4096 was too tight — the model would exhaust the budget mid-thought and never emit the JSON.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
fix: strip R1 think blocks before JSON extraction
2026-03-09 18:17:48 +02:00 · 2026-03-09 18:17:06 +02:00
1 changed files with 29 additions and 3 deletions
--- a/src/claude.rs
+++ b/src/claude.rs
@@ -63,7 +63,7 @@ impl ClaudeClient {
    ) -> Result<(String, Option<Usage>)> {
        let body = MessagesRequest {
            model: self.model.clone(),
-            max_tokens: 4096,
+            max_tokens: 8192,
            system: system.to_string(),
            messages: messages.to_vec(),
        };
@@ -98,9 +98,13 @@ impl ClaudeClient {
    }
 }
-/// Extract a JSON object from Claude's response text.
+/// Extract a JSON object from a model response text.
-/// Looks for the first `{` ... `}` block, handling markdown code fences.
+/// Handles markdown code fences and R1-style `<think>...</think>` blocks.
 pub fn extract_json(text: &str) -> Result<Value> {
    // Strip R1-style thinking blocks before looking for JSON
    let text = strip_think_blocks(text);
    let text = text.as_ref();
    // Strip markdown fences if present
    let cleaned = text
        .replace("```json", "")
@@ -137,3 +141,25 @@ pub fn extract_json(text: &str) -> Result<Value> {
    serde_json::from_str(&cleaned[s..e]).context("parse extracted JSON")
 }
 /// Remove `<think>...</think>` blocks emitted by R1-family reasoning models.
 ///
 /// Returns the input borrowed and unchanged when it contains no `<think>` tag;
 /// otherwise returns an owned copy with every thinking block removed.
 ///
 /// Block boundaries are resolved as follows:
 /// - Properly nested tags (`<think>a<think>b</think>c</think>`) are removed as
 ///   a single block, up to the `</think>` that balances the opening tag.
 /// - If the tags never balance (e.g. the reasoning merely mentions `<think>`),
 ///   the block ends at the first `</think>` after the opening tag.
 /// - If there is no `</think>` at all after the opening tag (truncated
 ///   response), everything from the opening tag onwards is discarded.
 fn strip_think_blocks(text: &str) -> std::borrow::Cow<'_, str> {
    const OPEN: &str = "<think>";
    const CLOSE: &str = "</think>";

    if !text.contains(OPEN) {
        return std::borrow::Cow::Borrowed(text);
    }
    let mut out = String::with_capacity(text.len());
    let mut rest = text;
    while let Some(start) = rest.find(OPEN) {
        // Keep everything before the opening tag.
        out.push_str(&rest[..start]);
        let body = &rest[start + OPEN.len()..];

        // Walk the tags inside `body`, tracking nesting depth. All offsets are
        // byte offsets into `body` and always land right after an ASCII tag,
        // so they are valid UTF-8 boundaries.
        let mut depth = 1usize;
        let mut pos = 0usize;
        let mut first_close_end: Option<usize> = None;
        let mut balanced_end: Option<usize> = None;
        while let Some(rel) = body[pos..].find(CLOSE) {
            let close = pos + rel;
            if let Some(open) = body[pos..close].find(OPEN) {
                // A nested opening tag precedes the next closing tag.
                depth += 1;
                pos += open + OPEN.len();
                continue;
            }
            pos = close + CLOSE.len();
            if first_close_end.is_none() {
                first_close_end = Some(pos);
            }
            depth -= 1;
            if depth == 0 {
                balanced_end = Some(pos);
                break;
            }
        }

        rest = match balanced_end.or(first_close_end) {
            Some(end) => &body[end..],
            // Unterminated — discard the rest (truncated thinking block)
            None => "",
        };
    }
    out.push_str(rest);
    std::borrow::Cow::Owned(out)
 }