From 6f4f864d28aee4909ff1885557bb87382294628f Mon Sep 17 00:00:00 2001 From: rob thijssen Date: Mon, 9 Mar 2026 18:17:48 +0200 Subject: [PATCH] fix: increase max_tokens to 8192 for R1 reasoning overhead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit R1 models use 500-2000 tokens for <think> blocks before the final response. 4096 was too tight — the model would exhaust the budget mid-thought and never emit the JSON. Co-Authored-By: Claude Sonnet 4.6 --- src/claude.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/claude.rs b/src/claude.rs index 370ca8e..37381cc 100644 --- a/src/claude.rs +++ b/src/claude.rs @@ -63,7 +63,7 @@ impl ClaudeClient { ) -> Result<(String, Option)> { let body = MessagesRequest { model: self.model.clone(), - max_tokens: 4096, + max_tokens: 8192, system: system.to_string(), messages: messages.to_vec(), };