feat(agent): improve LLM feedback loop and convergence detection

Three related improvements to help the model learn and explore effectively:

1. Strategy JSON in history: include the compact strategy JSON in each
   IterationRecord::summary() so the LLM knows exactly what was tested in
   every past iteration, not just the outcome metrics. Without this, the
   model had no record of what it had tried once the conversation history
   was trimmed.

2. Rule comment in audit: include rule_comment from the condition audit in
   the formatted audit string so the LLM can correlate hit-rate data with
   the rule's stated purpose.

3. Convergence detection and anti-anchoring: diagnose_history() now returns
   (String, bool), where the bool signals that the last 3 iterations had an
   avg_sharpe spread < 0.03 (model stuck in a local optimum). When converged:
   - Emit a ⚠ CONVERGENCE DETECTED note listing untried candle intervals.
   - Suppress the best_so_far JSON to break the anchoring effect that was
     causing the model to produce near-identical strategies for 13+
     iterations.
   - Add a targeted "try a different approach" instruction.

Also add a volume-as-field clarification to the DSL mistakes section of the
system prompt, fixing the "unknown variant `volume`" submit error.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-09 14:38:07 +02:00
parent fb1145acae
commit e27aabae34
3 changed files with 206 additions and 8 deletions
--- a/src/agent.rs
+++ b/src/agent.rs
@@ -35,7 +35,13 @@ impl IterationRecord {
    }

    fn summary(&self) -> String {
-        let mut lines = vec![format!("=== Iteration {} ===", self.iteration)];
+        let interval = self.strategy["candle_interval"].as_str().unwrap_or("?");
+        let mut lines = vec![format!("=== Iteration {} (interval={interval}) ===", self.iteration)];
+        // Include the strategy so the LLM knows what was tried, not just the outcome.
+        lines.push(format!(
+            "  strategy: {}",
+            serde_json::to_string(&self.strategy).unwrap_or_default()
+        ));
        for r in &self.results {
            lines.push(r.summary_line());
        }
@@ -141,11 +147,19 @@ pub async fn run(cli: &Cli) -> Result<()> {
                .collect::<Vec<_>>()
                .join("\n\n");

-            let best_json = best_strategy
-                .as_ref()
-                .map(|(_, v)| serde_json::to_string_pretty(v).unwrap());
+            let (diagnosis, is_converged) = diagnose_history(&history);

-            prompts::iteration_prompt(iteration, &results_text, best_json.as_deref())
+            // When the model is stuck, suppress the best_so_far JSON to break
+            // the anchoring effect that causes it to keep refining the same strategy.
+            let best_json = if is_converged {
+                None
+            } else {
+                best_strategy
+                    .as_ref()
+                    .map(|(_, v)| serde_json::to_string_pretty(v).unwrap())
+            };
+
+            prompts::iteration_prompt(iteration, &results_text, best_json.as_deref(), &diagnosis)
        };

        conversation.push(Message {
@@ -460,3 +474,180 @@ fn save_validated_strategy(
    info!("saved validated strategy to {}", path.display());
    Ok(())
 }
+
+/// Build a short, targeted diagnosis paragraph from `history` to append to the iteration prompt.
+///
+/// Returns `(diagnosis_text, is_converged)`; the text is empty when `history` is empty or no check fires. `is_converged` means the last 3
+/// iterations had finite `avg_sharpe()` values with a spread < 0.03: the model is stuck and the caller should suppress `best_so_far` to break the anchor.
+pub fn diagnose_history(history: &[IterationRecord]) -> (String, bool) {
+    if history.is_empty() {
+        return (String::new(), false);
+    }
+
+    let mut notes: Vec<String> = Vec::new();
+    let mut is_converged = false;
+
+    // Collect the distinct candle intervals tried so far (deduplicated through a HashSet, so their order is unspecified).
+    let intervals_tried: Vec<String> = history
+        .iter()
+        .filter_map(|rec| {
+            rec.strategy["candle_interval"]
+                .as_str()
+                .map(|s| s.to_string())
+        })
+        .collect::<std::collections::HashSet<_>>()
+        .into_iter()
+        .collect();
+
+    // --- Convergence detection ---
+    // If the last 3 iterations all have a finite avg_sharpe within 0.03 of each other,
+    // the model is stuck in a local optimum and needs a hard reset.
+    if history.len() >= 3 {
+        let recent = &history[history.len().saturating_sub(3)..]; // the last three records
+        let recent_sharpes: Vec<f64> = recent
+            .iter()
+            .map(|r| r.avg_sharpe())
+            .filter(|s| s.is_finite()) // drop NaN and infinite averages
+            .collect();
+        if recent_sharpes.len() == 3 { // only when all three averages were finite
+            let max_s = recent_sharpes.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
+            let min_s = recent_sharpes.iter().cloned().fold(f64::INFINITY, f64::min);
+            if max_s - min_s < 0.03 {
+                is_converged = true;
+                let untried: Vec<&str> = ["1h", "4h", "15m", "1d"] // candidate intervals to suggest
+                    .iter()
+                    .copied()
+                    .filter(|iv| !intervals_tried.iter().any(|t| t == iv))
+                    .collect();
+                let interval_hint = if untried.is_empty() { // every candidate was already tried: no hint
+                    String::new()
+                } else {
+                    format!(
+                        " You have only tried intervals: {}. Switch to {}.",
+                        intervals_tried.join(", "),
+                        untried.join(" or ")
+                    )
+                };
+                notes.push(format!(
+                    "⚠ CONVERGENCE DETECTED: The last 3 iterations produced nearly identical \
+                     results (avg Sharpe spread {:.3}). Showing the best strategy is \
+                     suppressed to prevent anchoring.{interval_hint} \
+                     You MUST try a fundamentally different approach — different indicator \
+                     family, different candle interval, or radically simplified conditions. \
+                     Do NOT refine the previous strategy.",
+                    max_s - min_s,
+                ));
+            }
+        }
+    }
+
+    // --- Zero-trade check --- (an iteration counts when every result has 0 positions; a missing count is treated as 0, and an iteration with no results also counts)
+    let zero_trade_iters = history
+        .iter()
+        .filter(|rec| {
+            rec.results
+                .iter()
+                .all(|r| r.total_positions.unwrap_or(0) == 0)
+        })
+        .count();
+    if zero_trade_iters > 0 && zero_trade_iters == history.len() { // every iteration so far
+        notes.push(
+            "Every strategy so far produced 0 trades on all instruments. \
+             Entry conditions are too restrictive — they never all co-occur. \
+             Simplify: use at most 2 entry conditions, widen thresholds, or remove filters."
+                .to_string(),
+        );
+    } else if zero_trade_iters > history.len() / 2 { // more than half (integer division), but not all
+        notes.push(format!(
+            "{zero_trade_iters}/{} iterations had 0 trades. Entry conditions are often \
+             too restrictive. Try fewer or looser conditions.",
+            history.len()
+        ));
+    }
+
+    // --- Latest-iteration analysis --- (only results of the last iteration with at least one position are considered)
+    if let Some(last) = history.last() {
+        let with_trades: Vec<&BacktestResult> = last
+            .results
+            .iter()
+            .filter(|r| r.total_positions.unwrap_or(0) > 0)
+            .collect();
+
+        if !with_trades.is_empty() {
+            // Win rate above 0.35 but profit factor strictly between 0 and 1 (losses too large); missing values default to 0.0 and never match.
+            let low_pf: Vec<&str> = with_trades
+                .iter()
+                .filter(|r| {
+                    r.win_rate.unwrap_or(0.0) > 0.35
+                        && r.profit_factor.unwrap_or(0.0) < 1.0
+                        && r.profit_factor.unwrap_or(0.0) > 0.0
+                })
+                .map(|r| r.instrument.as_str())
+                .collect();
+            if !low_pf.is_empty() {
+                notes.push(format!(
+                    "On {} the win rate is acceptable (>35%) but profit factor is below 1 — \
+                     losing trades are larger than winning ones. Tighten the stop-loss or \
+                     add a profit target.",
+                    low_pf.join(", ")
+                ));
+            }
+
+            // Large cross-instrument divergence: Sharpe spread above 0.3 among results that report a `sharpe_ratio` (needs at least two).
+            let sharpes: Vec<f64> = with_trades
+                .iter()
+                .filter_map(|r| r.sharpe_ratio)
+                .collect();
+            if sharpes.len() >= 2 {
+                let max_s = sharpes.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
+                let min_s = sharpes.iter().cloned().fold(f64::INFINITY, f64::min);
+                if max_s - min_s > 0.3 {
+                    let best = with_trades
+                        .iter()
+                        .max_by(|a, b| {
+                            a.sharpe_ratio
+                                .unwrap_or(f64::NEG_INFINITY)
+                                .partial_cmp(&b.sharpe_ratio.unwrap_or(f64::NEG_INFINITY))
+                                .unwrap() // NOTE(review): panics if either Sharpe is NaN (partial_cmp returns None)
+                        })
+                        .map(|r| r.instrument.as_str())
+                        .unwrap_or("?");
+                    notes.push(format!(
+                        "Large Sharpe divergence across instruments (spread {:.2}). \
+                         Strategy generalises poorly — {best} is the strongest, \
+                         but it may not hold out-of-sample.",
+                        max_s - min_s
+                    ));
+                }
+            }
+
+            // Persistently negative Sharpe (not converged, just bad): skipped once convergence has been flagged above.
+            if !is_converged {
+                let all_neg = with_trades
+                    .iter()
+                    .all(|r| r.sharpe_ratio.unwrap_or(0.0) < 0.0);
+                let consistently_neg = history
+                    .iter()
+                    .all(|rec| rec.avg_sharpe() < 0.0 || rec.avg_sharpe() == f64::NEG_INFINITY); // NOTE(review): the NEG_INFINITY test is already implied by `< 0.0`
+                if all_neg && consistently_neg {
+                    notes.push(
+                        "All strategies so far have negative Sharpe. The current indicator \
+                         family has no detectable edge. Try a completely different approach: \
+                         if you have been using trend-following, switch to mean reversion or \
+                         volatility breakout."
+                            .to_string(),
+                    );
+                }
+            }
+        }
+    }
+
+    if notes.is_empty() { // convergence always pushes a note, so reporting `false` here is accurate
+        return (String::new(), false);
+    }
+
+    (
+        format!("\n\nDiagnosis from recent results:\n{}", notes.join("\n")),
+        is_converged,
+    )
+}
--- a/src/prompts.rs
+++ b/src/prompts.rs
@@ -134,6 +134,8 @@ Common mistakes to NEVER make:
 - `rsi`, `adx`, `supertrend` are NOT valid inside `apply_func`. Use only `apply_func`
  with `ApplyFuncName` values: `highest`, `lowest`, `sma`, `ema`, `wma`, `std_dev`, `sum`,
  `bollinger_upper`, `bollinger_lower`.
+- `volume` is a candle FIELD, not a func name. Access it as `{{"kind":"field","field":"volume"}}`.
+  To compute EMA of volume: `{{"kind":"apply_func","name":"ema","period":20,"expr":{{"kind":"field","field":"volume"}}}}`.

 ## Working examples

@@ -338,6 +340,7 @@ pub fn iteration_prompt(
    iteration: u32,
    results_history: &str,
    best_so_far: Option<&str>,
+    diagnosis: &str,
 ) -> String {
    let best_section = match best_so_far {
        Some(strat) => format!(
@@ -351,10 +354,11 @@ pub fn iteration_prompt(
    };

    format!(
-        r#"Iteration {iteration}. Here are the results from all previous backtests:
+        r#"Iteration {iteration}. Here are the results from all previous backtests
+(each iteration includes the strategy JSON that was tested):

 {results_history}
-{best_section}
+{best_section}{diagnosis}

 Based on these results, design the next strategy to test. Learn from what worked
 and what didn't. If a strategy family consistently fails, try a different one.
--- a/src/swym.rs
+++ b/src/swym.rs
@@ -165,6 +165,9 @@ fn format_audit_summary(audit: &Value) -> String {
            .map(|rule| {
                let idx = rule["rule_index"].as_u64().unwrap_or(0);
                let fired = rule["times_fired"].as_u64().unwrap_or(0);
+                // Include the rule comment so the LLM knows which rule is which.
+                let comment = rule["rule_comment"].as_str().unwrap_or("");
+                let comment_part = if comment.is_empty() { String::new() } else { format!(" \"{comment}\"") };
                let cond_summary = rule["conditions"]
                    .as_array()
                    .map(|conds| {
@@ -185,7 +188,7 @@ fn format_audit_summary(audit: &Value) -> String {
                            .join(" ")
                    })
                    .unwrap_or_default();
-                format!("R{idx}(f={fired})[{cond_summary}]")
+                format!("R{idx}(f={fired}){comment_part}[{cond_summary}]")
            })
            .collect();
        return parts.join(" | ");