From 14643273c01fafbcfe96733b8126dd46f98d5398 Mon Sep 17 00:00:00 2001 From: rob thijssen Date: Wed, 6 May 2026 06:59:47 +0300 Subject: [PATCH] fix: weight language graph by repo language proportions Each commit was counted once per language in the repo regardless of that language's share, so Shell (present in many repos as small deploy scripts) appeared larger than Rust. Now weights each commit by the language's byte proportion in the repo (e.g. a commit to a 95% Rust / 5% Shell repo contributes 0.95 to Rust, 0.05 to Shell). Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/moments-data/src/lib.rs | 70 +++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/crates/moments-data/src/lib.rs b/crates/moments-data/src/lib.rs index 4edf34e..9fb2ffb 100644 --- a/crates/moments-data/src/lib.rs +++ b/crates/moments-data/src/lib.rs @@ -229,36 +229,46 @@ impl EventReader for PgStore { async fn language_daily_counts(&self, from: NaiveDate, to: NaiveDate) -> Result, StoreError> { let rows = sqlx::query( r#" - SELECT d::date AS date, - rl.language, - COALESCE(MAX(rl.color), - (SELECT color FROM repo_languages - WHERE language = rl.language AND color IS NOT NULL - LIMIT 1) - ) AS color, - COUNT(e.id)::bigint AS commits - FROM generate_series($1::date, $2::date, '1 day') d - JOIN events e - ON e.occurred_at >= (d::date || 'T00:00:00Z')::timestamptz - AND e.occurred_at < ((d::date + 1) || 'T00:00:00Z')::timestamptz - AND e.public = true - AND e.action IN ('Commit', 'PushEvent', 'commit_repo') - JOIN repo_languages rl - ON rl.source = e.source - AND rl.repo = CASE e.source - WHEN 'github' THEN COALESCE( - e.payload->'repo'->>'name', - e.payload->'repository'->>'full_name', - e.payload->>'_repo' - ) - WHEN 'gitea' THEN COALESCE( - e.payload->'repo'->>'full_name', - e.payload->'repo'->>'name' - ) - ELSE NULL - END - GROUP BY d::date, rl.language - ORDER BY d::date, commits DESC + SELECT date, language, color, + ROUND(SUM(weight))::bigint AS commits + FROM ( + SELECT d::date AS date, + rl.language, + COALESCE(rl.color, + (SELECT color FROM repo_languages + WHERE language = rl.language AND color IS NOT NULL + LIMIT 1) + ) AS color, + rl.bytes::float / NULLIF(rt.total, 0) AS weight + FROM generate_series($1::date, $2::date, '1 day') d + JOIN events e + ON e.occurred_at >= (d::date || 'T00:00:00Z')::timestamptz + AND e.occurred_at < ((d::date + 1) || 'T00:00:00Z')::timestamptz + AND e.public = true + AND e.action IN ('Commit', 'PushEvent', 'commit_repo') + JOIN repo_languages rl + ON rl.source = e.source + AND rl.repo = CASE e.source + WHEN 'github' THEN COALESCE( + e.payload->'repo'->>'name', + e.payload->'repository'->>'full_name', + e.payload->>'_repo' + ) + WHEN 'gitea' THEN COALESCE( + e.payload->'repo'->>'full_name', + e.payload->'repo'->>'name' + ) + ELSE NULL + END + JOIN LATERAL ( + SELECT SUM(bytes)::float AS total + FROM repo_languages r2 + WHERE r2.source = rl.source AND r2.repo = rl.repo + ) rt ON true + ) weighted + GROUP BY date, language, color + HAVING ROUND(SUM(weight)) > 0 + ORDER BY date, commits DESC "#, ) .bind(from)