fix: weight language graph by repo language proportions

Previously, each commit was counted once for every language in the
repo, regardless of that language's share, so Shell (present in many
repos as small deploy scripts) appeared larger than Rust. Each commit
is now weighted by the language's share of the repo's bytes (e.g. a
commit to a 95% Rust / 5% Shell repo contributes 0.95 to Rust and
0.05 to Shell).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-06 06:59:47 +03:00
parent ee93429317
commit 14643273c0

View File

@@ -229,14 +229,17 @@ impl EventReader for PgStore {
async fn language_daily_counts(&self, from: NaiveDate, to: NaiveDate) -> Result<Vec<LanguageDailyCount>, StoreError> {
let rows = sqlx::query(
r#"
SELECT date, language, color,
ROUND(SUM(weight))::bigint AS commits
FROM (
SELECT d::date AS date,
rl.language,
COALESCE(MAX(rl.color),
COALESCE(rl.color,
(SELECT color FROM repo_languages
WHERE language = rl.language AND color IS NOT NULL
LIMIT 1)
) AS color,
COUNT(e.id)::bigint AS commits
rl.bytes::float / NULLIF(rt.total, 0) AS weight
FROM generate_series($1::date, $2::date, '1 day') d
JOIN events e
ON e.occurred_at >= (d::date || 'T00:00:00Z')::timestamptz
@@ -257,8 +260,15 @@ impl EventReader for PgStore {
)
ELSE NULL
END
GROUP BY d::date, rl.language
ORDER BY d::date, commits DESC
JOIN LATERAL (
SELECT SUM(bytes)::float AS total
FROM repo_languages r2
WHERE r2.source = rl.source AND r2.repo = rl.repo
) rt ON true
) weighted
GROUP BY date, language, color
HAVING ROUND(SUM(weight)) > 0
ORDER BY date, commits DESC
"#,
)
.bind(from)