fix: weight language graph by repo language proportions

Previously, each commit was counted once per language in the repo,
regardless of that language's share, so Shell (present in many repos
as small deploy scripts) appeared larger than Rust. Each commit is now
weighted by the language's byte proportion in the repo (e.g. a commit
to a 95% Rust / 5% Shell repo contributes 0.95 to Rust and 0.05 to
Shell). Daily per-language totals are rounded to whole numbers, and
totals that round to zero are omitted.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-06 06:59:47 +03:00
parent ee93429317
commit 14643273c0

View File

@@ -229,14 +229,17 @@ impl EventReader for PgStore {
async fn language_daily_counts(&self, from: NaiveDate, to: NaiveDate) -> Result<Vec<LanguageDailyCount>, StoreError> { async fn language_daily_counts(&self, from: NaiveDate, to: NaiveDate) -> Result<Vec<LanguageDailyCount>, StoreError> {
let rows = sqlx::query( let rows = sqlx::query(
r#" r#"
SELECT date, language, color,
ROUND(SUM(weight))::bigint AS commits
FROM (
SELECT d::date AS date, SELECT d::date AS date,
rl.language, rl.language,
COALESCE(MAX(rl.color), COALESCE(rl.color,
(SELECT color FROM repo_languages (SELECT color FROM repo_languages
WHERE language = rl.language AND color IS NOT NULL WHERE language = rl.language AND color IS NOT NULL
LIMIT 1) LIMIT 1)
) AS color, ) AS color,
COUNT(e.id)::bigint AS commits rl.bytes::float / NULLIF(rt.total, 0) AS weight
FROM generate_series($1::date, $2::date, '1 day') d FROM generate_series($1::date, $2::date, '1 day') d
JOIN events e JOIN events e
ON e.occurred_at >= (d::date || 'T00:00:00Z')::timestamptz ON e.occurred_at >= (d::date || 'T00:00:00Z')::timestamptz
@@ -257,8 +260,15 @@ impl EventReader for PgStore {
) )
ELSE NULL ELSE NULL
END END
GROUP BY d::date, rl.language JOIN LATERAL (
ORDER BY d::date, commits DESC SELECT SUM(bytes)::float AS total
FROM repo_languages r2
WHERE r2.source = rl.source AND r2.repo = rl.repo
) rt ON true
) weighted
GROUP BY date, language, color
HAVING ROUND(SUM(weight)) > 0
ORDER BY date, commits DESC
"#, "#,
) )
.bind(from) .bind(from)