fix: weight language graph by repo language proportions
Previously, each commit was counted once for every language present in the repo, regardless of that language's share of the code, so Shell (present in many repos as small deploy scripts) appeared larger than Rust. Each commit is now weighted by the language's proportion of the repo's bytes (e.g. a commit to a 95% Rust / 5% Shell repo contributes 0.95 to Rust and 0.05 to Shell). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -229,14 +229,17 @@ impl EventReader for PgStore {
|
||||
async fn language_daily_counts(&self, from: NaiveDate, to: NaiveDate) -> Result<Vec<LanguageDailyCount>, StoreError> {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT date, language, color,
|
||||
ROUND(SUM(weight))::bigint AS commits
|
||||
FROM (
|
||||
SELECT d::date AS date,
|
||||
rl.language,
|
||||
COALESCE(MAX(rl.color),
|
||||
COALESCE(rl.color,
|
||||
(SELECT color FROM repo_languages
|
||||
WHERE language = rl.language AND color IS NOT NULL
|
||||
LIMIT 1)
|
||||
) AS color,
|
||||
COUNT(e.id)::bigint AS commits
|
||||
rl.bytes::float / NULLIF(rt.total, 0) AS weight
|
||||
FROM generate_series($1::date, $2::date, '1 day') d
|
||||
JOIN events e
|
||||
ON e.occurred_at >= (d::date || 'T00:00:00Z')::timestamptz
|
||||
@@ -257,8 +260,15 @@ impl EventReader for PgStore {
|
||||
)
|
||||
ELSE NULL
|
||||
END
|
||||
GROUP BY d::date, rl.language
|
||||
ORDER BY d::date, commits DESC
|
||||
JOIN LATERAL (
|
||||
SELECT SUM(bytes)::float AS total
|
||||
FROM repo_languages r2
|
||||
WHERE r2.source = rl.source AND r2.repo = rl.repo
|
||||
) rt ON true
|
||||
) weighted
|
||||
GROUP BY date, language, color
|
||||
HAVING ROUND(SUM(weight)) > 0
|
||||
ORDER BY date, commits DESC
|
||||
"#,
|
||||
)
|
||||
.bind(from)
|
||||
|
||||
Reference in New Issue
Block a user