fix: weight language graph by repo language proportions
Each commit was counted once per language in the repo regardless of that language's share, so Shell (present in many repos as small deploy scripts) appeared larger than Rust. Each commit is now weighted by the language's byte proportion in the repo (e.g. a commit to a 95% Rust / 5% Shell repo contributes 0.95 to Rust and 0.05 to Shell).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -229,36 +229,46 @@ impl EventReader for PgStore {
|
|||||||
async fn language_daily_counts(&self, from: NaiveDate, to: NaiveDate) -> Result<Vec<LanguageDailyCount>, StoreError> {
|
async fn language_daily_counts(&self, from: NaiveDate, to: NaiveDate) -> Result<Vec<LanguageDailyCount>, StoreError> {
|
||||||
let rows = sqlx::query(
|
let rows = sqlx::query(
|
||||||
r#"
|
r#"
|
||||||
SELECT d::date AS date,
|
SELECT date, language, color,
|
||||||
rl.language,
|
ROUND(SUM(weight))::bigint AS commits
|
||||||
COALESCE(MAX(rl.color),
|
FROM (
|
||||||
(SELECT color FROM repo_languages
|
SELECT d::date AS date,
|
||||||
WHERE language = rl.language AND color IS NOT NULL
|
rl.language,
|
||||||
LIMIT 1)
|
COALESCE(rl.color,
|
||||||
) AS color,
|
(SELECT color FROM repo_languages
|
||||||
COUNT(e.id)::bigint AS commits
|
WHERE language = rl.language AND color IS NOT NULL
|
||||||
FROM generate_series($1::date, $2::date, '1 day') d
|
LIMIT 1)
|
||||||
JOIN events e
|
) AS color,
|
||||||
ON e.occurred_at >= (d::date || 'T00:00:00Z')::timestamptz
|
rl.bytes::float / NULLIF(rt.total, 0) AS weight
|
||||||
AND e.occurred_at < ((d::date + 1) || 'T00:00:00Z')::timestamptz
|
FROM generate_series($1::date, $2::date, '1 day') d
|
||||||
AND e.public = true
|
JOIN events e
|
||||||
AND e.action IN ('Commit', 'PushEvent', 'commit_repo')
|
ON e.occurred_at >= (d::date || 'T00:00:00Z')::timestamptz
|
||||||
JOIN repo_languages rl
|
AND e.occurred_at < ((d::date + 1) || 'T00:00:00Z')::timestamptz
|
||||||
ON rl.source = e.source
|
AND e.public = true
|
||||||
AND rl.repo = CASE e.source
|
AND e.action IN ('Commit', 'PushEvent', 'commit_repo')
|
||||||
WHEN 'github' THEN COALESCE(
|
JOIN repo_languages rl
|
||||||
e.payload->'repo'->>'name',
|
ON rl.source = e.source
|
||||||
e.payload->'repository'->>'full_name',
|
AND rl.repo = CASE e.source
|
||||||
e.payload->>'_repo'
|
WHEN 'github' THEN COALESCE(
|
||||||
)
|
e.payload->'repo'->>'name',
|
||||||
WHEN 'gitea' THEN COALESCE(
|
e.payload->'repository'->>'full_name',
|
||||||
e.payload->'repo'->>'full_name',
|
e.payload->>'_repo'
|
||||||
e.payload->'repo'->>'name'
|
)
|
||||||
)
|
WHEN 'gitea' THEN COALESCE(
|
||||||
ELSE NULL
|
e.payload->'repo'->>'full_name',
|
||||||
END
|
e.payload->'repo'->>'name'
|
||||||
GROUP BY d::date, rl.language
|
)
|
||||||
ORDER BY d::date, commits DESC
|
ELSE NULL
|
||||||
|
END
|
||||||
|
JOIN LATERAL (
|
||||||
|
SELECT SUM(bytes)::float AS total
|
||||||
|
FROM repo_languages r2
|
||||||
|
WHERE r2.source = rl.source AND r2.repo = rl.repo
|
||||||
|
) rt ON true
|
||||||
|
) weighted
|
||||||
|
GROUP BY date, language, color
|
||||||
|
HAVING ROUND(SUM(weight)) > 0
|
||||||
|
ORDER BY date, commits DESC
|
||||||
"#,
|
"#,
|
||||||
)
|
)
|
||||||
.bind(from)
|
.bind(from)
|
||||||
|
|||||||
Reference in New Issue
Block a user