feat: language stream graph on dashboard
Full-stack feature showing programming languages by commit activity as a stream graph on the dashboard.

Backend:
- migration: repo_languages table (source, repo, language, bytes, color, fetched_at)
- worker: fetch language breakdowns via GitHub GraphQL (batched, 20 repos/request) and the Gitea REST API during poll cycles
- API: GET /v1/languages/daily (daily commit counts per language), GET /v1/languages/repos (all stored repo language data)
- fix timezone bug in daily_counts and language_daily_counts: the PostgreSQL server timezone (Europe/Sofia, UTC+3) shifted day boundaries, miscounting events near midnight. The generate_series JOINs now use explicit UTC boundaries.
- use a per-source CASE for repo name extraction in the language query to match the Gitea payload structure (repo.full_name vs repo.name)
- Gitea languages use GitHub colors via a COALESCE fallback

Frontend:
- LanguageStreamGraph component: pure SVG stream graph, weekly buckets, centered baseline, top 8 languages + Other, GitHub canonical language colors, legend with color dots
- DashPage/ProjectPage: fetch repo languages once via the new endpoint instead of per-repo forge proxy calls (eliminates 200+ GitHub API calls and 403 rate limit errors)
- removed the fetchLanguages forge proxy wrapper (dead code)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
9
crates/moments-data/migrations/0004_repo_languages.sql
Normal file
9
crates/moments-data/migrations/0004_repo_languages.sql
Normal file
@@ -0,0 +1,9 @@
|
||||
-- Language breakdown per repository: one row per (source, repo, language).
CREATE TABLE repo_languages (
    source     TEXT        NOT NULL,
    repo       TEXT        NOT NULL,
    language   TEXT        NOT NULL,
    bytes      BIGINT      NOT NULL,
    color      TEXT,                               -- nullable
    fetched_at TIMESTAMPTZ NOT NULL DEFAULT now(), -- set by the database unless supplied
    PRIMARY KEY (source, repo, language)
);
|
||||
@@ -9,12 +9,13 @@
|
||||
//! Each item carries a self-contained payload — including the event-emitting
|
||||
//! host — so the reshape layer can construct URLs without needing config.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Utc};
|
||||
use moments_core::{EventSource, EventWriter, PollerStateStore, SourceError};
|
||||
use moments_entities::{Event, Source};
|
||||
use moments_entities::{Event, RepoLanguage, Source};
|
||||
use reqwest::{Client, header};
|
||||
use serde_json::Value;
|
||||
use tracing::debug;
|
||||
@@ -126,17 +127,19 @@ impl GiteaSource {
|
||||
/// for org feeds which contain all members' activity).
|
||||
///
|
||||
/// `base_url` should contain everything except the `&page=N` suffix.
|
||||
/// Returns (ingested_count, set_of_repo_full_names).
|
||||
async fn poll_feed(
|
||||
&self,
|
||||
state_key: &str,
|
||||
base_url: &str,
|
||||
filter_user: bool,
|
||||
) -> Result<usize, SourceError> {
|
||||
) -> Result<(usize, HashSet<String>), SourceError> {
|
||||
let prior = self.state.load(state_key).await?;
|
||||
let first_run = prior.is_none();
|
||||
let max_pages = if first_run { MAX_BACKFILL_PAGES } else { 1 };
|
||||
|
||||
let mut total = 0usize;
|
||||
let mut repos = HashSet::new();
|
||||
for page in 1..=max_pages {
|
||||
let url = format!("{base_url}&page={page}");
|
||||
let req = self.apply_headers(self.client.get(&url));
|
||||
@@ -155,6 +158,17 @@ impl GiteaSource {
|
||||
break;
|
||||
}
|
||||
|
||||
// Collect repo names from feed items
|
||||
for item in &items {
|
||||
if let Some(name) = item
|
||||
.get("repo")
|
||||
.and_then(|r| r.get("full_name"))
|
||||
.and_then(Value::as_str)
|
||||
{
|
||||
repos.insert(name.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
let events: Vec<Event> = items
|
||||
.iter()
|
||||
.filter(|it| {
|
||||
@@ -177,6 +191,44 @@ impl GiteaSource {
|
||||
}
|
||||
|
||||
self.state.touch(state_key).await?;
|
||||
Ok((total, repos))
|
||||
}
|
||||
|
||||
/// Fetch language breakdowns for the given repos via the Gitea REST API.
|
||||
async fn fetch_languages(&self, repos: &HashSet<String>) -> Result<usize, SourceError> {
|
||||
let mut total = 0usize;
|
||||
for repo in repos {
|
||||
let url = format!(
|
||||
"https://{}/api/v1/repos/{}/languages",
|
||||
self.config.host, repo
|
||||
);
|
||||
let req = self.apply_headers(self.client.get(&url));
|
||||
let resp = req
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| SourceError::Http(e.to_string()))?;
|
||||
if !resp.status().is_success() {
|
||||
tracing::warn!(repo = %repo, status = %resp.status(), "gitea language fetch failed; skipping");
|
||||
continue;
|
||||
}
|
||||
let lang_map: std::collections::HashMap<String, i64> = resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| SourceError::Parse(e.to_string()))?;
|
||||
|
||||
let languages: Vec<RepoLanguage> = lang_map
|
||||
.into_iter()
|
||||
.map(|(language, bytes)| RepoLanguage {
|
||||
source: Source::Gitea,
|
||||
repo: repo.clone(),
|
||||
language,
|
||||
bytes,
|
||||
color: None, // Gitea doesn't return colors
|
||||
})
|
||||
.collect();
|
||||
total += self.writer.upsert_repo_languages(&languages).await?;
|
||||
}
|
||||
debug!(total, repos = repos.len(), "gitea repo languages updated");
|
||||
Ok(total)
|
||||
}
|
||||
}
|
||||
@@ -188,9 +240,12 @@ impl EventSource for GiteaSource {
|
||||
}
|
||||
|
||||
async fn poll(&self) -> Result<usize, SourceError> {
|
||||
let mut all_repos = HashSet::new();
|
||||
|
||||
// Poll user's own activity feed (existing behavior).
|
||||
let user_url = self.user_feed_base_url();
|
||||
let mut total = self.poll_feed(SOURCE_NAME, &user_url, false).await?;
|
||||
let (mut total, repos) = self.poll_feed(SOURCE_NAME, &user_url, false).await?;
|
||||
all_repos.extend(repos);
|
||||
|
||||
// Discover orgs and poll each org's activity feed, filtering for
|
||||
// events performed by this user.
|
||||
@@ -199,13 +254,20 @@ impl EventSource for GiteaSource {
|
||||
let state_key = format!("gitea:org:{org}");
|
||||
let org_url = self.org_feed_base_url(org);
|
||||
match self.poll_feed(&state_key, &org_url, true).await {
|
||||
Ok(n) => total += n,
|
||||
Ok((n, repos)) => {
|
||||
total += n;
|
||||
all_repos.extend(repos);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(org = %org, error = %e, "failed to poll org feed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Err(e) = self.fetch_languages(&all_repos).await {
|
||||
tracing::warn!(error = %e, "gitea language fetch failed; continuing");
|
||||
}
|
||||
|
||||
debug!(ingested = total, orgs = orgs.len(), "gitea poll complete");
|
||||
Ok(total)
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ use std::sync::Arc;
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Utc};
|
||||
use moments_core::{EventSource, EventWriter, PollerStateStore, SourceError};
|
||||
use moments_entities::{Event, Source};
|
||||
use moments_entities::{Event, RepoLanguage, Source};
|
||||
use reqwest::{Client, header};
|
||||
use serde_json::Value;
|
||||
use tracing::{debug, warn};
|
||||
@@ -296,6 +296,105 @@ impl GithubRepoSource {
|
||||
self.state.save(&state_key, None, newest).await?;
|
||||
Ok(total)
|
||||
}
|
||||
|
||||
/// Batch-fetch language breakdowns for repos via GraphQL, upserting
|
||||
/// into repo_languages. Repos are batched using GraphQL aliases to
|
||||
/// minimise round trips.
|
||||
async fn fetch_languages(&self, repos: &[Repo]) -> Result<usize, SourceError> {
|
||||
let token = match &self.config.token {
|
||||
Some(t) => t,
|
||||
None => return Ok(0),
|
||||
};
|
||||
|
||||
let mut total = 0usize;
|
||||
for chunk in repos.chunks(20) {
|
||||
let mut fragments = Vec::with_capacity(chunk.len());
|
||||
for (i, repo) in chunk.iter().enumerate() {
|
||||
let parts: Vec<&str> = repo.full_name.splitn(2, '/').collect();
|
||||
if parts.len() != 2 {
|
||||
continue;
|
||||
}
|
||||
fragments.push(format!(
|
||||
r#"r{i}: repository(owner: "{}", name: "{}") {{ languages(first: 20, orderBy: {{field: SIZE, direction: DESC}}) {{ edges {{ size node {{ name color }} }} }} }}"#,
|
||||
parts[0], parts[1]
|
||||
));
|
||||
}
|
||||
if fragments.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let query = format!("{{ {} }}", fragments.join(" "));
|
||||
let body = serde_json::json!({ "query": query });
|
||||
|
||||
let resp = self
|
||||
.client
|
||||
.post("https://api.github.com/graphql")
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header(header::USER_AGENT, USER_AGENT)
|
||||
.header(header::CONTENT_TYPE, "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| SourceError::Http(e.to_string()))?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
warn!(status = %resp.status(), "GraphQL language fetch failed");
|
||||
break;
|
||||
}
|
||||
|
||||
let data: Value = resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| SourceError::Parse(e.to_string()))?;
|
||||
|
||||
if let Some(errors) = data.get("errors").and_then(Value::as_array) {
|
||||
if let Some(msg) = errors.first().and_then(|e| e.get("message")).and_then(Value::as_str) {
|
||||
warn!(error = %msg, "GraphQL language fetch had errors");
|
||||
}
|
||||
}
|
||||
|
||||
let data_obj = match data.get("data") {
|
||||
Some(d) => d,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let mut languages = Vec::new();
|
||||
for (i, repo) in chunk.iter().enumerate() {
|
||||
let alias = format!("r{i}");
|
||||
let edges = data_obj
|
||||
.get(&alias)
|
||||
.and_then(|r| r.get("languages"))
|
||||
.and_then(|l| l.get("edges"))
|
||||
.and_then(Value::as_array);
|
||||
if let Some(edges) = edges {
|
||||
for edge in edges {
|
||||
let size = edge.get("size").and_then(Value::as_i64).unwrap_or(0);
|
||||
let name = edge
|
||||
.get("node")
|
||||
.and_then(|n| n.get("name"))
|
||||
.and_then(Value::as_str);
|
||||
let color = edge
|
||||
.get("node")
|
||||
.and_then(|n| n.get("color"))
|
||||
.and_then(Value::as_str);
|
||||
if let Some(name) = name {
|
||||
languages.push(RepoLanguage {
|
||||
source: Source::Github,
|
||||
repo: repo.full_name.clone(),
|
||||
language: name.to_string(),
|
||||
bytes: size,
|
||||
color: color.map(String::from),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
total += self.writer.upsert_repo_languages(&languages).await?;
|
||||
}
|
||||
|
||||
debug!(total, "repo languages updated");
|
||||
Ok(total)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -327,6 +426,10 @@ impl EventSource for GithubRepoSource {
|
||||
}
|
||||
}
|
||||
|
||||
if let Err(e) = self.fetch_languages(&repos).await {
|
||||
warn!(error = %e, "language fetch failed; continuing");
|
||||
}
|
||||
|
||||
self.state.touch(SOURCE_NAME).await?;
|
||||
debug!(ingested = total, repos = repos.len(), "github-repo poll complete");
|
||||
Ok(total)
|
||||
|
||||
@@ -248,6 +248,12 @@ mod tests {
|
||||
) -> Result<usize, moments_core::StoreError> {
|
||||
Ok(0)
|
||||
}
|
||||
// Test stub: ignores the supplied languages and reports zero rows written.
async fn upsert_repo_languages(
|
||||
&self,
|
||||
_languages: &[moments_entities::RepoLanguage],
|
||||
) -> Result<usize, moments_core::StoreError> {
|
||||
Ok(0)
|
||||
}
|
||||
}
|
||||
struct NoopState;
|
||||
#[async_trait]
|
||||
|
||||
@@ -9,7 +9,7 @@ use async_trait::async_trait;
|
||||
use chrono::{DateTime, Utc};
|
||||
use moments_core::{EventReader, EventWriter, PollerState, PollerStateStore, StoreError};
|
||||
use chrono::NaiveDate;
|
||||
use moments_entities::{DailyCount, Event, EventQuery, ProjectSummary, Source, SourceSummary};
|
||||
use moments_entities::{DailyCount, Event, EventQuery, LanguageDailyCount, ProjectSummary, RepoLanguage, Source, SourceSummary};
|
||||
use sqlx::Row;
|
||||
use sqlx::postgres::{PgPool, PgPoolOptions};
|
||||
use std::str::FromStr;
|
||||
@@ -203,7 +203,8 @@ impl EventReader for PgStore {
|
||||
COUNT(e.id)::bigint AS count
|
||||
FROM generate_series($1::date, $2::date, '1 day') d
|
||||
LEFT JOIN events e
|
||||
ON e.occurred_at >= d AND e.occurred_at < d + interval '1 day'
|
||||
ON e.occurred_at >= (d::date || 'T00:00:00Z')::timestamptz
|
||||
AND e.occurred_at < ((d::date + 1) || 'T00:00:00Z')::timestamptz
|
||||
AND e.public = true
|
||||
GROUP BY d::date
|
||||
ORDER BY d::date
|
||||
@@ -224,6 +225,90 @@ impl EventReader for PgStore {
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
async fn language_daily_counts(&self, from: NaiveDate, to: NaiveDate) -> Result<Vec<LanguageDailyCount>, StoreError> {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT d::date AS date,
|
||||
rl.language,
|
||||
COALESCE(MAX(rl.color),
|
||||
(SELECT color FROM repo_languages
|
||||
WHERE language = rl.language AND color IS NOT NULL
|
||||
LIMIT 1)
|
||||
) AS color,
|
||||
COUNT(e.id)::bigint AS commits
|
||||
FROM generate_series($1::date, $2::date, '1 day') d
|
||||
JOIN events e
|
||||
ON e.occurred_at >= (d::date || 'T00:00:00Z')::timestamptz
|
||||
AND e.occurred_at < ((d::date + 1) || 'T00:00:00Z')::timestamptz
|
||||
AND e.public = true
|
||||
AND e.action IN ('Commit', 'PushEvent', 'commit_repo')
|
||||
JOIN repo_languages rl
|
||||
ON rl.source = e.source
|
||||
AND rl.repo = CASE e.source
|
||||
WHEN 'github' THEN COALESCE(
|
||||
e.payload->'repo'->>'name',
|
||||
e.payload->'repository'->>'full_name',
|
||||
e.payload->>'_repo'
|
||||
)
|
||||
WHEN 'gitea' THEN COALESCE(
|
||||
e.payload->'repo'->>'full_name',
|
||||
e.payload->'repo'->>'name'
|
||||
)
|
||||
ELSE NULL
|
||||
END
|
||||
GROUP BY d::date, rl.language
|
||||
ORDER BY d::date, commits DESC
|
||||
"#,
|
||||
)
|
||||
.bind(from)
|
||||
.bind(to)
|
||||
.fetch_all(&self.pool)
|
||||
.await
|
||||
.map_err(map_err)?;
|
||||
|
||||
rows.into_iter()
|
||||
.map(|r| {
|
||||
Ok(LanguageDailyCount {
|
||||
date: r.try_get("date").map_err(map_err)?,
|
||||
language: r.try_get("language").map_err(map_err)?,
|
||||
color: r.try_get("color").map_err(map_err)?,
|
||||
commits: r.try_get("commits").map_err(map_err)?,
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
async fn repo_languages(&self) -> Result<Vec<RepoLanguage>, StoreError> {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT source, repo, language, bytes,
|
||||
COALESCE(color,
|
||||
(SELECT color FROM repo_languages r2
|
||||
WHERE r2.language = repo_languages.language AND r2.color IS NOT NULL
|
||||
LIMIT 1)
|
||||
) AS color
|
||||
FROM repo_languages
|
||||
ORDER BY repo, bytes DESC
|
||||
"#,
|
||||
)
|
||||
.fetch_all(&self.pool)
|
||||
.await
|
||||
.map_err(map_err)?;
|
||||
|
||||
rows.into_iter()
|
||||
.map(|r| {
|
||||
let source_str: String = r.try_get("source").map_err(map_err)?;
|
||||
Ok(RepoLanguage {
|
||||
source: Source::from_str(&source_str).map_err(map_err)?,
|
||||
repo: r.try_get("repo").map_err(map_err)?,
|
||||
language: r.try_get("language").map_err(map_err)?,
|
||||
bytes: r.try_get("bytes").map_err(map_err)?,
|
||||
color: r.try_get("color").map_err(map_err)?,
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -331,4 +416,37 @@ impl EventWriter for PgStore {
|
||||
tx.commit().await.map_err(map_err)?;
|
||||
Ok(inserted)
|
||||
}
|
||||
|
||||
async fn upsert_repo_languages(&self, languages: &[RepoLanguage]) -> Result<usize, StoreError> {
|
||||
if languages.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let mut tx = self.pool.begin().await.map_err(map_err)?;
|
||||
let mut count = 0usize;
|
||||
for lang in languages {
|
||||
let n = sqlx::query(
|
||||
r#"
|
||||
INSERT INTO repo_languages (source, repo, language, bytes, color, fetched_at)
|
||||
VALUES ($1, $2, $3, $4, $5, now())
|
||||
ON CONFLICT (source, repo, language) DO UPDATE
|
||||
SET bytes = EXCLUDED.bytes,
|
||||
color = EXCLUDED.color,
|
||||
fetched_at = EXCLUDED.fetched_at
|
||||
"#,
|
||||
)
|
||||
.bind(lang.source.as_str())
|
||||
.bind(&lang.repo)
|
||||
.bind(&lang.language)
|
||||
.bind(lang.bytes)
|
||||
.bind(&lang.color)
|
||||
.execute(&mut *tx)
|
||||
.await
|
||||
.map_err(map_err)?
|
||||
.rows_affected();
|
||||
count += n as usize;
|
||||
}
|
||||
tx.commit().await.map_err(map_err)?;
|
||||
Ok(count)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user