feat: language stream graph on dashboard

Full-stack feature showing programming languages by commit activity
as a stream graph on the dashboard.

Backend:
- migration: repo_languages table (source, repo, language, bytes, color)
- worker: fetch language breakdowns via GitHub GraphQL (batched,
  20 repos/request) and Gitea REST API during poll cycles
- API: GET /v1/languages/daily (daily commit counts per language),
  GET /v1/languages/repos (all stored repo language data)
- fix timezone bug in daily_counts and language_daily_counts: the
  PostgreSQL server timezone (Europe/Sofia, UTC+3) shifted day
  boundaries, miscounting events near midnight. Now uses explicit
  UTC boundaries in generate_series JOINs.
- use per-source CASE for repo name extraction in language query
  to match gitea payload structure (repo.full_name vs repo.name)
- Gitea languages use GitHub colors via COALESCE fallback

Frontend:
- LanguageStreamGraph component: pure SVG stream graph, weekly
  buckets, centered baseline, top 8 languages + Other, GitHub
  canonical language colors, legend with color dots
- DashPage/ProjectPage: fetch repo languages once via new endpoint
  instead of per-repo forge proxy calls (eliminates 200+ GitHub
  API calls and 403 rate limit errors)
- removed fetchLanguages forge proxy wrapper (dead code)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-06 06:27:59 +03:00
parent c66aaeb268
commit ee93429317
12 changed files with 604 additions and 63 deletions

View File

@@ -11,7 +11,7 @@ use chrono::{DateTime, Datelike, NaiveDate, Utc};
use clap::Parser;
use moments_core::{EventReader, reshape};
use moments_data::PgStore;
use moments_entities::{DailyCount, EventQuery, ProjectSummary, Source, SourceSummary, TimelineItem};
use moments_entities::{DailyCount, EventQuery, LanguageDailyCount, ProjectSummary, RepoLanguage, Source, SourceSummary, TimelineItem};
use serde::Deserialize;
use tower_http::{cors::CorsLayer, trace::TraceLayer};
use tracing::info;
@@ -57,6 +57,8 @@ async fn main() -> anyhow::Result<()> {
.route("/v1/sources", get(list_sources))
.route("/v1/projects", get(list_projects))
.route("/v1/activity/daily", get(daily_counts))
.route("/v1/languages/daily", get(language_daily_counts))
.route("/v1/languages/repos", get(repo_languages))
.route("/v1/forge/{source}/{*rest}", get(forge_proxy))
.route("/v1/og/contributions.png", get(og_contributions))
.with_state(state)
@@ -157,6 +159,23 @@ async fn daily_counts(
Ok(Json(counts))
}
/// Handler for `GET /v1/languages/daily`: per-language daily commit counts.
///
/// `to` defaults to today's UTC date and `from` defaults to 365 days before
/// `to`. Store failures are mapped through `internal`.
async fn language_daily_counts(
    State(state): State<AppState>,
    Query(params): Query<DailyCountsParams>,
) -> Result<Json<Vec<LanguageDailyCount>>, ApiError> {
    // Resolve the upper bound first: the default lower bound is derived from it.
    let to = match params.to {
        Some(day) => day,
        None => Utc::now().date_naive(),
    };
    let from = match params.from {
        Some(day) => day,
        None => to - chrono::Duration::days(365),
    };

    let rows = state
        .store
        .language_daily_counts(from, to)
        .await
        .map_err(internal)?;
    Ok(Json(rows))
}
/// Handler for `GET /v1/languages/repos`: every stored repo language row,
/// serialised as a JSON array. Store failures are mapped through `internal`.
async fn repo_languages(
    State(state): State<AppState>,
) -> Result<Json<Vec<RepoLanguage>>, ApiError> {
    let rows = state
        .store
        .repo_languages()
        .await
        .map_err(internal)?;
    Ok(Json(rows))
}
async fn og_contributions(
State(state): State<AppState>,
) -> Result<impl IntoResponse, ApiError> {

View File

@@ -6,7 +6,7 @@ pub use sources::{EventSource, PollerState, PollerStateStore, SourceError, run_p
use async_trait::async_trait;
use chrono::NaiveDate;
use moments_entities::{DailyCount, Event, EventQuery, ProjectSummary, SourceSummary};
use moments_entities::{DailyCount, Event, EventQuery, LanguageDailyCount, ProjectSummary, RepoLanguage, SourceSummary};
#[derive(Debug, thiserror::Error)]
pub enum StoreError {
@@ -21,10 +21,13 @@ pub trait EventReader: Send + Sync {
async fn source_summaries(&self, include_private: bool) -> Result<Vec<SourceSummary>, StoreError>;
async fn list_projects(&self) -> Result<Vec<ProjectSummary>, StoreError>;
async fn daily_counts(&self, from: NaiveDate, to: NaiveDate) -> Result<Vec<DailyCount>, StoreError>;
async fn language_daily_counts(&self, from: NaiveDate, to: NaiveDate) -> Result<Vec<LanguageDailyCount>, StoreError>;
async fn repo_languages(&self) -> Result<Vec<RepoLanguage>, StoreError>;
}
/// Write-side port consumed by `moments-worker`. Idempotent upserts on `id`.
#[async_trait]
pub trait EventWriter: Send + Sync {
/// Upsert a batch of events.
///
/// On success returns a count supplied by the implementation
/// (NOTE(review): presumably the number of rows written — confirm whether
/// updates of existing rows are included).
async fn upsert_events(&self, events: &[Event]) -> Result<usize, StoreError>;
/// Upsert a batch of per-repo language rows.
///
/// On success returns a count supplied by the implementation
/// (NOTE(review): presumably the number of rows written — confirm against
/// each implementor).
async fn upsert_repo_languages(&self, languages: &[RepoLanguage]) -> Result<usize, StoreError>;
}

View File

@@ -0,0 +1,9 @@
-- Per-repository language breakdown, one row per (source, repo, language).
CREATE TABLE repo_languages (
-- Forge the row came from.
source TEXT NOT NULL,
-- Repository identifier as reported by the forge.
repo TEXT NOT NULL,
-- Language name as reported by the forge.
language TEXT NOT NULL,
-- Size attributed to this language in the repository.
bytes BIGINT NOT NULL,
-- Display colour for the language; nullable because a forge may not supply one.
color TEXT,
-- Time the row was written; defaults to the insertion time.
fetched_at TIMESTAMPTZ NOT NULL DEFAULT now(),
PRIMARY KEY (source, repo, language)
);

View File

@@ -9,12 +9,13 @@
//! Each item carries a self-contained payload — including the event-emitting
//! host — so the reshape layer can construct URLs without needing config.
use std::collections::HashSet;
use std::sync::Arc;
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use moments_core::{EventSource, EventWriter, PollerStateStore, SourceError};
use moments_entities::{Event, Source};
use moments_entities::{Event, RepoLanguage, Source};
use reqwest::{Client, header};
use serde_json::Value;
use tracing::debug;
@@ -126,17 +127,19 @@ impl GiteaSource {
/// for org feeds which contain all members' activity).
///
/// `base_url` should contain everything except the `&page=N` suffix.
/// Returns (ingested_count, set_of_repo_full_names).
async fn poll_feed(
&self,
state_key: &str,
base_url: &str,
filter_user: bool,
) -> Result<usize, SourceError> {
) -> Result<(usize, HashSet<String>), SourceError> {
let prior = self.state.load(state_key).await?;
let first_run = prior.is_none();
let max_pages = if first_run { MAX_BACKFILL_PAGES } else { 1 };
let mut total = 0usize;
let mut repos = HashSet::new();
for page in 1..=max_pages {
let url = format!("{base_url}&page={page}");
let req = self.apply_headers(self.client.get(&url));
@@ -155,6 +158,17 @@ impl GiteaSource {
break;
}
// Collect repo names from feed items
for item in &items {
if let Some(name) = item
.get("repo")
.and_then(|r| r.get("full_name"))
.and_then(Value::as_str)
{
repos.insert(name.to_string());
}
}
let events: Vec<Event> = items
.iter()
.filter(|it| {
@@ -177,6 +191,44 @@ impl GiteaSource {
}
self.state.touch(state_key).await?;
Ok((total, repos))
}
/// Fetch language breakdowns for the given repos via the Gitea REST API.
async fn fetch_languages(&self, repos: &HashSet<String>) -> Result<usize, SourceError> {
let mut total = 0usize;
for repo in repos {
let url = format!(
"https://{}/api/v1/repos/{}/languages",
self.config.host, repo
);
let req = self.apply_headers(self.client.get(&url));
let resp = req
.send()
.await
.map_err(|e| SourceError::Http(e.to_string()))?;
if !resp.status().is_success() {
tracing::warn!(repo = %repo, status = %resp.status(), "gitea language fetch failed; skipping");
continue;
}
let lang_map: std::collections::HashMap<String, i64> = resp
.json()
.await
.map_err(|e| SourceError::Parse(e.to_string()))?;
let languages: Vec<RepoLanguage> = lang_map
.into_iter()
.map(|(language, bytes)| RepoLanguage {
source: Source::Gitea,
repo: repo.clone(),
language,
bytes,
color: None, // Gitea doesn't return colors
})
.collect();
total += self.writer.upsert_repo_languages(&languages).await?;
}
debug!(total, repos = repos.len(), "gitea repo languages updated");
Ok(total)
}
}
@@ -188,9 +240,12 @@ impl EventSource for GiteaSource {
}
async fn poll(&self) -> Result<usize, SourceError> {
let mut all_repos = HashSet::new();
// Poll user's own activity feed (existing behavior).
let user_url = self.user_feed_base_url();
let mut total = self.poll_feed(SOURCE_NAME, &user_url, false).await?;
let (mut total, repos) = self.poll_feed(SOURCE_NAME, &user_url, false).await?;
all_repos.extend(repos);
// Discover orgs and poll each org's activity feed, filtering for
// events performed by this user.
@@ -199,13 +254,20 @@ impl EventSource for GiteaSource {
let state_key = format!("gitea:org:{org}");
let org_url = self.org_feed_base_url(org);
match self.poll_feed(&state_key, &org_url, true).await {
Ok(n) => total += n,
Ok((n, repos)) => {
total += n;
all_repos.extend(repos);
}
Err(e) => {
tracing::warn!(org = %org, error = %e, "failed to poll org feed");
}
}
}
if let Err(e) = self.fetch_languages(&all_repos).await {
tracing::warn!(error = %e, "gitea language fetch failed; continuing");
}
debug!(ingested = total, orgs = orgs.len(), "gitea poll complete");
Ok(total)
}

View File

@@ -20,7 +20,7 @@ use std::sync::Arc;
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use moments_core::{EventSource, EventWriter, PollerStateStore, SourceError};
use moments_entities::{Event, Source};
use moments_entities::{Event, RepoLanguage, Source};
use reqwest::{Client, header};
use serde_json::Value;
use tracing::{debug, warn};
@@ -296,6 +296,105 @@ impl GithubRepoSource {
self.state.save(&state_key, None, newest).await?;
Ok(total)
}
/// Fetch language breakdowns for `repos` from the GitHub GraphQL API and
/// upsert them into `repo_languages`.
///
/// Repos are processed in chunks of 20. Each chunk becomes a single query
/// in which repo number `i` of the chunk is addressed through the alias
/// `r{i}`, requesting its first 20 languages ordered by size descending.
///
/// * With no configured token nothing is requested and `Ok(0)` is returned.
/// * Repos whose `full_name` has no `/` separator are left out of the query.
/// * A non-success HTTP status is logged and stops processing of the
///   remaining chunks; what was upserted so far is still reported.
/// * GraphQL-level `errors` are logged (first message only); any `data`
///   present in the same response is still used.
///
/// Returns the sum of the counts reported by `upsert_repo_languages`.
///
/// # Errors
///
/// `SourceError::Http` when the request cannot be sent,
/// `SourceError::Parse` when the response body is not valid JSON, plus any
/// error propagated from the writer.
async fn fetch_languages(&self, repos: &[Repo]) -> Result<usize, SourceError> {
    let Some(token) = self.config.token.as_ref() else {
        return Ok(0);
    };

    let mut total = 0usize;

    for chunk in repos.chunks(20) {
        // One aliased `repository(...)` selection per well-formed repo name.
        // The alias index is the repo's position in the chunk, so it can be
        // recomputed when the response is read back.
        let fragments: Vec<String> = chunk
            .iter()
            .enumerate()
            .filter_map(|(i, repo)| {
                let (owner, name) = repo.full_name.split_once('/')?;
                Some(format!(
                    r#"r{i}: repository(owner: "{}", name: "{}") {{ languages(first: 20, orderBy: {{field: SIZE, direction: DESC}}) {{ edges {{ size node {{ name color }} }} }} }}"#,
                    owner, name
                ))
            })
            .collect();
        if fragments.is_empty() {
            continue;
        }

        let query = format!("{{ {} }}", fragments.join(" "));
        let body = serde_json::json!({ "query": query });

        let resp = self
            .client
            .post("https://api.github.com/graphql")
            .header(header::AUTHORIZATION, format!("Bearer {token}"))
            .header(header::USER_AGENT, USER_AGENT)
            .header(header::CONTENT_TYPE, "application/json")
            .json(&body)
            .send()
            .await
            .map_err(|e| SourceError::Http(e.to_string()))?;

        if !resp.status().is_success() {
            warn!(status = %resp.status(), "GraphQL language fetch failed");
            break;
        }

        let data: Value = resp
            .json()
            .await
            .map_err(|e| SourceError::Parse(e.to_string()))?;

        // Surface the first GraphQL error, if any, but keep going: the
        // response may still carry usable data.
        let first_error = data
            .get("errors")
            .and_then(Value::as_array)
            .and_then(|errs| errs.first())
            .and_then(|err| err.get("message"))
            .and_then(Value::as_str);
        if let Some(msg) = first_error {
            warn!(error = %msg, "GraphQL language fetch had errors");
        }

        let Some(data_obj) = data.get("data") else {
            continue;
        };

        let mut languages = Vec::new();
        for (i, repo) in chunk.iter().enumerate() {
            let alias = format!("r{i}");
            let Some(edges) = data_obj
                .get(&alias)
                .and_then(|r| r.get("languages"))
                .and_then(|l| l.get("edges"))
                .and_then(Value::as_array)
            else {
                continue;
            };

            // Edges without a language name are dropped; a missing size
            // falls back to 0 and a missing colour to `None`.
            languages.extend(edges.iter().filter_map(|edge| {
                let node = edge.get("node");
                let name = node.and_then(|n| n.get("name")).and_then(Value::as_str)?;
                let color = node.and_then(|n| n.get("color")).and_then(Value::as_str);
                Some(RepoLanguage {
                    source: Source::Github,
                    repo: repo.full_name.clone(),
                    language: name.to_string(),
                    bytes: edge.get("size").and_then(Value::as_i64).unwrap_or(0),
                    color: color.map(String::from),
                })
            }));
        }

        total += self.writer.upsert_repo_languages(&languages).await?;
    }

    debug!(total, "repo languages updated");
    Ok(total)
}
}
#[async_trait]
@@ -327,6 +426,10 @@ impl EventSource for GithubRepoSource {
}
}
if let Err(e) = self.fetch_languages(&repos).await {
warn!(error = %e, "language fetch failed; continuing");
}
self.state.touch(SOURCE_NAME).await?;
debug!(ingested = total, repos = repos.len(), "github-repo poll complete");
Ok(total)

View File

@@ -248,6 +248,12 @@ mod tests {
) -> Result<usize, moments_core::StoreError> {
Ok(0)
}
// No-op stub: ignores the language rows it is given and reports zero written.
async fn upsert_repo_languages(
&self,
_languages: &[moments_entities::RepoLanguage],
) -> Result<usize, moments_core::StoreError> {
Ok(0)
}
}
struct NoopState;
#[async_trait]

View File

@@ -9,7 +9,7 @@ use async_trait::async_trait;
use chrono::{DateTime, Utc};
use moments_core::{EventReader, EventWriter, PollerState, PollerStateStore, StoreError};
use chrono::NaiveDate;
use moments_entities::{DailyCount, Event, EventQuery, ProjectSummary, Source, SourceSummary};
use moments_entities::{DailyCount, Event, EventQuery, LanguageDailyCount, ProjectSummary, RepoLanguage, Source, SourceSummary};
use sqlx::Row;
use sqlx::postgres::{PgPool, PgPoolOptions};
use std::str::FromStr;
@@ -203,7 +203,8 @@ impl EventReader for PgStore {
COUNT(e.id)::bigint AS count
FROM generate_series($1::date, $2::date, '1 day') d
LEFT JOIN events e
ON e.occurred_at >= d AND e.occurred_at < d + interval '1 day'
ON e.occurred_at >= (d::date || 'T00:00:00Z')::timestamptz
AND e.occurred_at < ((d::date + 1) || 'T00:00:00Z')::timestamptz
AND e.public = true
GROUP BY d::date
ORDER BY d::date
@@ -224,6 +225,90 @@ impl EventReader for PgStore {
})
.collect()
}
/// Per-day, per-language counts of public commit-type events between
/// `from` and `to`.
///
/// Days are bucketed on explicit UTC midnight boundaries. An event is
/// counted when it is public and its action is `Commit`, `PushEvent` or
/// `commit_repo`; it is matched to `repo_languages` by source and by a
/// repo name extracted from the payload (the JSON path depends on the
/// source). Because the join is against every language row of the repo,
/// one event contributes to each language recorded for that repo.
///
/// `color` is the maximum colour stored for the joined rows, falling back
/// to any non-null colour stored for the same language. Rows are ordered
/// by date, then by count descending.
async fn language_daily_counts(&self, from: NaiveDate, to: NaiveDate) -> Result<Vec<LanguageDailyCount>, StoreError> {
    const SQL: &str = r#"
SELECT d::date AS date,
rl.language,
COALESCE(MAX(rl.color),
(SELECT color FROM repo_languages
WHERE language = rl.language AND color IS NOT NULL
LIMIT 1)
) AS color,
COUNT(e.id)::bigint AS commits
FROM generate_series($1::date, $2::date, '1 day') d
JOIN events e
ON e.occurred_at >= (d::date || 'T00:00:00Z')::timestamptz
AND e.occurred_at < ((d::date + 1) || 'T00:00:00Z')::timestamptz
AND e.public = true
AND e.action IN ('Commit', 'PushEvent', 'commit_repo')
JOIN repo_languages rl
ON rl.source = e.source
AND rl.repo = CASE e.source
WHEN 'github' THEN COALESCE(
e.payload->'repo'->>'name',
e.payload->'repository'->>'full_name',
e.payload->>'_repo'
)
WHEN 'gitea' THEN COALESCE(
e.payload->'repo'->>'full_name',
e.payload->'repo'->>'name'
)
ELSE NULL
END
GROUP BY d::date, rl.language
ORDER BY d::date, commits DESC
"#;

    let fetched = sqlx::query(SQL)
        .bind(from)
        .bind(to)
        .fetch_all(&self.pool)
        .await
        .map_err(map_err)?;

    // Decode row by row; the first column that fails to decode ends the call.
    let mut counts = Vec::with_capacity(fetched.len());
    for row in fetched {
        counts.push(LanguageDailyCount {
            date: row.try_get("date").map_err(map_err)?,
            language: row.try_get("language").map_err(map_err)?,
            color: row.try_get("color").map_err(map_err)?,
            commits: row.try_get("commits").map_err(map_err)?,
        });
    }
    Ok(counts)
}
/// Every stored repo language row, ordered by repo and then by byte count
/// descending.
///
/// A row without a colour of its own borrows any non-null colour stored
/// for the same language on another row; if none exists the colour stays
/// `None`. The `source` column is parsed back into a `Source`.
async fn repo_languages(&self) -> Result<Vec<RepoLanguage>, StoreError> {
    const SQL: &str = r#"
SELECT source, repo, language, bytes,
COALESCE(color,
(SELECT color FROM repo_languages r2
WHERE r2.language = repo_languages.language AND r2.color IS NOT NULL
LIMIT 1)
) AS color
FROM repo_languages
ORDER BY repo, bytes DESC
"#;

    let fetched = sqlx::query(SQL)
        .fetch_all(&self.pool)
        .await
        .map_err(map_err)?;

    // Decode row by row; the first failure ends the call.
    let mut languages = Vec::with_capacity(fetched.len());
    for row in fetched {
        let source_str: String = row.try_get("source").map_err(map_err)?;
        languages.push(RepoLanguage {
            source: Source::from_str(&source_str).map_err(map_err)?,
            repo: row.try_get("repo").map_err(map_err)?,
            language: row.try_get("language").map_err(map_err)?,
            bytes: row.try_get("bytes").map_err(map_err)?,
            color: row.try_get("color").map_err(map_err)?,
        });
    }
    Ok(languages)
}
}
#[async_trait]
@@ -331,4 +416,37 @@ impl EventWriter for PgStore {
tx.commit().await.map_err(map_err)?;
Ok(inserted)
}
/// Insert or refresh language rows inside a single transaction.
///
/// Rows are keyed on `(source, repo, language)`; on conflict the byte
/// count, colour and `fetched_at` are overwritten with the new values.
/// An empty slice returns `Ok(0)` without opening a transaction.
///
/// Returns the sum of the affected-row counts reported for each statement.
/// If any statement fails the error is returned before the commit is
/// reached.
async fn upsert_repo_languages(&self, languages: &[RepoLanguage]) -> Result<usize, StoreError> {
    const UPSERT: &str = r#"
INSERT INTO repo_languages (source, repo, language, bytes, color, fetched_at)
VALUES ($1, $2, $3, $4, $5, now())
ON CONFLICT (source, repo, language) DO UPDATE
SET bytes = EXCLUDED.bytes,
color = EXCLUDED.color,
fetched_at = EXCLUDED.fetched_at
"#;

    if languages.is_empty() {
        return Ok(0);
    }

    let mut tx = self.pool.begin().await.map_err(map_err)?;
    let mut affected = 0usize;

    for entry in languages {
        let outcome = sqlx::query(UPSERT)
            .bind(entry.source.as_str())
            .bind(&entry.repo)
            .bind(&entry.language)
            .bind(entry.bytes)
            .bind(&entry.color)
            .execute(&mut *tx)
            .await
            .map_err(map_err)?;
        affected += outcome.rows_affected() as usize;
    }

    tx.commit().await.map_err(map_err)?;
    Ok(affected)
}
}

View File

@@ -104,6 +104,25 @@ pub struct ProjectSummary {
pub last_activity: Option<DateTime<Utc>>,
}
/// Per-language daily commit count for the language stream graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageDailyCount {
/// Calendar day the count belongs to.
pub date: chrono::NaiveDate,
/// Language name.
pub language: String,
/// Display colour for the language, when one is known.
pub color: Option<String>,
/// Count attributed to this language on `date`.
pub commits: i64,
}
/// Per-repo language breakdown from the forge.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoLanguage {
/// Forge the breakdown was fetched from.
pub source: Source,
/// Repository identifier as reported by the forge.
pub repo: String,
/// Language name as reported by the forge.
pub language: String,
/// Size attributed to this language in the repository.
pub bytes: i64,
/// Display colour for the language; `None` when the forge supplies none.
pub color: Option<String>,
}
// ---------------------------------------------------------------------
// Presentation shape — what `GET /v1/events` actually returns.
// The API reshapes raw payloads into these so the frontend stays dumb.