Compare commits

...

3 Commits

Author SHA1 Message Date
2821548e6e feat(ui): add avg-by-hour panel to dashboard stats
Complements the existing avg-by-weekday chart with its orthogonal
partner: which hour of the day the user typically commits. The api
buckets events by EXTRACT(hour FROM occurred_at AT TIME ZONE $tz) so
the chart matches the clock the user sees rather than UTC; the UI
passes the browser's resolved IANA timezone. Renders as 24 mini-bars
below the weekday chart with labels every 4 hours and per-bar
tooltips showing the average events/day at that hour.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 16:34:17 +03:00
72eeb547af chore(deploy): self-heal /tmp perms before staging
frootmig periodically has its /tmp reset from the standard sticky-
world-writable 1777 to root-owned 0755 (cause not yet pinned down),
which breaks the unprivileged rsync of the deploy stage dir and
surfaces as a cryptic "Permission denied" plus a follow-on install
failure. Stat /tmp before each rsync and, if the mode is off, sudo
chmod it back to 1777 — visible in the deploy log so it's obvious
which host keeps drifting.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 07:59:06 +03:00
86411bb88e fix(worker): dedup gitea events from overlapping user and org feeds
Gitea writes one Action row per interested user-context. A push to an
org repo by user U produces two rows — one with user_id=U, one with
user_id=org — differing only in `id` and `user_id`. Polling both the
user feed and org feeds (which we do, and need to, since neither alone
catches every cross-namespace event) surfaced both rows; the
`gitea:{action_row_id}` id gave them distinct ids, so the upsert dedup
never fired and ~38% of events on org-repo project pages rendered
twice. Switch to a content-derived id keyed on (op_type, act_user_id,
repo_id, ref_name, comment_id, created) so the two rows collide on
upsert, and add a migration that re-keys existing rows to the same
formula while collapsing the duplicates already in the table.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 07:53:43 +03:00
9 changed files with 312 additions and 8 deletions

View File

@@ -11,7 +11,7 @@ use chrono::{DateTime, Datelike, NaiveDate, Utc};
use clap::Parser;
use moments_core::{EventReader, reshape};
use moments_data::PgStore;
use moments_entities::{DailyCount, EventQuery, LanguageDailyCount, ProjectSummary, RepoLanguage, Source, SourceSummary, TimelineItem};
use moments_entities::{DailyCount, EventQuery, HourlyAvg, LanguageDailyCount, ProjectSummary, RepoLanguage, Source, SourceSummary, TimelineItem};
use serde::Deserialize;
use tower_http::{cors::CorsLayer, trace::TraceLayer};
use tracing::info;
@@ -57,6 +57,7 @@ async fn main() -> anyhow::Result<()> {
.route("/v1/sources", get(list_sources))
.route("/v1/projects", get(list_projects))
.route("/v1/activity/daily", get(daily_counts))
.route("/v1/activity/hourly", get(hourly_avgs))
.route("/v1/languages/daily", get(language_daily_counts))
.route("/v1/languages/repos", get(repo_languages))
.route("/v1/forge/{source}/{*rest}", get(forge_proxy))
@@ -169,6 +170,38 @@ async fn language_daily_counts(
Ok(Json(counts))
}
#[derive(Debug, Deserialize)]
struct HourlyAvgsParams {
from: Option<NaiveDate>,
to: Option<NaiveDate>,
/// IANA timezone name (e.g. "Europe/Helsinki"). Defaults to UTC.
/// Hour buckets are computed in this zone so the chart matches the
/// clock the user sees.
tz: Option<String>,
}
async fn hourly_avgs(
State(state): State<AppState>,
Query(params): Query<HourlyAvgsParams>,
) -> Result<Json<Vec<HourlyAvg>>, ApiError> {
let to = params.to.unwrap_or_else(|| Utc::now().date_naive());
let from = params.from.unwrap_or_else(|| to - chrono::Duration::days(365));
let tz = params.tz.as_deref().unwrap_or("UTC");
// Validate the tz string before handing it to postgres — a bad name
// here would surface as an opaque 500 from the DB. chrono-tz would do
// it for free but we don't depend on it; instead reject obvious shell
// injection vectors (the value is bound, not interpolated, so this is
// belt-and-braces).
if tz.len() > 64 || tz.chars().any(|c| !(c.is_ascii_alphanumeric() || matches!(c, '/' | '_' | '+' | '-'))) {
return Err(ApiError {
status: StatusCode::BAD_REQUEST,
message: "invalid tz".into(),
});
}
let avgs = state.store.hourly_avgs(from, to, tz, /* include_private */ true).await.map_err(internal)?;
Ok(Json(avgs))
}
async fn repo_languages(
State(state): State<AppState>,
) -> Result<Json<Vec<RepoLanguage>>, ApiError> {

View File

@@ -6,7 +6,7 @@ pub use sources::{EventSource, PollerState, PollerStateStore, SourceError, run_p
use async_trait::async_trait;
use chrono::NaiveDate;
use moments_entities::{DailyCount, Event, EventQuery, LanguageDailyCount, ProjectSummary, RepoLanguage, SourceSummary};
use moments_entities::{DailyCount, Event, EventQuery, HourlyAvg, LanguageDailyCount, ProjectSummary, RepoLanguage, SourceSummary};
#[derive(Debug, thiserror::Error)]
pub enum StoreError {
@@ -22,6 +22,7 @@ pub trait EventReader: Send + Sync {
async fn list_projects(&self) -> Result<Vec<ProjectSummary>, StoreError>;
async fn daily_counts(&self, from: NaiveDate, to: NaiveDate, include_private: bool) -> Result<Vec<DailyCount>, StoreError>;
async fn language_daily_counts(&self, from: NaiveDate, to: NaiveDate, include_private: bool) -> Result<Vec<LanguageDailyCount>, StoreError>;
async fn hourly_avgs(&self, from: NaiveDate, to: NaiveDate, tz: &str, include_private: bool) -> Result<Vec<HourlyAvg>, StoreError>;
async fn repo_languages(&self) -> Result<Vec<RepoLanguage>, StoreError>;
}

View File

@@ -0,0 +1,55 @@
-- Collapse duplicate Gitea events introduced by polling both the user
-- activity feed and per-org activity feeds.
--
-- Gitea writes one Action row per interested user-context: a push to
-- helexa/cortex by user grenade produces two rows, one with
-- user_id=grenade and one with user_id=helexa. Everything else (op_type,
-- act_user_id, repo_id, ref_name, comment_id, created) is identical.
-- Our prior id scheme (gitea:{action_row_id}) gave them different ids,
-- so the upsert-on-id dedup never fired and the timeline rendered each
-- push twice.
--
-- This migration re-keys every existing gitea row to the same canonical
-- formula `parse_gitea_event` now emits, deleting duplicates encountered
-- along the way. Idempotent: running it again is a no-op because the
-- canonical id of a canonical id is itself.
-- Snapshot the canonical id for every gitea row.
CREATE TEMP TABLE _gitea_canonical AS
SELECT
id AS old_id,
'gitea:'
|| coalesce(payload->>'op_type', '') || ':'
|| coalesce(payload->>'act_user_id', payload->'act_user'->>'id', '0') || ':'
|| coalesce(payload->>'repo_id', payload->'repo'->>'id', '0') || ':'
|| coalesce(payload->>'ref_name', '') || ':'
|| coalesce(payload->>'comment_id', '0') || ':'
|| coalesce(payload->>'created', '')
AS new_id
FROM events
WHERE source = 'gitea';
-- For each canonical id, keep the row whose current id is lexicographically
-- smallest (stable, arbitrary tie-break) and delete the rest. The "old id
-- already matches the new id" case lands here too — DELETE skips it because
-- rn = 1 for any singleton group.
DELETE FROM events
WHERE id IN (
SELECT old_id FROM (
SELECT old_id, new_id,
row_number() OVER (PARTITION BY new_id ORDER BY old_id) AS rn
FROM _gitea_canonical
) ranked
WHERE rn > 1
);
-- Rename remaining rows to the canonical id. Postgres defers PK uniqueness
-- to statement end, so swapping ids across rows in one UPDATE is fine
-- provided the final set is unique (dedup above guarantees that).
UPDATE events e
SET id = c.new_id
FROM _gitea_canonical c
WHERE e.id = c.old_id
AND e.id <> c.new_id;
DROP TABLE _gitea_canonical;

View File

@@ -276,8 +276,14 @@ impl EventSource for GiteaSource {
/// Convert a Gitea activity feed item into our Event row. The host gets
/// stamped into the payload as `_host` so the reshape layer can build
/// web URLs without needing global config.
///
/// The id is content-derived rather than using Gitea's `id` field directly:
/// Gitea creates one Action row per interested user-context, so a push to
/// an org repo by user U produces two rows (one under U's context, one
/// under the org's), distinguished only by `id` and `user_id`. Keying on
/// `(op_type, act_user_id, repo_id, ref_name, comment_id, created)` makes
/// those two rows collapse to the same event on upsert.
fn parse_gitea_event(item: &Value, host: &str) -> Option<Event> {
let id = item.get("id").and_then(Value::as_i64)?;
let op_type = item.get("op_type").and_then(Value::as_str)?.to_string();
let created_str = item.get("created").and_then(Value::as_str)?;
let occurred_at = DateTime::parse_from_rfc3339(created_str)
@@ -285,13 +291,15 @@ fn parse_gitea_event(item: &Value, host: &str) -> Option<Event> {
.with_timezone(&Utc);
let private = item.get("is_private").and_then(Value::as_bool).unwrap_or(false);
let id = gitea_canonical_id(item, &op_type, created_str);
let mut payload = item.clone();
if let Some(obj) = payload.as_object_mut() {
obj.insert("_host".into(), Value::String(host.into()));
}
Some(Event {
id: format!("gitea:{id}"),
id,
source: Source::Gitea,
action: op_type,
occurred_at,
@@ -300,6 +308,25 @@ fn parse_gitea_event(item: &Value, host: &str) -> Option<Event> {
})
}
/// Build the canonical, content-derived id for a Gitea action. Must stay
/// in lockstep with the SQL formula in migration 0005 so back-fill and
/// new writes share the same id space.
fn gitea_canonical_id(item: &Value, op_type: &str, created: &str) -> String {
let act_user_id = item
.get("act_user_id")
.and_then(Value::as_i64)
.or_else(|| item.get("act_user").and_then(|u| u.get("id")).and_then(Value::as_i64))
.unwrap_or(0);
let repo_id = item
.get("repo_id")
.and_then(Value::as_i64)
.or_else(|| item.get("repo").and_then(|r| r.get("id")).and_then(Value::as_i64))
.unwrap_or(0);
let ref_name = item.get("ref_name").and_then(Value::as_str).unwrap_or("");
let comment_id = item.get("comment_id").and_then(Value::as_i64).unwrap_or(0);
format!("gitea:{op_type}:{act_user_id}:{repo_id}:{ref_name}:{comment_id}:{created}")
}
#[cfg(test)]
mod tests {
use super::*;
@@ -310,14 +337,16 @@ mod tests {
let raw = json!({
"id": 973,
"op_type": "commit_repo",
"act_user_id": 42,
"repo_id": 7,
"ref_name": "refs/heads/main",
"is_private": false,
"content": "{\"Commits\":[{\"Sha1\":\"abc123\"}],\"Len\":1}",
"created": "2026-05-03T16:37:45Z",
"repo": { "full_name": "grenade/moments" }
"repo": { "id": 7, "full_name": "grenade/moments" }
});
let ev = parse_gitea_event(&raw, "git.lair.cafe").expect("parses");
assert_eq!(ev.id, "gitea:973");
assert_eq!(ev.id, "gitea:commit_repo:42:7:refs/heads/main:0:2026-05-03T16:37:45Z");
assert_eq!(ev.source, Source::Gitea);
assert_eq!(ev.action, "commit_repo");
assert!(ev.public);
@@ -328,6 +357,43 @@ mod tests {
);
}
#[test]
fn dup_action_rows_for_user_and_org_contexts_collapse_to_same_id() {
// Gitea creates two Action rows when grenade pushes to helexa/cortex:
// one with user_id=grenade (surfaced by the user feed), one with
// user_id=helexa (surfaced by the org feed). Everything except `id`
// and `user_id` is identical. The canonical id ignores both.
let user_ctx = json!({
"id": 1322,
"user_id": 42,
"op_type": "commit_repo",
"act_user_id": 42,
"act_user": { "login": "grenade", "id": 42 },
"repo_id": 99,
"repo": { "id": 99, "full_name": "helexa/cortex" },
"ref_name": "refs/heads/main",
"comment_id": 0,
"is_private": false,
"created": "2026-05-20T04:32:50Z"
});
let org_ctx = json!({
"id": 1323,
"user_id": 7,
"op_type": "commit_repo",
"act_user_id": 42,
"act_user": { "login": "grenade", "id": 42 },
"repo_id": 99,
"repo": { "id": 99, "full_name": "helexa/cortex" },
"ref_name": "refs/heads/main",
"comment_id": 0,
"is_private": false,
"created": "2026-05-20T04:32:50Z"
});
let a = parse_gitea_event(&user_ctx, "git.lair.cafe").expect("parses");
let b = parse_gitea_event(&org_ctx, "git.lair.cafe").expect("parses");
assert_eq!(a.id, b.id, "duplicate action rows must collide on id");
}
#[test]
fn org_event_user_filter_predicate() {
let by_user = json!({

View File

@@ -9,7 +9,7 @@ use async_trait::async_trait;
use chrono::{DateTime, Utc};
use moments_core::{EventReader, EventWriter, PollerState, PollerStateStore, StoreError};
use chrono::NaiveDate;
use moments_entities::{DailyCount, Event, EventQuery, LanguageDailyCount, ProjectSummary, RepoLanguage, Source, SourceSummary};
use moments_entities::{DailyCount, Event, EventQuery, HourlyAvg, LanguageDailyCount, ProjectSummary, RepoLanguage, Source, SourceSummary};
use sqlx::Row;
use sqlx::postgres::{PgPool, PgPoolOptions};
use std::str::FromStr;
@@ -291,6 +291,55 @@ impl EventReader for PgStore {
.collect()
}
async fn hourly_avgs(
&self,
from: NaiveDate,
to: NaiveDate,
tz: &str,
include_private: bool,
) -> Result<Vec<HourlyAvg>, StoreError> {
// GREATEST guards against from > to (returns NaN-via-div-by-zero
// otherwise). EXTRACT(hour FROM tz-shifted timestamp) buckets each
// event into the user's local hour rather than UTC, so the chart
// matches the labels they'd see on a clock.
let rows = sqlx::query(
r#"
WITH params AS (
SELECT GREATEST(($2::date - $1::date + 1), 1)::float8 AS day_count
),
bucketed AS (
SELECT EXTRACT(hour FROM (occurred_at AT TIME ZONE $3))::int AS hour
FROM events
WHERE occurred_at >= ($1::date::timestamp AT TIME ZONE 'UTC')
AND occurred_at < (($2::date + 1)::timestamp AT TIME ZONE 'UTC')
AND ($4::bool OR public = true)
)
SELECT g.h::int AS hour,
(COUNT(b.hour)::float8 / (SELECT day_count FROM params)) AS avg
FROM generate_series(0, 23) AS g(h)
LEFT JOIN bucketed b ON b.hour = g.h
GROUP BY g.h
ORDER BY g.h
"#,
)
.bind(from)
.bind(to)
.bind(tz)
.bind(include_private)
.fetch_all(&self.pool)
.await
.map_err(map_err)?;
rows.into_iter()
.map(|r| {
Ok(HourlyAvg {
hour: r.try_get("hour").map_err(map_err)?,
avg: r.try_get("avg").map_err(map_err)?,
})
})
.collect()
}
async fn repo_languages(&self) -> Result<Vec<RepoLanguage>, StoreError> {
let rows = sqlx::query(
r#"

View File

@@ -91,6 +91,14 @@ pub struct DailyCount {
pub count: i64,
}
/// Average events per day at a given hour of the day, computed in a
/// caller-supplied IANA timezone. 24 entries (0..=23).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HourlyAvg {
pub hour: i32,
pub avg: f64,
}
/// Per-repo activity rollup for the dashboard.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectSummary {

View File

@@ -48,6 +48,31 @@ ssh_run() {
fi
}
# Ensure /tmp on the remote is world-writable + sticky (mode 1777). Some
# hosts in this fleet have had /tmp reset to root-owned 0755 by an
# unrelated configuration step, which silently breaks the rsync of the
# deploy stage dir under our unprivileged user. Check the mode first so a
# correctly-configured host doesn't incur a needless sudo call.
ensure_tmp_writable() {
local host="$1"
if (( dry_run )); then
printf '\033[2m[dry-run]\033[0m ssh %s -- stat /tmp; chmod 1777 if needed\n' "$host" >&2
return 0
fi
local mode
mode="$(ssh -o BatchMode=yes "$host" 'stat -c %a /tmp')" || {
warn "could not stat /tmp on $host"
return 1
}
if [[ "$mode" != "1777" ]]; then
warn "/tmp on $host is mode $mode; fixing to 1777"
ssh -o BatchMode=yes "$host" 'sudo chmod 1777 /tmp' || {
warn "failed to chmod /tmp on $host"
return 1
}
fi
}
[[ $# -ge 1 ]] || usage
environment="$1"; shift
components=()
@@ -192,6 +217,8 @@ deploy_api() {
# live system dirs.
local remote_stage="/tmp/moments-deploy.api.${$}.${RANDOM}"
ensure_tmp_writable "$host" || return 1
rsync \
--archive \
--hard-links \
@@ -344,6 +371,8 @@ deploy_worker() {
# path via the heredoc. Never rsync into /.
local remote_stage="/tmp/moments-deploy.worker.${$}.${RANDOM}"
ensure_tmp_writable "$host" || return 1
rsync \
--archive \
--hard-links \

View File

@@ -70,6 +70,11 @@ export interface DailyCount {
count: number;
}
export interface HourlyAvg {
hour: number;
avg: number;
}
export interface LanguageDailyCount {
date: string;
language: string;
@@ -120,6 +125,13 @@ export async function fetchDailyCounts(from: string, to: string): Promise<DailyC
return resp.json();
}
export async function fetchHourlyAvgs(from: string, to: string, tz: string): Promise<HourlyAvg[]> {
const qs = new URLSearchParams({ from, to, tz });
const resp = await fetch(`${API_BASE}/activity/hourly?${qs}`);
if (!resp.ok) throw new Error(`hourly-avgs: HTTP ${resp.status}`);
return resp.json();
}
export async function fetchLanguageDailyCounts(from: string, to: string): Promise<LanguageDailyCount[]> {
const resp = await fetch(`${API_BASE}/languages/daily?from=${from}&to=${to}`);
if (!resp.ok) throw new Error(`language-daily-counts: HTTP ${resp.status}`);

View File

@@ -1,7 +1,7 @@
import { useMemo } from 'react';
import { useQuery } from '@tanstack/react-query';
import { fetchDailyCounts, fetchSources } from '../api/client';
import { fetchDailyCounts, fetchHourlyAvgs, fetchSources } from '../api/client';
export function ContributionStats() {
const sourcesQ = useQuery({
@@ -31,6 +31,21 @@ export function ContributionStats() {
staleTime: 10 * 60_000,
});
// Bucket hour-of-day in the user's local timezone so the chart matches
// the clock they see. Browser may report e.g. "Europe/Helsinki"; fall
// back to UTC if the resolver returns something the server won't
// accept (it validates the string before binding).
const tz = useMemo(
() => Intl.DateTimeFormat().resolvedOptions().timeZone || 'UTC',
[],
);
const hourlyQ = useQuery({
queryKey: ['hourly-avgs-alltime', fromStr, toStr, tz],
queryFn: () => fetchHourlyAvgs(fromStr, toStr, tz),
enabled: !!earliest,
staleTime: 10 * 60_000,
});
const stats = useMemo(() => {
const counts = dailyQ.data ?? [];
if (counts.length === 0) return null;
@@ -96,6 +111,17 @@ export function ContributionStats() {
return { currentStreak, longestStreak, busiest, dayAvgs, maxAvg, activeDays };
}, [dailyQ.data]);
const hourly = useMemo(() => {
const data = hourlyQ.data ?? [];
if (data.length === 0) return null;
const byHour = new Array(24).fill(0);
for (const { hour, avg } of data) {
if (hour >= 0 && hour < 24) byHour[hour] = avg;
}
const max = Math.max(...byHour);
return { hours: byHour, max };
}, [hourlyQ.data]);
if (!stats) return null;
return (
@@ -139,6 +165,31 @@ export function ContributionStats() {
))}
</div>
</div>
{hourly && (
<div className="mt-3">
<span style={{ opacity: 0.7, fontSize: '0.75rem' }}>avg by hour ({tz})</span>
<div className="d-flex align-items-end gap-1 mt-1" style={{ height: 64 }}>
{hourly.hours.map((avg, h) => (
<div key={h} className="d-flex flex-column align-items-center" style={{ flex: 1, height: '100%', justifyContent: 'flex-end' }}>
<div style={{ width: '100%', borderRadius: 2, background: 'rgba(255,255,255,0.05)', flex: 1, display: 'flex', flexDirection: 'column', justifyContent: 'flex-end' }}>
<div
style={{
height: hourly.max > 0 ? `${(avg / hourly.max) * 100}%` : '0%',
borderRadius: 2,
backgroundColor: '#39d353',
opacity: 0.7,
}}
title={`${h.toString().padStart(2, '0')}:00 — ${avg.toFixed(2)}/day`}
/>
</div>
<span style={{ fontSize: '0.6rem', opacity: 0.7, marginTop: 2, minHeight: '0.7rem' }}>
{h % 4 === 0 ? h.toString().padStart(2, '0') : ''}
</span>
</div>
))}
</div>
</div>
)}
</div>
</div>
);