From 58e9d5b0889f2ff548a8c66e337ddebe78bf6ddd Mon Sep 17 00:00:00 2001 From: rob thijssen Date: Tue, 12 May 2026 06:46:03 +0300 Subject: [PATCH] doc: dashboard planning --- docs/plan/01-dashboard-v1.md | 233 +++++++++++++++++++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100644 docs/plan/01-dashboard-v1.md diff --git a/docs/plan/01-dashboard-v1.md b/docs/plan/01-dashboard-v1.md new file mode 100644 index 0000000..904dbd7 --- /dev/null +++ b/docs/plan/01-dashboard-v1.md @@ -0,0 +1,233 @@ +# gongfoo Dashboard — Read-Only v1 + +## Context + +The controller and agents expose only internal control-plane endpoints (events, heartbeat, spawn, terminate, health). There's no way to see system state — runner images, queue depth, host capacity, runner lifecycle — without querying Postgres directly or reading journalctl. A read-only dashboard on the existing mTLS listener gives operational visibility without adding a new port or weakening auth. + +## Architecture + +``` +browser (client cert) ──mTLS──▶ nginx (oolon) + ├─ /api/* ──proxy+mTLS──▶ controller (frootmig:18443) /v1/dashboard/* + └─ /* ──static──▶ /var/www/gongfoo/ (Vite SPA) +``` + +- Dashboard API routes are added to the controller's existing axum Router on the same mTLS listener +- Nginx on the dashboard host requires client certs from consumers AND presents its own host cert as client cert when proxying to the controller +- Frontend is a Vite React-SWC-TS SPA served as static files by nginx + +--- + +## Part 1: Controller API (6 GET endpoints) + +All routes under `/v1/dashboard/`, read-only Postgres queries, no brew loop state needed. 
+ +### New module: `crates/gongfoo-controller/src/dashboard.rs` + +State type (separate from ReceiverState — only needs PgPool): +```rust +pub struct DashboardState { pub pool: PgPool } +``` + +### Endpoints + +| Route | Returns | Key query | +|-------|---------|-----------| +| `GET /v1/dashboard/summary` | Active runner counts by state, host health stats, recent failure count | `GROUP BY state` on runners + `COUNT` on hosts | +| `GET /v1/dashboard/hosts` | All hosts with CPU/mem capacity, usage, active runner count, drain/heartbeat status | `LEFT JOIN runners + runner_images`, `GROUP BY h.id` | +| `GET /v1/dashboard/images` | All runner images with labels and resource requests | `SELECT * FROM runner_images` | +| `GET /v1/dashboard/runners?state=active&limit=100` | Runners with state, host, image, timings. Filter: `active`/`recent`/`failed`/specific state | `JOIN hosts + runner_images`, filter by state, `LIMIT` | +| `GET /v1/dashboard/queue` | Latest queue depth per label set | `DISTINCT ON (label_set)` from `queue_observations` | +| `GET /v1/dashboard/activity` | Completed/failed per minute over last hour | `date_trunc('minute', completed_at)`, `GROUP BY` | + +### Router integration in `main.rs` + +Merge dashboard routes into the existing router before the TLS listener loop. 
Both sub-routers call `.with_state()` independently, then `.merge()` yields `Router<()>`: + +```rust +let app = Router::new() + .route("/v1/events", post(receiver::handle_event)) + .route("/v1/heartbeat", post(receiver::handle_heartbeat)) + .with_state(receiver_state) + .merge( + Router::new() + .route("/v1/dashboard/summary", get(dashboard::handle_summary)) + .route("/v1/dashboard/hosts", get(dashboard::handle_hosts)) + .route("/v1/dashboard/images", get(dashboard::handle_images)) + .route("/v1/dashboard/runners", get(dashboard::handle_runners)) + .route("/v1/dashboard/queue", get(dashboard::handle_queue)) + .route("/v1/dashboard/activity", get(dashboard::handle_activity)) + .with_state(dashboard_state), + ); +``` + +### Response types: `crates/gongfoo-proto/src/dashboard.rs` + +Add structs for each endpoint response (DashboardSummary, DashboardHost, DashboardImage, DashboardRunner, DashboardQueueEntry, DashboardActivityBucket). All derive `Debug, Clone, Serialize, Deserialize`. Export via `lib.rs`. + +--- + +## Part 2: Frontend (`dashboard/`) + +Scaffold with `npm create vite@latest dashboard -- --template react-swc-ts`. 
+ +### Structure + +``` +dashboard/ + vite.config.ts -- dev proxy to controller + src/ + main.tsx + App.tsx -- tab navigation between views + api.ts -- fetchApi(path) wrapper, relative /api/ URLs + types.ts -- TS interfaces matching proto response structs + components/ + Layout.tsx -- header + tab bar + SummaryView.tsx -- metric cards: active runners, host health, queue, failures + HostsView.tsx -- table: hostname, CPU/mem bars, runners, drain, heartbeat age + RunnersView.tsx -- table: name, state badge, image, host, timings; filter buttons + QueueView.tsx -- cards per label set with queued count + ActivityChart.tsx -- CSS-only bars per minute (completed green, failed red) + StateBadge.tsx -- colored span per runner state + CapacityBar.tsx -- horizontal bar showing used/total + TimeAgo.tsx -- relative time display +``` + +### Key decisions + +- **No routing library** — tab state in React useState, no URL routing needed for v1 +- **No chart library** — CSS-only bars for activity chart +- **No component library** — plain CSS, dark theme +- **Auto-refresh** — each view polls its endpoint every 5s via useEffect + setInterval +- **No CORS needed** — same-origin (nginx both serves the SPA and proxies the API) + +### `vite.config.ts` dev proxy + +```ts +server: { + proxy: { + '/api': { + target: 'https://frootmig.kosherinata.internal:18443', + changeOrigin: true, + rewrite: (path) => path.replace(/^\/api/, ''), + secure: false, // skip server cert verify in dev; the mTLS listener still needs a client cert + } + } +} +``` + +--- + +## Part 3: Nginx config + +### New file: `asset/nginx/gongfoo.internal.conf` + +```nginx +server { + server_name gongfoo.internal; + listen 443 ssl; + http2 on; + + ssl_certificate /etc/nginx/tls/cert/gongfoo.internal.pem; + ssl_certificate_key /etc/nginx/tls/key/gongfoo.internal.pem; + ssl_protocols TLSv1.3; + + # Require client certs from dashboard consumers + ssl_client_certificate /etc/pki/ca-trust/source/anchors/root-internal.pem; + ssl_verify_client on; + + # API — reverse proxy to controller with 
mTLS client cert + location /api/ { + proxy_pass https://frootmig.kosherinata.internal:18443/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_ssl_certificate /etc/nginx/tls/cert/gongfoo.internal.pem; + proxy_ssl_certificate_key /etc/nginx/tls/key/gongfoo.internal.pem; + proxy_ssl_trusted_certificate /etc/pki/ca-trust/source/anchors/root-internal.pem; + proxy_ssl_verify on; + proxy_ssl_protocols TLSv1.3; + } + + # Frontend — static SPA + location / { + root /var/www/gongfoo; + try_files $uri $uri/ /index.html; + } +} +``` + +The trailing `/` on `proxy_pass` makes nginx replace the matched `/api/` prefix with `/`: `/api/v1/dashboard/summary` → `/v1/dashboard/summary`. + +--- + +## Part 4: Manifest + +Add under `prod.components`: + +```yaml +dashboard: + hosts: [oolon.kosherinata.internal] + config: + server_name: gongfoo.internal +``` + +The controller endpoint is derived from the existing `controller.hosts[0]` + port. + +--- + +## Part 5: Deploy script + +Add `deploy_dashboard()` to `script/deploy.sh`. It is not in the default component set — deploy it explicitly via `./script/deploy.sh prod dashboard`. + +Steps: +1. Build frontend: `cd dashboard && npm ci && npm run build` +2. rsync `dashboard/dist/` → `dashboard_host:/var/www/gongfoo/` +3. Render nginx config (substitute server_name and controller endpoint) +4. Deploy to `/etc/nginx/sites-available/`, symlink to `sites-enabled/` +5. Ensure the `sites-enabled/` include exists in `nginx.conf` +6. Provision step-ca cert if not present (for `gongfoo.internal`) +7. SELinux: `restorecon`, `setsebool -P httpd_can_network_connect on` (`-P` persists the boolean across reboots) +8. Firewall: ensure https service is enabled +9. 
`nginx -t && systemctl reload nginx` + +--- + +## Files to create + +| Path | Purpose | +|------|---------| +| `crates/gongfoo-proto/src/dashboard.rs` | Response types for 6 endpoints | +| `crates/gongfoo-controller/src/dashboard.rs` | Handlers, DashboardState, SQL queries | +| `asset/nginx/gongfoo.internal.conf` | Nginx mTLS reverse proxy + SPA | +| `dashboard/` (full Vite scaffold) | Frontend SPA | + +## Files to modify + +| Path | Change | +|------|--------| +| `crates/gongfoo-proto/src/lib.rs` | Add `mod dashboard; pub use dashboard::*;` | +| `crates/gongfoo-controller/src/main.rs` | Add `mod dashboard;`, merge dashboard router, add `use axum::routing::get` | +| `asset/manifest.yml` | Add `dashboard` component | +| `script/deploy.sh` | Add `deploy_dashboard()` + case entry | + +## Implementation order + +1. Proto response types +2. Controller dashboard module + router integration +3. `cargo build` to verify +4. Nginx config +5. Vite scaffold + types + api wrapper +6. Frontend views (SummaryView first to prove the chain, then the rest) +7. Manifest + deploy script +8. `cargo sqlx prepare` to update offline query cache + +## Verification + +1. `cargo build --workspace` — compiles with new dashboard module +2. `cargo clippy --all-targets` — clean +3. `cd dashboard && npm ci && npm run build` — frontend builds +4. Deploy controller, then `curl --cert ... --key ... --cacert ... https://frootmig.kosherinata.internal:18443/v1/dashboard/summary` — returns JSON +5. Deploy dashboard, then open `https://gongfoo.internal` in a browser configured with a client cert — SPA loads, data populates