chore(deploy): add manifest, systemd units, nginx config, deploy.sh

Wires up the prod deployment per architecture-doc conventions:

- api → nikola.kosherinata.internal, loopback bind 127.0.0.1:42424
  (less-common port, registered with SELinux as http_port_t).
- worker → frootmig.kosherinata.internal, no listening port.
- web (static ui/dist + nginx server_name rob.tn) → nikola, with
  /api/* reverse-proxied to the loopback API.
- db → existing magrathea cluster via mTLS, hostname-baked DATABASE_URL
  rendered into /etc/moments/{api,worker}.env at deploy time.

Cert rotation: step-ca renews host certs every 24h; .path units watch
/etc/pki/tls/misc/<host>.pem and trigger systemctl restart of the
relevant service. Both binaries hold cert state in rustls and read
once at startup, so restart is the right reload semantics.

deploy.sh contract matches the architecture doc: positional env arg,
component list (or `all` / `default`), --dry-run support. Renders
config templates from `pass`, rsyncs over ssh+sudo, runs sysusers /
restorecon / semanage / systemctl / nginx -t idempotently.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-03 20:17:17 +03:00
parent 7919a2d9ab
commit 110b523fd0
13 changed files with 602 additions and 2 deletions

View File

@@ -0,0 +1,9 @@
# /etc/moments/api.env — rendered by deploy.sh, do not edit on the host.
# {{HOSTNAME}} is replaced by deploy.sh with the target host name listed in asset/manifest.yml.
JOURNAL_STREAM=1
RUST_LOG=info,sqlx=warn,tower_http=info
BIND_ADDR=127.0.0.1:42424
DATABASE_URL=postgres://moments_ro@magrathea.kosherinata.internal:5432/moments?sslmode=verify-full&sslrootcert=/etc/pki/ca-trust/source/anchors/root-internal.pem&sslcert=/etc/pki/tls/misc/{{HOSTNAME}}.pem&sslkey=/etc/pki/tls/private/{{HOSTNAME}}.pem

View File

@@ -0,0 +1,27 @@
# /etc/moments/worker.env — rendered by deploy.sh, do not edit on the host.
# {{HOSTNAME}} is replaced by deploy.sh with the target host name listed in asset/manifest.yml.
# {{GITHUB_TOKEN}} is resolved from `pass`; the rendered file lives in
# /etc/moments/ chmod 0640 owned by root:moments.
JOURNAL_STREAM=1
RUST_LOG=info,sqlx=warn
DATABASE_URL=postgres://moments_rw@magrathea.kosherinata.internal:5432/moments?sslmode=verify-full&sslrootcert=/etc/pki/ca-trust/source/anchors/root-internal.pem&sslcert=/etc/pki/tls/misc/{{HOSTNAME}}.pem&sslkey=/etc/pki/tls/private/{{HOSTNAME}}.pem
GITHUB_USER=grenade
GITHUB_TOKEN={{GITHUB_TOKEN}}
POLL_INTERVAL_SECS=600
SEARCH_POLL_INTERVAL_SECS=86400
GITEA_HOST=git.lair.cafe
GITEA_USER=grenade
GITEA_POLL_INTERVAL_SECS=600
HG_HOST=hg-edge.mozilla.org
HG_REPOS=build/puppet,build/tools,build/buildbot-configs
HG_AUTHOR_TERMS=thijssen,grenade
HG_POLL_INTERVAL_SECS=86400
BUGZILLA_HOST=bugzilla.mozilla.org
BUGZILLA_EMAIL=rthijssen@mozilla.com
BUGZILLA_POLL_INTERVAL_SECS=86400

36
asset/manifest.yml Normal file
View File

@@ -0,0 +1,36 @@
# Deployment manifest for the moments app: per environment, the deployable
# components and the hosts each one runs on.
# NOTE(review): script/deploy.sh only reads the component names and each
# component's `hosts` list from this file. The `config` and `secrets` maps are
# not consumed by it — the same values are hard-coded in the
# asset/config/*.tmpl templates, asset/nginx/rob.tn.conf and deploy.sh — so
# keep them in sync by hand, or confirm another consumer exists.
app: moments
environments:
prod:
components:
# Read-only HTTP API, bound to loopback and fronted by nginx on the same host.
api:
hosts: [nikola.kosherinata.internal]
config:
bind: 127.0.0.1:42424
db_role: moments_ro
db_host: magrathea.kosherinata.internal
db_port: 5432
db_name: moments
# Ingestion worker; connects to the database with the read-write role.
worker:
hosts: [frootmig.kosherinata.internal]
config:
db_role: moments_rw
db_host: magrathea.kosherinata.internal
db_port: 5432
db_name: moments
github_user: grenade
gitea_host: git.lair.cafe
gitea_user: grenade
hg_host: hg-edge.mozilla.org
hg_repos: build/puppet,build/tools,build/buildbot-configs
hg_author_terms: thijssen,grenade
bugzilla_host: bugzilla.mozilla.org
bugzilla_email: rthijssen@mozilla.com
# Values are `pass` entry names, not the secrets themselves.
secrets:
GITHUB_TOKEN: github.com/grenade/admin-token
# GITEA_TOKEN, BUGZILLA_API_KEY: optional, omit unless required.
# Static UI plus the nginx site that proxies /api/ to the API above.
web:
hosts: [nikola.kosherinata.internal]
config:
server_name: rob.tn
root: /var/www/moments
api_upstream: http://127.0.0.1:42424

65
asset/nginx/rob.tn.conf Normal file
View File

@@ -0,0 +1,65 @@
# /etc/nginx/conf.d/rob.tn.conf — rob.tn site config for moments.
#
# Static frontend out of /var/www/moments; /api/* reverse-proxied to the
# moments-api binary on loopback. The UI fetches /api/v1/... so the strip
# matches what Vite's dev proxy does (drop the /api prefix before sending
# to axum, whose routes are mounted at /v1/*).
upstream moments_api {
    server 127.0.0.1:42424 max_fails=3 fail_timeout=30s;
    keepalive 8;
}

server {
    listen 443 ssl http2;
    listen [::]:443 ssl http2;
    server_name rob.tn;

    # NOTE(review): this is the step-ca host certificate for nikola. Browsers
    # visiting rob.tn will only accept it if it also covers rob.tn and chains
    # to a root they trust — confirm. nginx reads the certificate when it
    # starts or reloads, and the cert .path units shipped with this site only
    # restart the moments services, so a reload of nginx after each rotation
    # has to be arranged separately.
    ssl_certificate /etc/pki/tls/misc/nikola.kosherinata.internal.pem;
    ssl_certificate_key /etc/pki/tls/private/nikola.kosherinata.internal.pem;

    # Public site — visitors are not on the internal mTLS mesh, so no
    # client-cert verification here. The X25519MLKEM768 default falls
    # back to classical curves for clients that don't speak PQ yet.
    ssl_protocols TLSv1.3;

    root /var/www/moments;
    index index.html;

    # Static SPA: serve the file if it exists, else fall back to index.html
    # so client-side routing works.
    location / {
        try_files $uri $uri/ /index.html;
        add_header Cache-Control "no-cache" always;
    }

    # Asset bundles are content-hashed by Vite — safe to cache aggressively.
    # Only add_header is used here: combining it with `expires` would send
    # two Cache-Control headers on every asset response.
    location ~* \.(js|css|woff2?|ttf|eot|svg|png|jpg|jpeg|gif|ico|webp|avif)$ {
        add_header Cache-Control "public, max-age=2592000, immutable";
        try_files $uri =404;
    }

    # `^~` stops nginx from evaluating regex locations once this prefix
    # matches. Without it an API path that ends in one of the asset
    # extensions above (e.g. /api/v1/foo.svg) is captured by the regex
    # location and answered with a static 404 instead of being proxied.
    location ^~ /api/ {
        # Strip /api so axum sees /v1/events, not /api/v1/events.
        rewrite ^/api/(.*)$ /$1 break;
        proxy_pass http://moments_api;
        # HTTP/1.1 with an empty Connection header is required for the
        # upstream keepalive pool to be used.
        proxy_http_version 1.1;
        proxy_set_header Connection "";
        proxy_set_header Host $host;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_read_timeout 30s;
        proxy_connect_timeout 5s;
    }

    access_log /var/log/nginx/rob.tn.access.log;
    error_log /var/log/nginx/rob.tn.error.log;
}

# Plain HTTP only redirects to HTTPS.
server {
    listen 80;
    listen [::]:80;
    server_name rob.tn;
    return 301 https://$host$request_uri;
}

View File

@@ -0,0 +1,6 @@
# Oneshot helper triggered by moments-api-cert.path: restarts the API so the
# new process picks up the renewed host certificate.
[Unit]
Description=Restart moments-api on host cert change
[Service]
Type=oneshot
# A full restart is used because the API reads its certificate only at
# process start (see the comment in moments-api-cert.path).
ExecStart=/bin/systemctl restart moments-api.service

View File

@@ -0,0 +1,13 @@
# Path unit: watches the host certificate file and starts
# moments-api-cert-reload.service whenever it changes. Rendered by
# script/deploy.sh, which fills in the host name placeholder below.
[Unit]
Description=Watch host cert for moments-api
Documentation=https://git.lair.cafe/grenade/architecture
[Path]
# Hostname is substituted at deploy time. step-ca rotates host certs every
# 24h; rustls reads them at process start, so the API must restart on
# rotation. Read-only public timeline — a few seconds of churn is fine.
PathChanged=/etc/pki/tls/misc/{{HOSTNAME}}.pem
Unit=moments-api-cert-reload.service
[Install]
# Enabled by deploy.sh with `systemctl enable --now`.
WantedBy=multi-user.target

View File

@@ -0,0 +1,34 @@
# moments-api: long-running HTTP API process, run as the unprivileged
# `moments` user with its settings taken from /etc/moments/api.env.
[Unit]
Description=moments read-only HTTP API
Documentation=https://git.lair.cafe/grenade/moments
# Order after, and pull in, network-online.target.
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
User=moments
Group=moments
# Rendered by script/deploy.sh from asset/config/api.env.tmpl.
EnvironmentFile=/etc/moments/api.env
ExecStart=/usr/local/bin/moments-api
# Restart after an unclean exit, waiting 5 s between attempts.
Restart=on-failure
RestartSec=5s
# Hardening
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
PrivateTmp=true
PrivateDevices=true
ProtectKernelTunables=true
ProtectKernelModules=true
ProtectControlGroups=true
RestrictRealtime=true
RestrictSUIDSGID=true
LockPersonality=true
MemoryDenyWriteExecute=true
SystemCallArchitectures=native
# ProtectSystem=strict leaves the file system read-only for this service;
# /var/lib/moments is the one path opened for writing. deploy.sh creates it
# before the unit is started.
ReadWritePaths=/var/lib/moments
# Only Unix-domain, IPv4 and IPv6 sockets may be created.
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,6 @@
# Oneshot helper triggered by moments-worker-cert.path: restarts the worker so
# the new process picks up the renewed host certificate.
[Unit]
Description=Restart moments-worker on host cert change
[Service]
Type=oneshot
# A full restart is used; see moments-worker-cert.path for why that is safe.
ExecStart=/bin/systemctl restart moments-worker.service

View File

@@ -0,0 +1,12 @@
# Path unit: watches the host certificate file and starts
# moments-worker-cert-reload.service whenever it changes. Rendered by
# script/deploy.sh, which fills in the host name placeholder below.
[Unit]
Description=Watch host cert for moments-worker
Documentation=https://git.lair.cafe/grenade/architecture
[Path]
# Worker holds a sqlx pool with rustls — restart on cert rotation. The
# poller is idempotent, so dropping mid-poll is safe.
PathChanged=/etc/pki/tls/misc/{{HOSTNAME}}.pem
Unit=moments-worker-cert-reload.service
[Install]
# Enabled by deploy.sh with `systemctl enable --now`.
WantedBy=multi-user.target

View File

@@ -0,0 +1,34 @@
# moments-worker: long-running ingestion process, run as the unprivileged
# `moments` user with its settings taken from /etc/moments/worker.env.
[Unit]
Description=moments ingestion worker
Documentation=https://git.lair.cafe/grenade/moments
# Order after, and pull in, network-online.target.
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
User=moments
Group=moments
# Rendered by script/deploy.sh from asset/config/worker.env.tmpl; contains the
# GitHub token, hence the 0640 root:moments permissions applied at deploy time.
EnvironmentFile=/etc/moments/worker.env
ExecStart=/usr/local/bin/moments-worker
# Restart after an unclean exit, waiting 10 s between attempts.
Restart=on-failure
RestartSec=10s
# Hardening
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
PrivateTmp=true
PrivateDevices=true
ProtectKernelTunables=true
ProtectKernelModules=true
ProtectControlGroups=true
RestrictRealtime=true
RestrictSUIDSGID=true
LockPersonality=true
MemoryDenyWriteExecute=true
SystemCallArchitectures=native
# ProtectSystem=strict leaves the file system read-only for this service;
# /var/lib/moments is the one path opened for writing. deploy.sh creates it
# before the unit is started.
ReadWritePaths=/var/lib/moments
# Only Unix-domain, IPv4 and IPv6 sockets may be created.
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,2 @@
# sysusers.d entry for the moments service account. deploy.sh installs it as
# /etc/sysusers.d/moments.conf and applies it with systemd-sysusers.
# `-` in the ID column leaves UID/GID allocation to systemd-sysusers.
#Type Name ID GECOS Home directory Shell
u moments - "moments service account" /var/lib/moments /usr/sbin/nologin

View File

@@ -34,8 +34,38 @@ The API expects a Postgres reachable at `DATABASE_URL`. For magrathea, that's an
DATABASE_URL=postgres://localhost/moments cargo run -p moments-api DATABASE_URL=postgres://localhost/moments cargo run -p moments-api
``` ```
Migrations live in `crates/moments-data/migrations/` and run automatically on API startup. Migrations live in `crates/moments-data/migrations/` and run automatically on worker startup. The API connects as `moments_ro` and never runs migrations — the worker (as `moments_rw`) is the schema owner.
## Deployment ## Deployment
See `asset/manifest.yml` and `script/deploy.sh`. ```sh
./script/deploy.sh prod all # api + worker + web
./script/deploy.sh prod api worker # subset
./script/deploy.sh prod default # api + web only (worker untouched)
./script/deploy.sh prod all --dry-run
```
Topology:
| Component | Host | Notes |
| --------- | --------------------------------- | --------------------------------------------- |
| api | `nikola.kosherinata.internal` | binds `127.0.0.1:42424`, fronted by local nginx |
| worker | `frootmig.kosherinata.internal` | no listening port; pollers only |
| web | `nikola.kosherinata.internal` | static `ui/dist/` under `/var/www/moments` |
| db | `magrathea.kosherinata.internal` | postgres mTLS, passwordless |
Postgres roles `moments_rw` and `moments_ro` must exist on the primary, with `pg_ident.conf` mappings in place for `nikola.kosherinata.internal` → `moments_ro` and `frootmig.kosherinata.internal` → `moments_rw`. See `asset/sql/bootstrap-moments.sql` and `asset/postgres/ident.conf.tmpl`.
Secrets resolved by `deploy.sh` via `pass`:
- `github.com/grenade/admin-token` — GitHub PAT for events + search APIs (worker only).
Optional, set if needed in `worker.env`: `GITEA_TOKEN`, `BUGZILLA_API_KEY`.
### DNS cutover
`rob.tn` currently resolves to GitHub Pages. After the first successful prod deploy:
1. Update Cloudflare DNS for `rob.tn` to the WAN IP that fronts `nikola` (unproxied — see architecture doc §11).
2. Confirm `curl -fsS https://rob.tn/api/v1/healthz` returns `ok`.
3. Add an archival notice to the top of [grenade-events-react/readme.md](https://github.com/grenade/grenade-events-react) pointing at this repo, and archive the GitHub repo.

326
script/deploy.sh Executable file
View File

@@ -0,0 +1,326 @@
#!/usr/bin/env bash
#
# moments deployment script.
#
# ./script/deploy.sh <environment> [component...] [--dry-run]
# ./script/deploy.sh prod api worker web
# ./script/deploy.sh prod all
# ./script/deploy.sh prod default
#
# Builds artifacts locally, resolves secrets from `pass`, renders config
# templates, rsyncs everything to the target hosts, and reloads systemd /
# nginx / SELinux state idempotently.
#
# Requires on the machine running the deploy: yq, pass, rsync, cargo (checked
# below), plus ssh and pnpm, which are used but not checked.
set -euo pipefail
# Unmatched globs expand to nothing instead of the literal pattern.
shopt -s nullglob
# Repository root = parent of the directory that holds this script.
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
manifest="${repo_root}/asset/manifest.yml"
# Set to 1 by --dry-run; run(), ssh_run() and the deploy_* functions then
# print what they would do instead of doing it.
dry_run=0
# Print the command-line synopsis to stderr and terminate with status 2.
usage() {
  local prog
  prog="$(basename "$0")"
  {
    printf 'usage: %s <environment> [component...] [--dry-run]\n' "$prog"
    printf '%s prod api worker web\n' "$prog"
    printf '%s prod all\n' "$prog"
    printf "%s prod default # api + web (worker isn't restarted unless asked)\n" "$prog"
  } >&2
  exit 2
}
# Status helpers. Each writes "[deploy] <message>" to stderr with the tag in
# bold colour (log: blue, warn: yellow, die: red); die also exits with 1.
_deploy_msg() {
  local sgr="$1"
  shift
  printf '\033[1;%sm[deploy]\033[0m %s\n' "$sgr" "$*" >&2
}
log() { _deploy_msg 34 "$@"; }
warn() { _deploy_msg 33 "$@"; }
die() {
  _deploy_msg 31 "$@"
  exit 1
}
# Execute the given command line, or, in dry-run mode, only print it to
# stderr. Outside dry-run mode the command's exit status is passed through.
run() {
  if (( dry_run == 0 )); then
    "$@"
    return
  fi
  printf '\033[2m[dry-run]\033[0m %s\n' "$*" >&2
}
# Run a command on a remote host over non-interactive ssh (BatchMode), or, in
# dry-run mode, only print the ssh invocation to stderr.
# Arguments: $1 - host; remaining arguments - the remote command.
ssh_run() {
  local target="$1"
  shift
  if (( dry_run == 0 )); then
    ssh -o BatchMode=yes "$target" "$@"
    return
  fi
  printf '\033[2m[dry-run]\033[0m ssh %s -- %s\n' "$target" "$*" >&2
}
# Parse the command line: the first argument is the environment, the rest
# are component names. --dry-run may appear anywhere after the environment.
[[ $# -ge 1 ]] || usage
# An option in the environment position (e.g. a lone --help) is a usage error.
[[ "$1" != -* ]] || usage
environment="$1"
shift
components=()
while [[ $# -gt 0 ]]; do
  case "$1" in
    --dry-run) dry_run=1 ;;
    -h|--help) usage ;;
    # Refuse unknown flags. A mistyped --dry-run must never be collected as a
    # component name and fall through to a real deployment.
    -*) die "unknown option: $1" ;;
    *) components+=("$1") ;;
  esac
  shift
done
# Preconditions: the manifest must exist and the tools listed here must be
# on PATH.
if [[ ! -f "$manifest" ]]; then
  die "manifest not found: $manifest"
fi
for required_tool in yq pass rsync cargo; do
  if ! command -v "$required_tool" >/dev/null 2>&1; then
    die "${required_tool} is required"
  fi
done
# Resolve component list ----------------------------------------------------
env_path=".environments.${environment}"
yq -e "${env_path}" "$manifest" >/dev/null \
  || die "environment '$environment' not found in manifest"
# Capture the yq output in a variable first: the exit status of a command
# inside <(...) is never seen, so a yq failure there would go unnoticed.
component_names="$(yq -r "${env_path}.components | keys | .[]" "$manifest")" \
  || die "cannot list components of environment '$environment'"
[[ -n "$component_names" ]] \
  || die "environment '$environment' defines no components"
mapfile -t all_components <<<"$component_names"
if [[ ${#components[@]} -eq 0 ]]; then
  usage
fi
# `all` and `default` are shorthands, recognised in the first position only.
case "${components[0]:-}" in
  all) components=("${all_components[@]}") ;;
  default) components=(api web) ;;
esac
# Fail fast on a mistyped component name, before anything is built or
# deployed.
for c in "${components[@]}"; do
  known=0
  for k in "${all_components[@]}"; do
    if [[ "$c" == "$k" ]]; then
      known=1
      break
    fi
  done
  (( known )) || die "component '$c' not found in manifest for '$environment'"
done
# Build artifacts -----------------------------------------------------------
# Work out which toolchains the requested components need.
needs_rust=0
needs_web=0
for c in "${components[@]}"; do
  if [[ "$c" == api || "$c" == worker ]]; then
    needs_rust=1
  elif [[ "$c" == web ]]; then
    needs_web=1
  fi
done
# A single cargo invocation builds both Rust binaries.
if [[ "$needs_rust" -eq 1 ]]; then
  log "cargo build --release (api, worker)"
  run cargo build --release \
    --bin moments-api \
    --bin moments-worker \
    --manifest-path "${repo_root}/Cargo.toml"
fi
# The UI is built inside ui/ with pnpm.
if [[ "$needs_web" -eq 1 ]]; then
  log "vite build (ui)"
  run sh -c "cd '${repo_root}/ui' && pnpm install --frozen-lockfile && pnpm run build"
fi
# Per-component deploy ------------------------------------------------------
# Deploy the API component to one host: stage the binary, env file and
# systemd units locally, copy them over, then configure and start the
# service remotely and wait for its health endpoint.
# Arguments: $1 - target host as listed in the manifest; also used as the
#                 {{HOSTNAME}} value in the rendered templates.
# Returns:   0 once the remote health probe succeeds, non-zero otherwise.
deploy_api() {
  local host="$1"
  log "api -> $host"
  if (( dry_run )); then
    printf '\033[2m[dry-run]\033[0m render api.env (HOSTNAME=%s) + units, rsync to %s:/, run sysusers/restorecon/semanage/systemctl on %s\n' \
      "$host" "$host" "$host" >&2
    return 0
  fi
  local fqdn="$host"
  local stage
  stage="$(mktemp -d)" || return 1
  trap "rm -rf '$stage'" RETURN

  # This function is called on the left-hand side of `||`, which switches
  # `set -e` off for its whole body, so every step is checked explicitly.
  install -d "$stage/etc/moments" "$stage/etc/systemd/system" \
    "$stage/etc/sysusers.d" "$stage/usr/local/bin" || return 1
  # Render env file and path unit with hostname substitution.
  sed "s|{{HOSTNAME}}|${fqdn}|g" "${repo_root}/asset/config/api.env.tmpl" \
    > "$stage/etc/moments/api.env" || return 1
  sed "s|{{HOSTNAME}}|${fqdn}|g" "${repo_root}/asset/systemd/moments-api-cert.path" \
    > "$stage/etc/systemd/system/moments-api-cert.path" || return 1
  install -m 0644 "${repo_root}/asset/systemd/moments-api.service" \
    "$stage/etc/systemd/system/" || return 1
  install -m 0644 "${repo_root}/asset/systemd/moments-api-cert-reload.service" \
    "$stage/etc/systemd/system/" || return 1
  install -m 0644 "${repo_root}/asset/systemd/moments.sysusers.conf" \
    "$stage/etc/sysusers.d/moments.conf" || return 1
  install -m 0755 "${repo_root}/target/release/moments-api" \
    "$stage/usr/local/bin/moments-api" || return 1
  # Permissions on the rendered env: group-readable, not world-readable.
  chmod 0640 "$stage/etc/moments/api.env" || return 1

  # Transfer files only (--files-from) and leave existing directories on the
  # path untouched (--no-implied-dirs). A plain `rsync -a "$stage/" host:/`
  # would stamp the staging tree's owner and its 0700 mktemp mode onto the
  # remote /, /etc, /usr, ... --chown makes the files root-owned on arrival.
  (
    cd "$stage" \
      && find etc usr -type f -print0 \
        | rsync -aHAX --from0 --files-from=- --no-implied-dirs \
            --chown=root:root --rsync-path="sudo rsync" . "${host}:/"
  ) || return 1

  ssh_run "$host" "sudo bash -s" <<'REMOTE_EOF'
set -euo pipefail
fqdn="$(hostname -f)"
systemd-sysusers /etc/sysusers.d/moments.conf
install -d -o root -g moments -m 0750 /etc/moments
install -d -o moments -g moments -m 0750 /var/lib/moments
chown root:moments /etc/moments/api.env
chmod 0640 /etc/moments/api.env
# Grant the moments user read access to the host private key — required for
# the postgres mTLS connection. Best-effort: a failure here is not fatal.
setfacl -m u:moments:r "/etc/pki/tls/private/${fqdn}.pem" || true
# Label the loopback API port as http_port_t unless it already is. semanage
# prints the ports of a type as one comma-separated list, so every field from
# the third onwards is compared, not only the first port.
if ! semanage port -l | awk '
  $1 == "http_port_t" && $2 == "tcp" {
    for (i = 3; i <= NF; i++) {
      p = $i
      sub(/,$/, "", p)
      if (p == "42424") found = 1
    }
  }
  END { exit !found }'; then
  semanage port -a -t http_port_t -p tcp 42424 || \
    semanage port -m -t http_port_t -p tcp 42424
fi
restorecon -Rv /usr/local/bin/moments-api /etc/moments /var/lib/moments
systemctl daemon-reload
systemctl enable --now moments-api-cert.path
systemctl enable --now moments-api.service
systemctl restart moments-api.service
# Health probe: up to ten attempts, one second apart.
for i in 1 2 3 4 5 6 7 8 9 10; do
  if curl -fsS http://127.0.0.1:42424/v1/healthz >/dev/null; then
    echo "moments-api healthy"
    exit 0
  fi
  sleep 1
done
echo "moments-api did not become healthy" >&2
journalctl -u moments-api.service -n 50 --no-pager >&2
exit 1
REMOTE_EOF
}
# Deploy the worker component to one host: stage the binary, env file (with
# the GitHub token from `pass`) and systemd units locally, copy them over,
# then configure and start the service remotely.
# Arguments: $1 - target host as listed in the manifest; also used as the
#                 {{HOSTNAME}} value in the rendered templates.
# Returns:   0 when the service is active after the restart, non-zero
#            otherwise.
deploy_worker() {
  local host="$1"
  log "worker -> $host"
  if (( dry_run )); then
    printf '\033[2m[dry-run]\033[0m render worker.env (HOSTNAME=%s, GITHUB_TOKEN from pass) + units, rsync to %s:/, run sysusers/restorecon/systemctl on %s\n' \
      "$host" "$host" "$host" >&2
    return 0
  fi
  local fqdn="$host"
  local github_token="" secret=""
  # A single `pass` call. Only the first line of the entry is used as the
  # token, so any further lines stored in the entry do not end up in the
  # env file.
  if secret="$(pass show github.com/grenade/admin-token 2>/dev/null)" \
    && [[ -n "$secret" ]]; then
    github_token="${secret%%$'\n'*}"
  else
    warn "no github admin-token in pass; worker will run without GITHUB_TOKEN"
  fi
  local stage
  stage="$(mktemp -d)" || return 1
  trap "rm -rf '$stage'" RETURN

  # This function is called on the left-hand side of `||`, which switches
  # `set -e` off for its whole body, so every step is checked explicitly.
  install -d "$stage/etc/moments" "$stage/etc/systemd/system" \
    "$stage/etc/sysusers.d" "$stage/usr/local/bin" || return 1
  # sed fills in the host name and drops the template's GITHUB_TOKEN line;
  # the token itself is appended with the printf builtin. Passing it to sed
  # would expose it in the process list and break on tokens containing
  # `|`, `&` or `\`.
  sed -e "s|{{HOSTNAME}}|${fqdn}|g" -e '/^GITHUB_TOKEN=/d' \
    "${repo_root}/asset/config/worker.env.tmpl" \
    > "$stage/etc/moments/worker.env" || return 1
  printf 'GITHUB_TOKEN=%s\n' "$github_token" \
    >> "$stage/etc/moments/worker.env" || return 1
  sed "s|{{HOSTNAME}}|${fqdn}|g" "${repo_root}/asset/systemd/moments-worker-cert.path" \
    > "$stage/etc/systemd/system/moments-worker-cert.path" || return 1
  install -m 0644 "${repo_root}/asset/systemd/moments-worker.service" \
    "$stage/etc/systemd/system/" || return 1
  install -m 0644 "${repo_root}/asset/systemd/moments-worker-cert-reload.service" \
    "$stage/etc/systemd/system/" || return 1
  install -m 0644 "${repo_root}/asset/systemd/moments.sysusers.conf" \
    "$stage/etc/sysusers.d/moments.conf" || return 1
  install -m 0755 "${repo_root}/target/release/moments-worker" \
    "$stage/usr/local/bin/moments-worker" || return 1
  chmod 0640 "$stage/etc/moments/worker.env" || return 1

  # Transfer files only (--files-from) and leave existing directories on the
  # path untouched (--no-implied-dirs). A plain `rsync -a "$stage/" host:/`
  # would stamp the staging tree's owner and its 0700 mktemp mode onto the
  # remote /, /etc, /usr, ... --chown makes the files root-owned on arrival.
  (
    cd "$stage" \
      && find etc usr -type f -print0 \
        | rsync -aHAX --from0 --files-from=- --no-implied-dirs \
            --chown=root:root --rsync-path="sudo rsync" . "${host}:/"
  ) || return 1

  ssh_run "$host" "sudo bash -s" <<'REMOTE_EOF'
set -euo pipefail
fqdn="$(hostname -f)"
systemd-sysusers /etc/sysusers.d/moments.conf
install -d -o root -g moments -m 0750 /etc/moments
install -d -o moments -g moments -m 0750 /var/lib/moments
chown root:moments /etc/moments/worker.env
chmod 0640 /etc/moments/worker.env
# Read access to the host private key for the postgres mTLS connection.
# Best-effort: a failure here is not fatal.
setfacl -m u:moments:r "/etc/pki/tls/private/${fqdn}.pem" || true
restorecon -Rv /usr/local/bin/moments-worker /etc/moments /var/lib/moments
systemctl daemon-reload
systemctl enable --now moments-worker-cert.path
systemctl enable --now moments-worker.service
systemctl restart moments-worker.service
# Liveness probe — worker doesn't expose a port, so check is-active.
if ! systemctl is-active --quiet moments-worker.service; then
  journalctl -u moments-worker.service -n 50 --no-pager >&2
  exit 1
fi
echo "moments-worker active"
REMOTE_EOF
}
# Deploy the static UI and the nginx site config to one host, then validate
# the nginx configuration and reload nginx.
# Arguments: $1 - target host as listed in the manifest.
# Returns:   0 when nginx accepted the config and reloaded, non-zero
#            otherwise.
deploy_web() {
  local host="$1"
  log "web -> $host"
  if (( dry_run )); then
    printf '\033[2m[dry-run]\033[0m rsync ui/dist/ to %s:/var/www/moments/ + nginx config, run nginx -t/reload on %s\n' \
      "$host" "$host" >&2
    return 0
  fi
  local dist="${repo_root}/ui/dist"
  # Refuse to publish a missing or empty build: the sync below uses --delete
  # and would otherwise wipe the live site.
  if [[ ! -f "${dist}/index.html" ]]; then
    warn "web: ${dist}/index.html not found; build the UI first"
    return 1
  fi
  local stage
  stage="$(mktemp -d)" || return 1
  trap "rm -rf '$stage'" RETURN

  # This function is called on the left-hand side of `||`, which switches
  # `set -e` off for its whole body, so every step is checked explicitly.
  install -d "$stage/var/www/moments" "$stage/etc/nginx/conf.d" || return 1
  rsync -a "${dist}/" "$stage/var/www/moments/" || return 1
  install -m 0644 "${repo_root}/asset/nginx/rob.tn.conf" \
    "$stage/etc/nginx/conf.d/rob.tn.conf" || return 1

  # --chown: files arrive root-owned instead of carrying the deploying
  # user's uid onto the web root and the nginx config.
  rsync -aHAX --delete --chown=root:root --rsync-path="sudo rsync" \
    "$stage/var/www/moments/" "${host}:/var/www/moments/" || return 1
  rsync -aHAX --chown=root:root --rsync-path="sudo rsync" \
    "$stage/etc/nginx/conf.d/rob.tn.conf" \
    "${host}:/etc/nginx/conf.d/rob.tn.conf" || return 1

  ssh_run "$host" "sudo bash -s" <<'REMOTE_EOF'
set -euo pipefail
# Allow nginx to talk upstream to the loopback API socket.
setsebool -P httpd_can_network_connect on
restorecon -Rv /var/www/moments /etc/nginx/conf.d/rob.tn.conf
# Never reload with a config nginx itself rejects.
if ! nginx -t; then
  echo "nginx config check failed" >&2
  exit 1
fi
systemctl reload nginx
echo "nginx reloaded"
REMOTE_EOF
}
# Dispatch ------------------------------------------------------------------
# Deploy every requested component to each of its hosts. Failures are
# collected so that one bad host does not stop the remaining deployments;
# the script exits non-zero at the end if anything failed.
failed=()
for component in "${components[@]}"; do
  # Reject names without a deploy function before looking up hosts: a
  # component missing from the manifest has no hosts, so a check placed
  # inside the host loop would never run for it.
  case "$component" in
    api|worker|web) ;;
    *)
      warn "unknown component: $component"
      failed+=("$component")
      continue
      ;;
  esac
  mapfile -t hosts < <(yq -r "${env_path}.components.${component}.hosts[]" "$manifest")
  # An empty host list means nothing would be deployed; report it instead of
  # finishing with "deploy complete".
  if [[ ${#hosts[@]} -eq 0 ]]; then
    warn "no hosts listed for component: $component"
    failed+=("$component")
    continue
  fi
  for host in "${hosts[@]}"; do
    case "$component" in
      api) deploy_api "$host" || failed+=("api@$host") ;;
      worker) deploy_worker "$host" || failed+=("worker@$host") ;;
      web) deploy_web "$host" || failed+=("web@$host") ;;
    esac
  done
done
if [[ ${#failed[@]} -gt 0 ]]; then
  die "failed: ${failed[*]}"
fi
log "deploy complete"