Files
moments/script/deploy.sh
rob thijssen 8867ff5df3 feat(deploy): manifest-driven config, teardown + db-perms, hardening
deploy.sh:
- never rsync into /; stage to /tmp on the remote and install at final
  paths via sudo bash heredoc, closing the parent-dir attribute leak
  that broke three hosts in the earlier rsync incident
- shell-quote heredoc args via ${var@Q}
- drop -A -X on the remaining (web) rsyncs
- generic worker.secrets loop reads (env-var → pass path) from manifest;
  GITEA_TOKEN now flows through automatically
- in-memory bash substitution for templates (secrets never on argv)
- simplify semanage port labelling: --add 2>/dev/null || --modify (the
  old grep pre-check matched only the first listed port)
- restorecon back to short flags (Fedora policycoreutils has no long
  forms; --recursive errored at deploy time)
- quieter health probe loop: curl diagnostics only on final failure

manifest as source of truth:
- api.config.bind drives BIND_ADDR, firewalld port, semanage label,
  health-probe URL
- web.config.{server_name,root,api_upstream} drives nginx render,
  rsync targets, restorecon scope
- nginx config renamed to site.conf.tmpl; firewalld svc to
  moments-api.xml.tmpl; both rendered at deploy time
- topology flip: api → nikola, worker → frootmig (anjie freed)

new scripts:
- script/teardown.sh: idempotent component teardown, never rsyncs,
  shared-state cleanup gated on absence of remaining env files,
  --remove-docroot guard against shallow / system paths
- script/db-perms.sh: rewritten — fixes grep/append role mismatch that
  appended duplicates on re-run, adds postgres reload, hits primary +
  standby in a single invocation

readme: genericized; deployment topology no longer carries real host
or site names.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 16:39:10 +03:00

497 lines
18 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Deploy moments components to the hosts named in the manifest.
#
# Usage:
#   ./script/deploy.sh <environment> [component...]
#   ./script/deploy.sh prod api worker web
#   ./script/deploy.sh prod all
#
# The script builds the artifacts on the local machine, looks secrets up in
# `pass`, renders the config templates, rsyncs the results to each target
# host, and then reloads systemd / nginx / firewalld / SELinux state in an
# idempotent way.

# Strict mode: abort on command failure, on unset variables, and on a failure
# anywhere in a pipeline. Unmatched globs expand to nothing.
set -o errexit -o nounset -o pipefail
shopt -s nullglob

# The repository root is the parent of the directory holding this script.
script_dir="$(dirname "${BASH_SOURCE[0]}")"
repo_root="$(cd "${script_dir}/.." && pwd)"
manifest="${repo_root}/asset/manifest.yml"

# Switched to 1 by --dry-run; consulted by run, ssh_run and the deploy_* functions.
dry_run=0
# Print the command-line synopsis to stderr and terminate with status 2.
usage() {
  local prog
  prog="$(basename "$0")"
  {
    printf 'usage: %s <environment> [component...] [--dry-run]\n' "$prog"
    printf '%s prod api worker web\n' "$prog"
    printf '%s prod all\n' "$prog"
    printf "%s prod default # api + web (worker isn't restarted unless asked)\n" "$prog"
  } >&2
  exit 2
}
# Write one "[deploy]"-tagged line to stderr.
#   $1 - SGR parameters for the tag (the text between "ESC[" and "m")
#   $2 - message text
_deploy_emit() {
  printf '\033[%sm[deploy]\033[0m %s\n' "$1" "$2" >&2
}

# Informational message (bold blue tag).
log() {
  _deploy_emit '1;34' "$*"
}

# Warning message (bold yellow tag); execution continues.
warn() {
  _deploy_emit '1;33' "$*"
}

# Error message (bold red tag), then exit the shell with status 1.
die() {
  _deploy_emit '1;31' "$*"
  exit 1
}
# Execute the given command, or only describe it when dry_run is non-zero.
#   $@ - command and its arguments
# In dry-run mode the words are joined and printed to stderr with a dimmed
# "[dry-run]" tag; otherwise the command's own exit status is returned.
run() {
  if (( ! dry_run )); then
    "$@"
    return
  fi
  printf '\033[2m[dry-run]\033[0m %s\n' "$*" >&2
}
# Run a command on a remote host over non-interactive ssh, or only describe
# it when dry_run is non-zero.
#   $1 - ssh destination
#   $@ - remaining words form the remote command
# BatchMode=yes makes ssh fail instead of prompting for a password. In
# dry-run mode nothing is executed and stdin is left unread.
ssh_run() {
  local target="$1"
  shift
  if (( ! dry_run )); then
    ssh -o BatchMode=yes "$target" "$@"
    return
  fi
  printf '\033[2m[dry-run]\033[0m ssh %s -- %s\n' "$target" "$*" >&2
}
# Argument parsing ------------------------------------------------------------
# The first positional word is the environment; every later positional word
# is a component name. `--dry-run` is accepted in any position.
#
# Any other dash-prefixed word is rejected. Treating it as a component name
# would let a mistyped `--dryrun` slip through as an unknown component and
# the deploy would then run for real.
[[ $# -ge 1 ]] || usage
environment=""
components=()
while [[ $# -gt 0 ]]; do
  case "$1" in
    --dry-run) dry_run=1 ;;
    -h|--help) usage ;;
    -*) die "unknown option: $1" ;;
    *)
      if [[ -z "$environment" ]]; then
        environment="$1"
      else
        components+=("$1")
      fi
      ;;
  esac
  shift
done
# Only flags were given (e.g. `deploy.sh --dry-run`): nothing to deploy to.
[[ -n "$environment" ]] || usage
# Preflight: the manifest has to exist and every external tool the script
# shells out to has to be on PATH. The first missing item aborts the run.
if [[ ! -f "$manifest" ]]; then
  die "manifest not found: $manifest"
fi
for required_tool in yq pass rsync cargo; do
  if ! command -v "$required_tool" >/dev/null 2>&1; then
    die "${required_tool} is required"
  fi
done
# Resolve component list ----------------------------------------------------
# env_path is the manifest query prefix for the selected environment and is
# reused by the deploy_* functions and the dispatch loop.
env_path=".environments.${environment}"
yq --exit-status "${env_path}" "$manifest" >/dev/null \
  || die "environment '$environment' not found in manifest"

# Component names the manifest declares for this environment. A yq failure
# inside the process substitution is not visible through its exit status, so
# an empty result is treated as the error signal.
mapfile -t all_components < <(yq --raw-output "${env_path}.components | keys | .[]" "$manifest")
[[ ${#all_components[@]} -gt 0 ]] \
  || die "environment '$environment' declares no components in manifest"

if [[ ${#components[@]} -eq 0 ]]; then
  usage
fi

# `all` and `default` are aliases, honoured only as the first component word.
case "${components[0]:-}" in
  all) components=("${all_components[@]}") ;;
  default) components=(api web) ;;
esac

# Fail before building anything if a requested component is not declared for
# this environment; otherwise a typo would be skipped later without notice.
for requested_component in "${components[@]}"; do
  component_known=0
  for declared_component in "${all_components[@]}"; do
    if [[ "$declared_component" == "$requested_component" ]]; then
      component_known=1
      break
    fi
  done
  (( component_known )) \
    || die "component '$requested_component' is not defined for environment '$environment' (available: ${all_components[*]})"
done
# Build artifacts -----------------------------------------------------------
# Work out which toolchains are needed from the requested components, then
# build once up front so every host receives the same artifacts.
needs_rust=0
needs_web=0
for c in "${components[@]}"; do
  case "$c" in
    api|worker) needs_rust=1 ;;
    web) needs_web=1 ;;
  esac
done

if (( needs_rust )); then
  log "cargo build --release (api, worker)"
  run cargo build --release --bin moments-api --bin moments-worker --manifest-path "${repo_root}/Cargo.toml"
fi

if (( needs_web )); then
  command -v pnpm >/dev/null 2>&1 || die "pnpm is required"
  log "vite build (ui)"
  # The ui directory is handed to the inner shell as "$1" instead of being
  # spliced into the command string, so a checkout path containing quotes or
  # other shell metacharacters cannot break or alter the command.
  run sh -c 'cd "$1" && pnpm install --frozen-lockfile && pnpm run build' sh "${repo_root}/ui"
fi
# Per-component deploy ------------------------------------------------------
# Deploy the moments API service to one host.
#
#   $1 - ssh destination; also substituted for {{HOSTNAME}} in the templates
#
# Reads the globals env_path, manifest, repo_root and dry_run. The manifest
# value api.config.bind (host:port) supplies {{BIND}}; its port is used for
# the firewalld service file, the SELinux port label and the health probe.
#
# Steps: render templates and collect the unit files and release binary in a
# local staging dir, rsync that dir to a scratch path under /tmp on the host,
# then run a root script there that installs each file at its final path,
# applies ACL / SELinux / firewalld state, restarts the service and polls
# /v1/healthz. Returns the status of that remote script.
#
# NOTE: the step-by-step abort on failure relies on errexit being in effect
# for this function; bash ignores errexit when a function is invoked on the
# left-hand side of `||` or `&&`.
deploy_api() {
  local host="$1"
  log "api -> $host"

  local bind
  bind="$(yq --raw-output "${env_path}.components.api.config.bind" "$manifest")"
  [[ -n "$bind" && "$bind" != "null" ]] || die "api.config.bind missing in manifest"
  [[ "$bind" == *:* ]] \
    || die "api.config.bind must be host:port form: '$bind'"
  local api_port
  api_port="${bind##*:}"
  [[ "$api_port" =~ ^[0-9]+$ ]] \
    || die "api.config.bind port is not numeric: '$api_port'"

  if (( dry_run )); then
    printf '\033[2m[dry-run]\033[0m render api.env (HOSTNAME=%s, BIND=%s) + firewalld svc (port=%s) + units, stage to %s:/tmp/, install via heredoc, run sysusers/restorecon/semanage/systemctl on %s\n' \
      "$host" "$bind" "$api_port" "$host" "$host" >&2
    return 0
  fi

  # NOTE(review): templates receive the manifest host name, while the remote
  # script below uses `hostname --fqdn`; the two agree only when the manifest
  # lists fully-qualified names -- confirm.
  local fqdn="$host"

  local stage
  stage="$(mktemp --directory)"
  # Remove the local staging dir when this function returns.
  trap "rm --recursive --force '$stage'" RETURN
  install --directory \
    "$stage/etc/moments" \
    "$stage/etc/systemd/system" \
    "$stage/etc/sysusers.d" \
    "$stage/etc/firewalld/services" \
    "$stage/usr/local/bin"

  # Render templates with in-shell substitution. The replacement is quoted on
  # purpose: since bash 5.2 (patsub_replacement) an unquoted `&` in the
  # replacement expands to the matched placeholder text.
  local rendered
  rendered="$(<"${repo_root}/asset/config/api.env.tmpl")"
  rendered=${rendered//'{{HOSTNAME}}'/"$fqdn"}
  rendered=${rendered//'{{BIND}}'/"$bind"}
  printf '%s\n' "$rendered" > "$stage/etc/moments/api.env"

  rendered="$(<"${repo_root}/asset/systemd/moments-api-cert.path")"
  rendered=${rendered//'{{HOSTNAME}}'/"$fqdn"}
  printf '%s\n' "$rendered" > "$stage/etc/systemd/system/moments-api-cert.path"

  rendered="$(<"${repo_root}/asset/firewalld/moments-api.xml.tmpl")"
  rendered=${rendered//'{{API_PORT}}'/"$api_port"}
  printf '%s\n' "$rendered" > "$stage/etc/firewalld/services/moments-api.xml"
  chmod 0644 "$stage/etc/firewalld/services/moments-api.xml"

  install --mode=0644 "${repo_root}/asset/systemd/moments-api.service" "$stage/etc/systemd/system/"
  install --mode=0644 "${repo_root}/asset/systemd/moments-api-cert-reload.service" "$stage/etc/systemd/system/"
  install --mode=0644 "${repo_root}/asset/systemd/moments.sysusers.conf" "$stage/etc/sysusers.d/moments.conf"
  install --mode=0755 "${repo_root}/target/release/moments-api" "$stage/usr/local/bin/moments-api"
  chmod 0640 "$stage/etc/moments/api.env"

  # Stage to a tmpdir on the remote, then `install` each file at its final
  # path via the heredoc. Never rsync into /, since rsync of staged parent
  # dirs (etc/, usr/, ...) can leak ownership, ACLs and xattrs onto the
  # live system dirs.
  local remote_stage="/tmp/moments-deploy.api.${$}.${RANDOM}"
  rsync \
    --archive \
    --hard-links \
    --numeric-ids \
    --rsh='ssh -o BatchMode=yes' \
    "$stage/" \
    "${host}:${remote_stage}/"

  # The heredoc delimiter is quoted, so nothing below is expanded locally; the
  # two values the remote script needs are passed as shell-quoted arguments.
  ssh_run "$host" "sudo bash -s -- ${remote_stage@Q} ${api_port@Q}" <<'REMOTE_EOF'
set -euo pipefail
remote_stage="$1"
api_port="$2"
trap 'rm --recursive --force "$remote_stage"' EXIT
fqdn="$(hostname --fqdn)"
install --owner=root --group=root --mode=0644 \
  "$remote_stage/etc/sysusers.d/moments.conf" \
  /etc/sysusers.d/moments.conf
systemd-sysusers /etc/sysusers.d/moments.conf
install --directory --owner=root --group=moments --mode=0750 /etc/moments
install --directory --owner=moments --group=moments --mode=0750 /var/lib/moments
install --owner=root --group=moments --mode=0640 \
  "$remote_stage/etc/moments/api.env" \
  /etc/moments/api.env
install --owner=root --group=root --mode=0644 \
  "$remote_stage/etc/systemd/system/moments-api.service" \
  /etc/systemd/system/moments-api.service
install --owner=root --group=root --mode=0644 \
  "$remote_stage/etc/systemd/system/moments-api-cert.path" \
  /etc/systemd/system/moments-api-cert.path
install --owner=root --group=root --mode=0644 \
  "$remote_stage/etc/systemd/system/moments-api-cert-reload.service" \
  /etc/systemd/system/moments-api-cert-reload.service
install --owner=root --group=root --mode=0644 \
  "$remote_stage/etc/firewalld/services/moments-api.xml" \
  /etc/firewalld/services/moments-api.xml
install --owner=root --group=root --mode=0755 \
  "$remote_stage/usr/local/bin/moments-api" \
  /usr/local/bin/moments-api
# Grant the moments user read access to the host private key for the
# postgres mTLS connection.
setfacl --modify=u:moments:r "/etc/pki/tls/private/${fqdn}.pem" || true
# Idempotent label: --add fails if the port is already labelled (we suppress
# that one stderr line); --modify is then a no-op or fixes a stale type.
semanage port --add --type=http_port_t --proto=tcp "$api_port" 2>/dev/null \
  || semanage port --modify --type=http_port_t --proto=tcp "$api_port"
# Reload first so firewalld picks up the service file installed above.
firewall-cmd --reload
zone="$(firewall-cmd --get-default-zone)"
if ! firewall-cmd --zone="$zone" --query-service=moments-api >/dev/null 2>&1; then
  firewall-cmd --permanent --zone="$zone" --add-service=moments-api
  firewall-cmd --zone="$zone" --add-service=moments-api
fi
restorecon -Rv /usr/local/bin/moments-api /etc/moments /var/lib/moments
systemctl daemon-reload
systemctl enable --now moments-api-cert.path
systemctl enable --now moments-api.service
systemctl restart moments-api.service
# Quietly retry while the service binds; only show curl's diagnostics if
# every attempt fails. The journalctl tail on failure is the verbose source.
for _ in {1..10}; do
  if curl --fail --silent "http://${fqdn}:${api_port}/v1/healthz" >/dev/null 2>&1; then
    echo "moments-api healthy"
    exit 0
  fi
  sleep 1
done
echo "moments-api did not become healthy" >&2
curl --fail --silent --show-error "http://${fqdn}:${api_port}/v1/healthz" >/dev/null || true
journalctl --unit=moments-api.service --lines=50 --no-pager >&2
exit 1
REMOTE_EOF
}
# Deploy the moments worker service to one host.
#
#   $1 - ssh destination; also substituted for {{HOSTNAME}} in the templates
#
# Reads the globals env_path, manifest, repo_root and dry_run.
#
# Steps: resolve the secrets listed in the manifest through `pass`, render
# worker.env and the cert path unit, collect the unit files and release
# binary in a local staging dir, rsync that dir to a scratch path under /tmp
# on the host, then run a root script there that installs each file at its
# final path, restarts the worker and checks that it is active. Returns the
# status of that remote script.
#
# NOTE: the step-by-step abort on failure relies on errexit being in effect
# for this function; bash ignores errexit when a function is invoked on the
# left-hand side of `||` or `&&`.
deploy_worker() {
  local host="$1"
  log "worker -> $host"

  # Manifest entries under `worker.secrets` map env-var name -> pass store path.
  # The script fetches each via `pass` and substitutes the matching {{NAME}}
  # placeholder in worker.env.tmpl. Adding a new secret is then a manifest +
  # template change; no script edit required.
  local -a secret_lines=() secret_keys=()
  mapfile -t secret_lines < <(yq --raw-output \
    "${env_path}.components.worker.secrets // {} | to_entries | .[] | \"\(.key)=\(.value)\"" \
    "$manifest")
  local line
  for line in "${secret_lines[@]}"; do
    [[ -n "$line" ]] && secret_keys+=("${line%%=*}")
  done

  if (( dry_run )); then
    printf '\033[2m[dry-run]\033[0m render worker.env (HOSTNAME=%s, secrets [%s] from pass) + units, stage to %s:/tmp/, install via heredoc, run sysusers/restorecon/systemctl on %s\n' \
      "$host" "${secret_keys[*]:-none}" "$host" "$host" >&2
    return 0
  fi

  # NOTE(review): templates receive the manifest host name, while the remote
  # script below uses `hostname --fqdn`; the two agree only when the manifest
  # lists fully-qualified names -- confirm.
  local fqdn="$host"

  local stage
  stage="$(mktemp --directory)"
  # Remove the local staging dir (it holds rendered secrets) on return.
  trap "rm --recursive --force '$stage'" RETURN
  install --directory \
    "$stage/etc/moments" \
    "$stage/etc/systemd/system" \
    "$stage/etc/sysusers.d" \
    "$stage/usr/local/bin"

  # Render templates in-memory so secrets never appear on a command line
  # (sed would expose them to anything that can read /proc/<pid>/cmdline).
  #
  # Every replacement is quoted on purpose: since bash 5.2
  # (patsub_replacement) an unquoted `&` in the replacement expands to the
  # matched placeholder, which would silently corrupt any secret that
  # contains `&`.
  local rendered
  rendered="$(<"${repo_root}/asset/config/worker.env.tmpl")"
  rendered=${rendered//'{{HOSTNAME}}'/"$fqdn"}
  local key pass_path value
  for line in "${secret_lines[@]}"; do
    [[ -z "$line" ]] && continue
    key="${line%%=*}"
    pass_path="${line#*=}"
    # One `pass show` per secret: its exit status tells whether the entry
    # exists and its stdout (the whole entry) is the value.
    if ! value="$(pass show "$pass_path" 2>/dev/null)"; then
      warn "no secret in pass at '${pass_path}' for ${key}; worker will run without ${key}"
      value=""
    fi
    rendered=${rendered//"{{${key}}}"/"$value"}
  done
  printf '%s\n' "$rendered" > "$stage/etc/moments/worker.env"

  rendered="$(<"${repo_root}/asset/systemd/moments-worker-cert.path")"
  rendered=${rendered//'{{HOSTNAME}}'/"$fqdn"}
  printf '%s\n' "$rendered" > "$stage/etc/systemd/system/moments-worker-cert.path"

  install --mode=0644 "${repo_root}/asset/systemd/moments-worker.service" "$stage/etc/systemd/system/"
  install --mode=0644 "${repo_root}/asset/systemd/moments-worker-cert-reload.service" "$stage/etc/systemd/system/"
  install --mode=0644 "${repo_root}/asset/systemd/moments.sysusers.conf" "$stage/etc/sysusers.d/moments.conf"
  install --mode=0755 "${repo_root}/target/release/moments-worker" "$stage/usr/local/bin/moments-worker"
  chmod 0640 "$stage/etc/moments/worker.env"

  # Stage to a tmpdir on the remote, then `install` each file at its final
  # path via the heredoc. Never rsync into /.
  local remote_stage="/tmp/moments-deploy.worker.${$}.${RANDOM}"
  rsync \
    --archive \
    --hard-links \
    --numeric-ids \
    --rsh='ssh -o BatchMode=yes' \
    "$stage/" \
    "${host}:${remote_stage}/"

  # The heredoc delimiter is quoted, so nothing below is expanded locally; the
  # scratch path is passed as a shell-quoted argument.
  ssh_run "$host" "sudo bash -s -- ${remote_stage@Q}" <<'REMOTE_EOF'
set -euo pipefail
remote_stage="$1"
trap 'rm --recursive --force "$remote_stage"' EXIT
fqdn="$(hostname --fqdn)"
install --owner=root --group=root --mode=0644 \
  "$remote_stage/etc/sysusers.d/moments.conf" \
  /etc/sysusers.d/moments.conf
systemd-sysusers /etc/sysusers.d/moments.conf
install --directory --owner=root --group=moments --mode=0750 /etc/moments
install --directory --owner=moments --group=moments --mode=0750 /var/lib/moments
install --owner=root --group=moments --mode=0640 \
  "$remote_stage/etc/moments/worker.env" \
  /etc/moments/worker.env
install --owner=root --group=root --mode=0644 \
  "$remote_stage/etc/systemd/system/moments-worker.service" \
  /etc/systemd/system/moments-worker.service
install --owner=root --group=root --mode=0644 \
  "$remote_stage/etc/systemd/system/moments-worker-cert.path" \
  /etc/systemd/system/moments-worker-cert.path
install --owner=root --group=root --mode=0644 \
  "$remote_stage/etc/systemd/system/moments-worker-cert-reload.service" \
  /etc/systemd/system/moments-worker-cert-reload.service
install --owner=root --group=root --mode=0755 \
  "$remote_stage/usr/local/bin/moments-worker" \
  /usr/local/bin/moments-worker
# Best effort: let the moments user read the host private key.
setfacl --modify=u:moments:r "/etc/pki/tls/private/${fqdn}.pem" || true
restorecon -Rv /usr/local/bin/moments-worker /etc/moments /var/lib/moments
systemctl daemon-reload
systemctl enable --now moments-worker-cert.path
systemctl enable --now moments-worker.service
systemctl restart moments-worker.service
if ! systemctl is-active --quiet moments-worker.service; then
  journalctl --unit=moments-worker.service --lines=50 --no-pager >&2
  exit 1
fi
echo "moments-worker active"
REMOTE_EOF
}
# Deploy the static web UI and its nginx site config to one host.
#
#   $1 - ssh destination
#
# Reads the globals env_path, manifest, repo_root and dry_run. Three manifest
# values under web.config drive the deploy:
#   server_name  - fills {{SERVER_NAME}} and names /etc/nginx/conf.d/<name>.conf
#   root         - absolute docroot; fills {{DOCROOT}} and is the rsync target
#   api_upstream - http(s)://host:port; fills the upstream scheme/address
#                  placeholders, and its port receives the SELinux label
#
# Steps: copy ui/dist and the rendered site config into a local staging dir,
# rsync the docroot (with --delete) and the config file to the host as root,
# then run a root script there that sets SELinux state, validates the nginx
# config and reloads nginx. Returns the status of that remote script.
#
# NOTE: the step-by-step abort on failure relies on errexit being in effect
# for this function; bash ignores errexit when a function is invoked on the
# left-hand side of `||` or `&&`.
deploy_web() {
  local host="$1"
  log "web -> $host"

  local server_name web_root api_upstream
  server_name="$(yq --raw-output "${env_path}.components.web.config.server_name" "$manifest")"
  web_root="$(yq --raw-output "${env_path}.components.web.config.root" "$manifest")"
  api_upstream="$(yq --raw-output "${env_path}.components.web.config.api_upstream" "$manifest")"
  [[ -n "$server_name" && "$server_name" != "null" ]] || die "web.config.server_name missing in manifest"
  [[ -n "$web_root" && "$web_root" != "null" ]] || die "web.config.root missing in manifest"
  [[ -n "$api_upstream" && "$api_upstream" != "null" ]] || die "web.config.api_upstream missing in manifest"

  # server_name becomes a file name under /etc/nginx/conf.d; a slash would
  # let it point somewhere else.
  [[ "$server_name" != */* ]] \
    || die "web.config.server_name must not contain '/': '$server_name'"

  [[ "$web_root" == /* ]] \
    || die "web.config.root must be an absolute path: '$web_root'"

  # The docroot is the target of a root-privileged `rsync --delete`, so a
  # manifest typo such as "/" or "/etc" would wipe system directories.
  # Require at least two real path components and no ".." component.
  local -a root_parts=()
  local root_part root_depth=0
  IFS='/' read -r -a root_parts <<< "$web_root"
  for root_part in "${root_parts[@]}"; do
    case "$root_part" in
      ''|.) ;;
      ..) die "web.config.root must not contain '..': '$web_root'" ;;
      *) root_depth=$(( root_depth + 1 )) ;;
    esac
  done
  (( root_depth >= 2 )) \
    || die "web.config.root is too shallow to sync with --delete: '$web_root'"

  [[ "$api_upstream" == http://* || "$api_upstream" == https://* ]] \
    || die "web.config.api_upstream must be a http(s) URL: '$api_upstream'"
  local api_upstream_scheme api_upstream_addr api_upstream_port
  api_upstream_scheme="${api_upstream%%://*}"
  api_upstream_addr="${api_upstream#*://}"
  [[ "$api_upstream_addr" == *:* ]] \
    || die "web.config.api_upstream must include an explicit port: '$api_upstream'"
  api_upstream_port="${api_upstream_addr##*:}"
  [[ "$api_upstream_port" =~ ^[0-9]+$ ]] \
    || die "extracted upstream port is not numeric: '$api_upstream_port'"

  local site_conf_path="/etc/nginx/conf.d/${server_name}.conf"

  if (( dry_run )); then
    printf '\033[2m[dry-run]\033[0m render %s (server_name=%s, docroot=%s, upstream=%s://%s) + rsync ui/dist/ to %s:%s/, run nginx -t/reload on %s\n' \
      "$site_conf_path" "$server_name" "$web_root" \
      "$api_upstream_scheme" "$api_upstream_addr" \
      "$host" "$web_root" "$host" >&2
    return 0
  fi

  local stage
  stage="$(mktemp --directory)"
  # Remove the local staging dir when this function returns.
  trap "rm --recursive --force '$stage'" RETURN
  install --directory "${stage}${web_root}" "$stage/etc/nginx/conf.d"
  rsync --archive "${repo_root}/ui/dist/" "${stage}${web_root}/"

  # Replacements are quoted on purpose: since bash 5.2 (patsub_replacement)
  # an unquoted `&` in the replacement expands to the matched placeholder.
  local rendered
  rendered="$(<"${repo_root}/asset/nginx/site.conf.tmpl")"
  rendered=${rendered//'{{SERVER_NAME}}'/"$server_name"}
  rendered=${rendered//'{{DOCROOT}}'/"$web_root"}
  rendered=${rendered//'{{API_UPSTREAM_SCHEME}}'/"$api_upstream_scheme"}
  rendered=${rendered//'{{API_UPSTREAM_ADDR}}'/"$api_upstream_addr"}
  printf '%s\n' "$rendered" > "${stage}${site_conf_path}"
  chmod 0644 "${stage}${site_conf_path}"

  # Both targets are leaf paths (the docroot itself, and a single named
  # file) so rsync does not traverse /var or /etc parents -- `--chown` is
  # enough; -A/-X are intentionally absent.
  rsync \
    --archive \
    --hard-links \
    --numeric-ids \
    --chown root:root \
    --rsh='ssh -o BatchMode=yes' \
    --rsync-path 'sudo rsync' \
    --delete \
    "${stage}${web_root}/" \
    "${host}:${web_root}/"
  rsync \
    --archive \
    --hard-links \
    --numeric-ids \
    --chown root:root \
    --rsh='ssh -o BatchMode=yes' \
    --rsync-path 'sudo rsync' \
    "${stage}${site_conf_path}" \
    "${host}:${site_conf_path}"

  # The heredoc delimiter is quoted, so nothing below is expanded locally; the
  # values the remote script needs are passed as shell-quoted arguments.
  ssh_run "$host" "sudo bash -s -- ${web_root@Q} ${site_conf_path@Q} ${api_upstream_port@Q}" <<'REMOTE_EOF'
set -euo pipefail
web_root="$1"
site_conf_path="$2"
api_upstream_port="$3"
# Allow nginx to make outbound connections to the moments-api upstream
# across the WG mesh.
setsebool -P httpd_can_network_connect on
# Idempotent label: --add fails if the port is already labelled (we suppress
# that one stderr line); --modify is then a no-op or fixes a stale type.
semanage port --add --type=http_port_t --proto=tcp "$api_upstream_port" 2>/dev/null \
  || semanage port --modify --type=http_port_t --proto=tcp "$api_upstream_port"
restorecon -Rv "$web_root" "$site_conf_path"
if ! nginx -t; then
  echo "nginx config check failed" >&2
  exit 1
fi
systemctl reload nginx
echo "nginx reloaded"
REMOTE_EOF
}
# Dispatch ------------------------------------------------------------------
# Every (component, host) pair is deployed in its own subshell, so a failure
# on one target is recorded and the remaining targets are still attempted.
#
# The deploy functions are deliberately NOT called as
# `deploy_x "$host" || failed+=(...)`: bash ignores `set -e` for everything
# executed on the left-hand side of `||`, which would let a deploy function
# carry on after a failed step (for example, run the privileged remote
# install after the staging rsync failed). Errexit is switched off only
# around the subshell itself so that its exit status can be captured.

# All local staging dirs created by the deploy functions (via mktemp, which
# honours TMPDIR) live under this directory, so they are removed even when a
# subshell aborts before a function-level cleanup trap could run.
deploy_tmp=""
cleanup_deploy_tmp() {
  [[ -z "$deploy_tmp" ]] || rm --recursive --force -- "$deploy_tmp"
}
trap cleanup_deploy_tmp EXIT
if (( ! dry_run )); then
  deploy_tmp="$(mktemp --directory)"
fi

failed=()
for component in "${components[@]}"; do
  # Validate before looking up hosts: an unknown component has no host list,
  # so a check placed inside the host loop would never be reached.
  case "$component" in
    api|worker|web) ;;
    *)
      warn "unknown component: $component"
      continue
      ;;
  esac

  mapfile -t hosts < <(yq --raw-output "${env_path}.components.${component}.hosts[]" "$manifest")
  if [[ ${#hosts[@]} -eq 0 ]]; then
    warn "no hosts for component: $component"
    continue
  fi

  for host in "${hosts[@]}"; do
    set +e
    (
      set -e
      [[ -z "$deploy_tmp" ]] || export TMPDIR="$deploy_tmp"
      "deploy_${component}" "$host"
    )
    deploy_rc=$?
    set -e
    (( deploy_rc == 0 )) || failed+=("${component}@${host}")
  done
done

if [[ ${#failed[@]} -gt 0 ]]; then
  die "failed: ${failed[*]}"
fi
log "deploy complete"