Files
mm/script/deploy.sh
rob thijssen a986bb72c4 fix(deploy): use port 17380 and add journal diagnostics on failure
- Change mm-dash default bind from 3000 to 17380 to avoid collisions
  with existing services on shared hosts
- Add journal output for failed units in the verification step so
  startup errors are immediately visible in deploy output
- Stop services before restarting to release ports cleanly

Tested end-to-end: all 7 services active, nginx configured and
reloading, postgres bootstrapped with mTLS ident mapping.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-23 08:40:56 +03:00

298 lines
9.6 KiB
Bash
Executable File

#!/usr/bin/env bash
# Deployment driver for the "mm" application.
# Usage: deploy.sh <environment> [component...] [--dry-run]

# Strict mode: abort on errors, unset variables and failed pipeline stages.
set -o errexit -o nounset -o pipefail

# Absolute directory containing this script, and the directory above it.
SCRIPT_DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"
ROOT_DIR="$(dirname "$SCRIPT_DIR")"

# Deployment manifest read via yq further down.
MANIFEST="${ROOT_DIR}/asset/manifest.yml"
APP="mm"

# Service binaries / systemd units handled by the services component.
SERVICES=(
  mm-scan
  mm-index
  mm-analyze
  mm-rank
  mm-act
  mm-dash
  mm-sched
)

# Switched to true by the --dry-run flag.
DRY_RUN=false
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
# Print "ERROR: <message>" on stderr and terminate the script with status 1.
die() {
  printf '%s\n' "ERROR: $*" >&2
  exit 1
}
# Print a progress line, prefixed with "==> ", on stdout.
info() {
  printf '%s\n' "==> $*"
}
# Print a "WARN: <message>" line on stderr; does not stop the script.
warn() {
  printf '%s\n' "WARN: $*" >&2
}
# Execute the given command line, or only print it when DRY_RUN is true.
# Arguments: the command and its arguments.
# Returns:   the command's exit status (0 in dry-run mode).
run() {
  if ! $DRY_RUN; then
    "$@"
    return
  fi
  echo "[dry-run] $*"
}
# Run a command on a remote host over non-interactive ssh (BatchMode), or
# only print it when DRY_RUN is true.
# Arguments: $1 - ssh destination; remaining args - remote command.
# Returns:   ssh's exit status (0 in dry-run mode).
ssh_run() {
  local target="$1"
  shift
  if ! $DRY_RUN; then
    ssh -o BatchMode=yes "$target" "$@"
    return
  fi
  echo "[dry-run] ssh $target: $*"
}
# ---------------------------------------------------------------------------
# Parse args
# ---------------------------------------------------------------------------
# First positional argument is the environment; every later positional
# argument names a component. --dry-run may appear anywhere.
ENV=""
COMPONENTS=()
while [[ $# -gt 0 ]]; do
  case "$1" in
    --dry-run)
      DRY_RUN=true
      ;;
    -*)
      die "unknown flag: $1"
      ;;
    *)
      if [[ -n "$ENV" ]]; then
        COMPONENTS+=("$1")
      else
        ENV="$1"
      fi
      ;;
  esac
  shift
done

if [[ -z "$ENV" ]]; then
  die "usage: $0 <environment> [component...] [--dry-run]"
fi
if [[ ! -f "$MANIFEST" ]]; then
  die "manifest not found: $MANIFEST"
fi

# Resolve components: no component, or a leading "default"/"all", selects
# the full set.
case "${COMPONENTS[0]-default}" in
  default | all)
    COMPONENTS=(services postgres nginx)
    ;;
esac
# ---------------------------------------------------------------------------
# Read manifest
# ---------------------------------------------------------------------------
# Evaluate a yq expression against the manifest and print the raw result.
# Globals:   MANIFEST (read)
# Arguments: $1 - yq expression
manifest_get() {
  local query="$1"
  yq -r "$query" "$MANIFEST"
}
# Resolve the first host of each component for the selected environment.
# A lookup that finds nothing is compared against the string "null" below.
APP_HOST="$(manifest_get ".environments.${ENV}.components.services.hosts[0]")"
PG_HOST="$(manifest_get ".environments.${ENV}.components.postgres.hosts[0]")"
PG_STANDBY="$(manifest_get ".environments.${ENV}.components.postgres.standby[0]")"
NGINX_HOST="$(manifest_get ".environments.${ENV}.components.nginx.hosts[0]")"

# A services host is mandatory.
if [[ "$APP_HOST" == "null" ]]; then
  die "no services host in manifest for env=$ENV"
fi

# Summarise the resolved targets before doing any work.
info "environment: $ENV"
info "app host: $APP_HOST"
info "pg primary: $PG_HOST"
info "pg standby: $PG_STANDBY"
info "nginx host: $NGINX_HOST"
printf '\n'
# ---------------------------------------------------------------------------
# Component: postgres
# ---------------------------------------------------------------------------
#######################################
# Deploy the postgres component: run the bootstrap SQL on the primary and
# install a pg_ident mapping for the app host on the primary and, when one
# is configured, the standby.
# Globals:   ROOT_DIR, ENV, APP_HOST, PG_HOST, PG_STANDBY (read)
# Arguments: none
# Returns:   0 on success; exits via die when no primary host is configured;
#            non-zero if a remote step fails.
#######################################
deploy_postgres() {
  info "--- deploying postgres component ---"

  # The manifest lookup yields the literal string "null" for a missing entry
  # (the standby is checked the same way below). Stop here instead of
  # letting rsync/ssh try to reach a host named "null".
  [[ -n "$PG_HOST" && "$PG_HOST" != "null" ]] \
    || die "no postgres host in manifest for env=$ENV"

  # Bootstrap role + database on primary
  info "bootstrapping database on $PG_HOST"
  run rsync -az "$ROOT_DIR/asset/sql/bootstrap.sql" "$PG_HOST:/tmp/mm-bootstrap.sql"
  # Remove the uploaded script whether or not psql succeeds, then hand back
  # psql's own exit status so a failure still aborts the deploy.
  ssh_run "$PG_HOST" "sudo -u postgres psql -f /tmp/mm-bootstrap.sql; rc=\$?; rm -f /tmp/mm-bootstrap.sql; exit \$rc"

  # Deploy pg_ident mapping to both primary and standby
  local ident_line="cert_cn ${APP_HOST} mm"
  local ident_dir="/var/lib/pgsql/18/data/pg_ident.conf.d"
  local ident_file="${ident_dir}/${APP_HOST}.conf"
  local pg_server
  for pg_server in "$PG_HOST" "$PG_STANDBY"; do
    [[ "$pg_server" != "null" ]] || continue
    info "deploying ident mapping to $pg_server"
    # set -e: without it only the status of the final reload is reported and
    # a failed mkdir/tee would go unnoticed.
    ssh_run "$pg_server" "
      set -e
      sudo mkdir -p '$ident_dir'
      echo '$ident_line' | sudo tee '$ident_file' > /dev/null
      sudo systemctl reload postgresql-18
    "
  done
  info "postgres component done"
}
# ---------------------------------------------------------------------------
# Component: services
# ---------------------------------------------------------------------------
#######################################
# Deploy the services component to APP_HOST: build release binaries when any
# is missing, render the config template with the Gitea token, create the
# service account and directories, push binaries, config, systemd units and
# the firewalld service, then restart every unit and report its state.
# Globals:   ROOT_DIR, APP_HOST, SERVICES, DRY_RUN (read)
# Arguments: none
# Outputs:   progress via info/warn; last journal lines of inactive units
# Returns:   0 when every deployment step ran; a unit that is not active
#            after the restart only produces a warning. Exits via die when
#            the token cannot be read.
#######################################
deploy_services() {
  info "--- deploying services component ---"
  local svc

  # Build if needed. NOTE: this only checks that each binary exists, not
  # that it is newer than the sources; existing binaries are deployed as-is.
  local target_dir="$ROOT_DIR/target/release"
  local needs_build=false
  for svc in "${SERVICES[@]}"; do
    if [[ ! -f "$target_dir/$svc" ]]; then
      needs_build=true
      break
    fi
  done
  if $needs_build; then
    info "building release binaries"
    run cargo build --release --workspace --manifest-path "$ROOT_DIR/Cargo.toml"
  else
    info "release binaries up to date"
  fi

  # Render config template
  info "rendering config template"
  local rendered
  rendered=$(<"$ROOT_DIR/asset/config/config.toml.tmpl")
  local gitea_token
  if $DRY_RUN; then
    gitea_token="<redacted>"
  else
    gitea_token=$(pass git.lair.cafe/grenade/admin-token 2>/dev/null) \
      || die "failed to read gitea token from pass store"
  fi
  # Both pattern and replacement are quoted so they are taken literally. An
  # unquoted replacement would have any '&' expanded to the matched
  # placeholder on bash 5.2+ (patsub_replacement), corrupting the token.
  local placeholder='{{GITEA_TOKEN}}'
  rendered=${rendered//"$placeholder"/"$gitea_token"}

  # Deploy sysusers drop-in and create service account first — everything
  # else depends on the mm user/group existing.
  info "bootstrapping service account on $APP_HOST"
  if ! $DRY_RUN; then
    # The drop-in is fed on stdin and consumed by tee. set -e makes a failed
    # mkdir/tee abort the remote script instead of being masked by a later
    # command's status.
    ssh -o BatchMode=yes "$APP_HOST" "
      set -e
      sudo mkdir -p /etc/sysusers.d
      sudo tee /etc/sysusers.d/mm.conf > /dev/null
      sudo systemd-sysusers
    " < "$ROOT_DIR/asset/systemd/mm.sysusers.conf"
  else
    echo "[dry-run] would deploy sysusers drop-in and create mm user on $APP_HOST"
  fi

  # Create directories (mm user/group now exists)
  info "creating directories on $APP_HOST"
  ssh_run "$APP_HOST" "
    set -e
    sudo mkdir -p /etc/mm /var/lib/mm
    sudo chown root:mm /etc/mm
    sudo chmod 0750 /etc/mm
    sudo chown mm:mm /var/lib/mm
    sudo chmod 0750 /var/lib/mm
    # Best effort: the ACL on the host key is allowed to fail.
    sudo setfacl -m u:mm:r /etc/pki/tls/private/\$(hostname -f).pem || true
  "

  # rsync binaries
  info "deploying binaries to $APP_HOST"
  for svc in "${SERVICES[@]}"; do
    run rsync -az --rsync-path 'sudo rsync' "$target_dir/$svc" "$APP_HOST:/usr/local/bin/$svc"
  done

  # Deploy config (piped via ssh to avoid secrets on local disk)
  info "deploying config"
  if ! $DRY_RUN; then
    printf '%s\n' "$rendered" | ssh -o BatchMode=yes "$APP_HOST" "
      set -e
      sudo tee /etc/mm/config.toml > /dev/null
      sudo chown root:mm /etc/mm/config.toml
      sudo chmod 0640 /etc/mm/config.toml
    "
  else
    echo "[dry-run] would deploy rendered config to $APP_HOST:/etc/mm/config.toml"
  fi

  # rsync systemd units
  info "deploying systemd units"
  for svc in "${SERVICES[@]}"; do
    run rsync -az --rsync-path 'sudo rsync' "$ROOT_DIR/asset/systemd/$svc.service" "$APP_HOST:/etc/systemd/system/$svc.service"
  done

  # rsync firewalld service
  run rsync -az --rsync-path 'sudo rsync' "$ROOT_DIR/asset/firewalld/mm-dash.xml" "$APP_HOST:/etc/firewalld/services/mm-dash.xml"

  # Post-deploy configuration. ${SERVICES[*]} is expanded locally into the
  # remote script; escaped \$ expansions are evaluated on the remote host.
  info "configuring $APP_HOST"
  ssh_run "$APP_HOST" "
    # SELinux
    sudo restorecon -R /usr/local/bin/mm-* /etc/mm /var/lib/mm 2>/dev/null || true
    # Firewall
    sudo firewall-cmd --reload
    zone=\$(sudo firewall-cmd --get-default-zone)
    if ! sudo firewall-cmd --zone=\$zone --query-service=mm-dash 2>/dev/null; then
      sudo firewall-cmd --permanent --zone=\$zone --add-service=mm-dash
      sudo firewall-cmd --zone=\$zone --add-service=mm-dash
    fi
    # systemd — stop first to release ports, then reload and restart
    sudo systemctl daemon-reload
    for svc in ${SERVICES[*]}; do
      sudo systemctl stop \$svc 2>/dev/null || true
    done
    # A unit that fails to start must not abort the deploy here: the
    # verification step below reports it together with its journal.
    for svc in ${SERVICES[*]}; do
      sudo systemctl enable --now \$svc || echo \"WARN: enable --now \$svc failed\" >&2
    done
  "

  # Health check (skipped entirely in dry-run mode)
  info "verifying services"
  if ! $DRY_RUN; then
    for svc in "${SERVICES[@]}"; do
      if ssh_run "$APP_HOST" "systemctl is-active --quiet $svc"; then
        info " $svc: active"
      else
        warn " $svc: NOT active"
        # Diagnostics only; failure to fetch the journal is ignored.
        ssh -o BatchMode=yes "$APP_HOST" "sudo journalctl --no-pager -l --unit $svc -n 10" 2>/dev/null || true
        echo
      fi
    done
  fi
  info "services component done"
}
# ---------------------------------------------------------------------------
# Component: nginx
# ---------------------------------------------------------------------------
#######################################
# Deploy the nginx component: make sure the sites-available/sites-enabled
# layout exists and is included from nginx.conf, upload the site config,
# enable it via symlink, then validate and reload nginx.
# Globals:   ROOT_DIR, ENV, NGINX_HOST (read)
# Arguments: none
# Returns:   0 on success; exits via die when no nginx host is configured;
#            non-zero if a remote step (including nginx -t) fails.
#######################################
deploy_nginx() {
  info "--- deploying nginx component ---"

  # The manifest lookup yields the literal string "null" for a missing
  # entry; stop here instead of letting ssh/rsync try to reach that name.
  [[ -n "$NGINX_HOST" && "$NGINX_HOST" != "null" ]] \
    || die "no nginx host in manifest for env=$ENV"

  # Ensure sites-available and sites-enabled exist before rsync.
  # set -e: otherwise a failed mkdir/sed is masked by the status of the
  # last command in the remote script.
  ssh_run "$NGINX_HOST" "
    set -e
    sudo mkdir -p /etc/nginx/sites-available /etc/nginx/sites-enabled
    # Add include directive to nginx.conf if not already present
    if ! grep -q 'sites-enabled' /etc/nginx/nginx.conf; then
      sudo sed -i '/http {/a \\ include /etc/nginx/sites-enabled/*.conf;' /etc/nginx/nginx.conf
    fi
  "

  run rsync -az --rsync-path 'sudo rsync' "$ROOT_DIR/asset/nginx/mm.kosherinata.conf" \
    "$NGINX_HOST:/etc/nginx/sites-available/mm.internal.conf"

  ssh_run "$NGINX_HOST" "
    set -e
    # Symlink if not exists
    if [[ ! -L /etc/nginx/sites-enabled/mm.internal.conf ]]; then
      sudo ln -s /etc/nginx/sites-available/mm.internal.conf /etc/nginx/sites-enabled/mm.internal.conf
    fi
    # SELinux: allow nginx to proxy to backend (best effort)
    sudo setsebool -P httpd_can_network_connect on 2>/dev/null || true
    # Validate and reload; reload only happens when the config test passes
    sudo nginx -t && sudo systemctl reload nginx
  "
  info "nginx component done"
}
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
# Run the deploy_<name> function for each requested component, in the order
# given; names that are not recognised only produce a warning.
for component in "${COMPONENTS[@]}"; do
  case "$component" in
    services | postgres | nginx)
      "deploy_${component}"
      ;;
    *)
      warn "unknown component: $component"
      ;;
  esac
done
info "deployment complete"