From 8a2334eacb07a3e14ff92c3fc40fb0de5d376b2f Mon Sep 17 00:00:00 2001
From: rob thijssen
Date: Mon, 18 May 2026 18:55:02 +0300
Subject: [PATCH] deploy: dnf-native version check + lair.cafe repo bootstrap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the string compare of 'git describe --tags' vs the binary's
self-reported --version (which lies about prereleases — every 0.1.16-*
RPM reports just "0.1.16") with the dnf-native question of "is the
installed package current against what the repo offers".

Mechanism:

- installed_nvr(): rpm -q --qf '%{version}-%{release}' for the
  resident package, falling back to "(not installed)". Capturing
  rpm's output through a variable keeps its "package X is not
  installed" stdout message out of the result on failure.

- needs_update(): probes rpm -q first (treats absent as "needs
  work"), then asks dnf check-update --refresh -q. Other dnf failures
  collapse into "needs update" so the subsequent install surfaces a
  real error rather than this check swallowing one silently.

- ensure_lair_repo(): probes for
  /etc/yum.repos.d/lair-cafe-unstable.repo and adds it with
  `dnf config-manager addrepo` when missing. The upstream .repo file
  ships enabled=0 (unstable channel doesn't auto-engage on fetch), so
  we then run `dnf config-manager setopt lair-cafe-unstable.enabled=1`
  every run — cheap, idempotent.

- Cortex and neuron install branches now guard `systemctl stop` with
  `[ ! -f /usr/lib/systemd/system/...service ] || sudo systemctl stop`
  so fresh installs (no unit file yet) don't short-circuit the
  install step under set -e.

- dnf output is captured into a variable and only printed (with a
  [host] prefix per line) on failure, so success stays quiet and
  failures show the actual diagnostic instead of being eaten by
  &> /dev/null.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 script/deploy.sh | 186 +++++++++++++++++++++++++++++++----------------
 1 file changed, 125 insertions(+), 61 deletions(-)

diff --git a/script/deploy.sh b/script/deploy.sh
index c0b3448..379df6d 100755
--- a/script/deploy.sh
+++ b/script/deploy.sh
@@ -27,63 +27,124 @@ mapfile -t neuron_entries < <(
   yq -r '.neurons[] | .host + "\t" + .flavour' "${MANIFEST}"
 )
 
-latest_helexa_version=$(git -C "${REPO_DIR}" describe --tags --abbrev=0 | sed 's/^v//')
+# Return the installed package's "version-release" string, or
+# "(not installed)" when rpm reports the package as absent. Capture
+# rpm's output into a variable so its "package X is not installed"
+# stdout message (rpm writes that to stdout, not stderr, when -q fails)
+# doesn't leak into the result.
+installed_nvr() {
+  local host="$1" pkg="$2"
+  local nvr
+  if nvr=$(ssh "${host}" "rpm -q --qf '%{version}-%{release}' ${pkg} 2>/dev/null"); then
+    echo "${nvr}"
+  else
+    echo "(not installed)"
+  fi
+}
+
+# Ensure the rpm.lair.cafe unstable repo is configured AND enabled on
+# the remote host.
+#
+# The upstream .repo file at https://rpm.lair.cafe/lair-cafe-unstable.repo
+# ships with `enabled=0` so a host that just fetched it won't start
+# pulling unstable packages by accident. We have to explicitly flip
+# enabled=1 via `dnf config-manager setopt`. Both addrepo and setopt
+# are idempotent.
+#
+# Non-fatal — if either step fails the subsequent `dnf install` will
+# surface a clearer diagnostic on its own.
+ensure_lair_repo() {
+  local host="$1"
+  if ! ssh "${host}" "test -f /etc/yum.repos.d/lair-cafe-unstable.repo" 2>/dev/null; then
+    echo "[${host}] adding rpm.lair.cafe unstable repo"
+    if ! ssh "${host}" sudo dnf config-manager addrepo \
+        --from-repofile=https://rpm.lair.cafe/lair-cafe-unstable.repo \
+        >/dev/null 2>&1; then
+      echo "[${host}] WARNING: failed to add lair.cafe repo file (proceeding anyway)"
+      return 0
+    fi
+  fi
+  # The .repo file ships enabled=0; flip it on. Cheap, idempotent.
+  if ! ssh "${host}" sudo dnf config-manager setopt \
+      lair-cafe-unstable.enabled=1 >/dev/null 2>&1; then
+    echo "[${host}] WARNING: failed to enable lair-cafe-unstable (proceeding anyway)"
+  fi
+}
+
+# True when the named package needs to be installed or upgraded on the
+# remote host — either it's not present, or a newer version exists in
+# the repo. False only when the installed version is current.
+#
+# `dnf check-update <pkg>` returns 0 when the package isn't installed
+# at all (there's nothing to update), so we have to probe with rpm -q
+# first to distinguish "absent" from "current". Other dnf failures
+# collapse into "needs update" so the subsequent install step surfaces
+# the real diagnostic rather than this check swallowing it.
+needs_update() {
+  local host="$1" pkg="$2"
+  # Not installed → needs work.
+  if ! ssh "${host}" "rpm -q ${pkg}" >/dev/null 2>&1; then
+    return 0
+  fi
+  # Installed; ask dnf whether the repo has something newer.
+  if ssh "${host}" sudo dnf check-update --refresh -q "${pkg}" >/dev/null 2>&1; then
+    return 1
+  else
+    return 0
+  fi
+}
 
 # ---------------------------------------------------------------------------
 # cortex (gateway)
 # ---------------------------------------------------------------------------
 
-observed_cortex_version=$(ssh "${cortex_host}" cortex --version | sed 's/^cortex //')
-if [[ "${latest_helexa_version}" = "${observed_cortex_version}" ]]; then
-  echo "[${cortex_host}] cortex is up to date (${observed_cortex_version})"
-  if ssh "${cortex_host}" sudo systemctl stop cortex.service && rsync \
-    --archive \
-    --compress \
-    --rsync-path 'sudo rsync' \
-    --chown root:root \
-    --chmod 644 \
-    "${REPO_DIR}/cortex.toml" \
-    "${cortex_host}:/etc/cortex/cortex.toml"; then
-    echo "[${cortex_host}] sync'd cortex.toml"
-    ssh "${cortex_host}" sudo systemctl daemon-reload
-    ssh "${cortex_host}" sudo systemctl start cortex.service
-  else
-    echo "[${cortex_host}] failed to sync cortex.toml"
-  fi
-  if ssh "${cortex_host}" systemctl is-active --quiet cortex.service; then
-    echo "[${cortex_host}] cortex service is active"
-  elif ssh "${cortex_host}" sudo systemctl start cortex.service; then
-    echo "[${cortex_host}] started cortex service"
-  else
-    echo "[${cortex_host}] failed to start cortex service"
-  fi
-else
-  echo "[${cortex_host}] cortex is out of date (${observed_cortex_version} != ${latest_helexa_version})"
-  if ssh "${cortex_host}" sudo systemctl stop cortex.service; then
+ensure_lair_repo "${cortex_host}"
+cortex_nvr=$(installed_nvr "${cortex_host}" cortex)
+if needs_update "${cortex_host}" cortex; then
+  echo "[${cortex_host}] cortex update available (current: ${cortex_nvr})"
+  # Stop the service only if the unit file exists — fresh installs
+  # don't have it, and `systemctl stop` on a missing unit returns
+  # non-zero, which would otherwise short-circuit the install branch
+  # under set -e.
+  if ssh "${cortex_host}" "[ ! -f /usr/lib/systemd/system/cortex.service ] || sudo systemctl stop cortex.service"; then
     echo "[${cortex_host}] stopped cortex service"
-    if ssh "${cortex_host}" sudo dnf upgrade --refresh -y cortex; then
-      echo "[${cortex_host}] upgraded cortex"
-      if rsync \
-        --archive \
-        --compress \
-        --verbose \
-        --rsync-path 'sudo rsync' \
-        --chown root:root \
-        --chmod 644 \
-        "${REPO_DIR}/cortex.toml" \
-        "${cortex_host}:/etc/cortex/cortex.toml"; then
-        echo "[${cortex_host}] sync'd cortex.toml"
-        ssh "${cortex_host}" sudo systemctl daemon-reload
-        ssh "${cortex_host}" sudo systemctl start cortex.service
-      else
-        echo "[${cortex_host}] failed to sync cortex.toml"
-      fi
+    if dnf_output=$(ssh "${cortex_host}" sudo dnf install --refresh --allowerasing -y cortex 2>&1); then
+      cortex_nvr=$(installed_nvr "${cortex_host}" cortex)
+      echo "[${cortex_host}] installed/upgraded cortex to ${cortex_nvr}"
     else
-      echo "[${cortex_host}] failed to upgrade cortex"
+      echo "[${cortex_host}] failed to install/upgrade cortex:"
+      echo "${dnf_output}" | sed "s/^/[${cortex_host}] /"
     fi
   else
     echo "[${cortex_host}] failed to stop cortex service"
   fi
+else
+  echo "[${cortex_host}] cortex is up to date (${cortex_nvr})"
+  ssh "${cortex_host}" sudo systemctl stop cortex.service || true
+fi
+
+# Sync cortex.toml whether the package was upgraded or not — the config
+# can change without a package bump.
+if rsync \
+    --archive \
+    --compress \
+    --rsync-path 'sudo rsync' \
+    --chown root:root \
+    --chmod 644 \
+    "${REPO_DIR}/cortex.toml" \
+    "${cortex_host}:/etc/cortex/cortex.toml"; then
+  echo "[${cortex_host}] sync'd cortex.toml"
+else
+  echo "[${cortex_host}] failed to sync cortex.toml"
+fi
+
+ssh "${cortex_host}" sudo systemctl daemon-reload
+if ssh "${cortex_host}" systemctl is-active --quiet cortex.service; then
+  echo "[${cortex_host}] cortex service is active"
+elif ssh "${cortex_host}" sudo systemctl start cortex.service; then
+  echo "[${cortex_host}] started cortex service"
+else
+  echo "[${cortex_host}] failed to start cortex service"
 fi
 
 # ---------------------------------------------------------------------------
@@ -94,26 +155,19 @@ for entry in "${neuron_entries[@]}"; do
   IFS=$'\t' read -r neuron_host neuron_flavour <<< "${entry}"
   package="helexa-neuron-${neuron_flavour}"
 
-  observed_neuron_version=$(ssh "${neuron_host}" neuron --version 2> /dev/null | sed 's/^neuron //' || true)
-  if [[ "${latest_helexa_version}" = "${observed_neuron_version}" ]]; then
-    echo "[${neuron_host}] neuron is up to date (${observed_neuron_version}, ${package})"
-    if ssh "${neuron_host}" systemctl is-active --quiet neuron.service; then
-      echo "[${neuron_host}] neuron service is active"
-    elif ssh "${neuron_host}" sudo systemctl start neuron.service; then
-      echo "[${neuron_host}] started neuron service"
-    else
-      echo "[${neuron_host}] failed to start neuron service"
-    fi
-  else
-    echo "[${neuron_host}] upgrading neuron from ${observed_neuron_version:-(absent)} to ${latest_helexa_version} (${package})"
+  ensure_lair_repo "${neuron_host}"
+  neuron_nvr=$(installed_nvr "${neuron_host}" "${package}")
+  if needs_update "${neuron_host}" "${package}"; then
+    echo "[${neuron_host}] ${package} update available (current: ${neuron_nvr})"
     if ssh "${neuron_host}" "[ ! -f /usr/lib/systemd/system/neuron.service ] || sudo systemctl stop neuron.service"; then
       echo "[${neuron_host}] stopped neuron service"
       # --allowerasing lets dnf swap out a previously-installed
       # bare helexa-neuron or a different flavour without manual
       # intervention. The Conflicts: clauses in the spec ensure
       # only one flavour is ever resident.
-      if ssh "${neuron_host}" sudo dnf install --refresh --allowerasing -y "${package}" &> /dev/null; then
-        echo "[${neuron_host}] installed/upgraded ${package}"
+      if dnf_output=$(ssh "${neuron_host}" sudo dnf install --refresh --allowerasing -y "${package}" 2>&1); then
+        neuron_nvr=$(installed_nvr "${neuron_host}" "${package}")
+        echo "[${neuron_host}] installed/upgraded ${package} to ${neuron_nvr}"
         # Ensure firewalld allows neuron port
         ssh "${neuron_host}" "sudo firewall-cmd --query-service=helexa-neuron --quiet 2>/dev/null || sudo firewall-cmd --add-service=helexa-neuron --permanent && sudo firewall-cmd --reload" 2>/dev/null || true
         if ssh "${neuron_host}" "sudo systemctl daemon-reload && sudo systemctl start neuron.service"; then
@@ -122,10 +176,20 @@ for entry in "${neuron_entries[@]}"; do
           echo "[${neuron_host}] failed to start neuron service"
         fi
       else
-        echo "[${neuron_host}] failed to install ${package}"
+        echo "[${neuron_host}] failed to install ${package}:"
+        echo "${dnf_output}" | sed "s/^/[${neuron_host}] /"
       fi
     else
       echo "[${neuron_host}] failed to stop neuron service"
     fi
+  else
+    echo "[${neuron_host}] ${package} is up to date (${neuron_nvr})"
+    if ssh "${neuron_host}" systemctl is-active --quiet neuron.service; then
+      echo "[${neuron_host}] neuron service is active"
+    elif ssh "${neuron_host}" sudo systemctl start neuron.service; then
+      echo "[${neuron_host}] started neuron service"
+    else
+      echo "[${neuron_host}] failed to start neuron service"
+    fi
   fi
 done