Compare commits
2 Commits
03bed93fee
...
249c9442e8
| Author | SHA1 | Date | |
|---|---|---|---|
|
249c9442e8
|
|||
|
5e17081fb4
|
@@ -57,21 +57,14 @@ jobs:
|
|||||||
build-cortex:
|
build-cortex:
|
||||||
name: Build cortex binary
|
name: Build cortex binary
|
||||||
needs: prepare
|
needs: prepare
|
||||||
|
# runner-rust image already provides rust/cargo/clippy/rustfmt via
|
||||||
|
# dnf — no rustup install step needed.
|
||||||
runs-on: rust
|
runs-on: rust
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
ref: ${{ inputs.ref }}
|
ref: ${{ inputs.ref }}
|
||||||
|
|
||||||
- name: Install/update Rust toolchain
|
|
||||||
run: |
|
|
||||||
if command -v rustup &> /dev/null; then
|
|
||||||
rustup update stable
|
|
||||||
else
|
|
||||||
curl --proto '=https' --tlsv1.2 --silent --show-error --fail https://sh.rustup.rs | sh -s -- -y
|
|
||||||
fi
|
|
||||||
echo "${HOME}/.cargo/bin" >> "$GITHUB_PATH"
|
|
||||||
|
|
||||||
- name: Build cortex (release)
|
- name: Build cortex (release)
|
||||||
run: cargo build --release -p cortex-cli
|
run: cargo build --release -p cortex-cli
|
||||||
|
|
||||||
@@ -115,21 +108,14 @@ jobs:
|
|||||||
build_jobs: 8
|
build_jobs: 8
|
||||||
nvcc_threads: 4
|
nvcc_threads: 4
|
||||||
cargo_features: "cuda cudnn flash-attn"
|
cargo_features: "cuda cudnn flash-attn"
|
||||||
|
# runner-cuda-13.0 extends runner-rust, so rust/cargo are already
|
||||||
|
# present via dnf — no rustup install step needed.
|
||||||
runs-on: ${{ matrix.runner }}
|
runs-on: ${{ matrix.runner }}
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
ref: ${{ inputs.ref }}
|
ref: ${{ inputs.ref }}
|
||||||
|
|
||||||
- name: Install/update Rust toolchain
|
|
||||||
run: |
|
|
||||||
if command -v rustup &> /dev/null; then
|
|
||||||
rustup update stable
|
|
||||||
else
|
|
||||||
curl --proto '=https' --tlsv1.2 --silent --show-error --fail https://sh.rustup.rs | sh -s -- -y
|
|
||||||
fi
|
|
||||||
echo "${HOME}/.cargo/bin" >> "$GITHUB_PATH"
|
|
||||||
|
|
||||||
- name: Build neuron with CUDA (${{ matrix.flavour }})
|
- name: Build neuron with CUDA (${{ matrix.flavour }})
|
||||||
run: |
|
run: |
|
||||||
set -eux
|
set -eux
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -5,4 +5,3 @@
|
|||||||
.vscode/
|
.vscode/
|
||||||
cortex.toml
|
cortex.toml
|
||||||
doc/plan/*
|
doc/plan/*
|
||||||
script/deploy.sh
|
|
||||||
|
|||||||
131
script/deploy.sh
Executable file
131
script/deploy.sh
Executable file
@@ -0,0 +1,131 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Rolling deploy across the helexa fleet, driven by asset/manifest.yml.
# Installs / upgrades cortex on the gateway host and the appropriate
# helexa-neuron-<flavour> package on each neuron host, then restarts
# their services.

set -euo pipefail

# Resolve every path relative to this script so it can be run from any cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
MANIFEST="${REPO_DIR}/asset/manifest.yml"

if [[ ! -f "${MANIFEST}" ]]; then
  echo "fatal: manifest not found at ${MANIFEST}" >&2
  exit 1
fi

# Parse the manifest with yq. NOTE: this expects the pip-installed yq
# (a jq wrapper using jq syntax) — `pip install yq`. The Fedora rpm
# `yq` is mikefarah/yq and uses different (yaml-native) syntax; if a
# host has that one instead these queries will fail.
cortex_host=$(yq -r '.cortex.host' "${MANIFEST}")

# A missing key makes jq print the literal string "null" and still exit 0,
# so guard against it explicitly instead of later trying to ssh to "null".
if [[ -z "${cortex_host}" || "${cortex_host}" == "null" ]]; then
  echo "fatal: .cortex.host is not set in ${MANIFEST}" >&2
  exit 1
fi

# Emit one TAB-separated 'host\tflavour' line per neuron.
mapfile -t neuron_entries < <(
  yq -r '.neurons[] | .host + "\t" + .flavour' "${MANIFEST}"
)

# Target version is the most recent git tag with its leading "v" stripped.
latest_helexa_version=$(git -C "${REPO_DIR}" describe --tags --abbrev=0 | sed 's/^v//')
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# cortex (gateway)
# ---------------------------------------------------------------------------

#######################################
# Copy the repo's cortex.toml to /etc/cortex/cortex.toml on the gateway
# (root-owned, mode 644) and reload systemd units afterwards.
# Globals:   REPO_DIR, cortex_host (read)
# Arguments: optional extra rsync options (e.g. --verbose)
# Outputs:   progress to stdout, failures to stderr
# Returns:   0 always; failures are reported, not propagated, so the
#            caller can still fall through to restarting the service
#######################################
push_cortex_config() {
  if rsync \
    --archive \
    --compress \
    "$@" \
    --rsync-path 'sudo rsync' \
    --chown root:root \
    --chmod 644 \
    "${REPO_DIR}/cortex.toml" \
    "${cortex_host}:/etc/cortex/cortex.toml"; then
    echo "[${cortex_host}] sync'd cortex.toml"
    ssh "${cortex_host}" sudo systemctl daemon-reload \
      || echo "[${cortex_host}] failed to reload systemd units" >&2
  else
    echo "[${cortex_host}] failed to sync cortex.toml" >&2
  fi
}

observed_cortex_version=$(ssh "${cortex_host}" cortex --version | sed 's/^cortex //')
if [[ "${latest_helexa_version}" = "${observed_cortex_version}" ]]; then
  echo "[${cortex_host}] cortex is up to date (${observed_cortex_version})"
  # The package is current, but the config is still re-synced on every run.
  if ssh "${cortex_host}" sudo systemctl stop cortex.service; then
    push_cortex_config
  else
    echo "[${cortex_host}] failed to stop cortex service" >&2
  fi
else
  echo "[${cortex_host}] cortex is out of date (${observed_cortex_version} != ${latest_helexa_version})"
  if ssh "${cortex_host}" sudo systemctl stop cortex.service; then
    echo "[${cortex_host}] stopped cortex service"
    if ssh "${cortex_host}" sudo dnf upgrade --refresh -y cortex; then
      echo "[${cortex_host}] upgraded cortex"
      push_cortex_config --verbose
    else
      echo "[${cortex_host}] failed to upgrade cortex" >&2
    fi
  else
    echo "[${cortex_host}] failed to stop cortex service" >&2
  fi
fi

# Whichever path ran above, and whether or not it succeeded, make sure the
# gateway ends up running: a failed upgrade or config sync must not leave
# cortex.service stopped.
if ssh "${cortex_host}" systemctl is-active --quiet cortex.service; then
  echo "[${cortex_host}] cortex service is active"
elif ssh "${cortex_host}" sudo systemctl start cortex.service; then
  echo "[${cortex_host}] started cortex service"
else
  echo "[${cortex_host}] failed to start cortex service" >&2
fi
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# neuron (per-host, flavour from manifest)
# ---------------------------------------------------------------------------

# Each entry is one 'host<TAB>flavour' line produced from the manifest.
for entry in "${neuron_entries[@]}"; do
  IFS=$'\t' read -r neuron_host neuron_flavour <<< "${entry}"

  # TAB is IFS whitespace, so a missing host or a missing flavour both end
  # up as an empty second field; skip rather than target a bogus package.
  if [[ -z "${neuron_host}" || -z "${neuron_flavour}" ]]; then
    echo "skipping malformed neuron entry (need host and flavour): '${entry}'" >&2
    continue
  fi

  package="helexa-neuron-${neuron_flavour}"

  # An unreachable host or a missing neuron binary yields an empty version,
  # which simply routes the host into the install/upgrade branch below.
  observed_neuron_version=$(ssh "${neuron_host}" neuron --version 2> /dev/null | sed 's/^neuron //' || true)
  if [[ "${latest_helexa_version}" = "${observed_neuron_version}" ]]; then
    echo "[${neuron_host}] neuron is up to date (${observed_neuron_version}, ${package})"
    if ssh "${neuron_host}" systemctl is-active --quiet neuron.service; then
      echo "[${neuron_host}] neuron service is active"
    elif ssh "${neuron_host}" sudo systemctl start neuron.service; then
      echo "[${neuron_host}] started neuron service"
    else
      echo "[${neuron_host}] failed to start neuron service" >&2
    fi
  else
    echo "[${neuron_host}] upgrading neuron from ${observed_neuron_version:-(absent)} to ${latest_helexa_version} (${package})"
    # Only try to stop the unit when its unit file exists (fresh hosts have none).
    if ssh "${neuron_host}" "[ ! -f /usr/lib/systemd/system/neuron.service ] || sudo systemctl stop neuron.service"; then
      echo "[${neuron_host}] stopped neuron service"
      # --allowerasing lets dnf swap out a previously-installed
      # bare helexa-neuron or a different flavour without manual
      # intervention. The Conflicts: clauses in the spec ensure
      # only one flavour is ever resident.
      if ssh "${neuron_host}" sudo dnf install --refresh --allowerasing -y "${package}" &> /dev/null; then
        echo "[${neuron_host}] installed/upgraded ${package}"
        # Ensure firewalld allows neuron port. The braces matter: `||` and
        # `&&` have equal precedence, so without them the reload would run
        # on every deploy, even when the service is already allowed.
        # Best-effort by design: errors are ignored.
        ssh "${neuron_host}" "sudo firewall-cmd --query-service=helexa-neuron --quiet 2>/dev/null || { sudo firewall-cmd --add-service=helexa-neuron --permanent && sudo firewall-cmd --reload; }" 2>/dev/null || true
        if ssh "${neuron_host}" "sudo systemctl daemon-reload && sudo systemctl start neuron.service"; then
          echo "[${neuron_host}] started neuron service"
        else
          echo "[${neuron_host}] failed to start neuron service" >&2
        fi
      else
        echo "[${neuron_host}] failed to install ${package}" >&2
      fi
    else
      echo "[${neuron_host}] failed to stop neuron service" >&2
    fi
  fi
done
|
||||||
Reference in New Issue
Block a user