RPM packaging: build and publish cortex and neuron as separate packages.

Summary of the changes carried by this patch:
  * .gitea/workflows/ci.yml: the single rpm/copr job pair becomes per-package
    srpm-cortex/srpm-neuron and copr-cortex/copr-neuron jobs.
  * cortex.spec: installs the systemd unit and example configs, creates the
    cortex user/group in %pre, and adds the systemd scriptlets.
  * neuron.spec, data/*.service, models.example.toml, neuron.example.toml:
    new files.
  * CLAUDE.md: Phase 10 is marked complete.

Review note: the %changelog entries said "Tue Apr 15 2026", but 15 April 2026
is a Wednesday; the weekday is corrected to "Wed" so rpmbuild does not report
a bogus %changelog date. Line breaks and indentation were reconstructed from
the hunk line counts.

diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml
index 53d7b3a..8ab5270 100644
--- a/.gitea/workflows/ci.yml
+++ b/.gitea/workflows/ci.yml
@@ -26,8 +26,8 @@ jobs:
       - name: Test
         run: cargo test --workspace
 
-  rpm:
-    name: Build SRPM
+  srpm-cortex:
+    name: Build cortex SRPM
     runs-on: fedora
     needs: check
     if: startsWith(github.ref, 'refs/tags/v')
@@ -39,14 +39,12 @@ jobs:
         run: |
           VERSION="${GITHUB_REF#refs/tags/v}"
           echo "VERSION=${VERSION}" >> "$GITHUB_OUTPUT"
-          echo "Building version: ${VERSION}"
 
-      - name: Stamp version into spec
+      - name: Stamp version
         run: |
           VERSION="${{ steps.version.outputs.VERSION }}"
           sed -i '/\[workspace\.package\]/,/\[/{ s/^version = ".*"/version = "'"${VERSION}"'"/ }' Cargo.toml
           sed -i "s/^Version:.*/Version: ${VERSION}/" cortex.spec
-          echo "Stamped version ${VERSION}"
 
       - name: Generate source tarball
         run: |
@@ -77,19 +75,71 @@ jobs:
       - name: Upload SRPM artifact
         uses: actions/upload-artifact@v3
         with:
-          name: srpm
+          name: srpm-cortex
           path: '*.src.rpm'
 
-  copr:
-    name: Publish to COPR
+  srpm-neuron:
+    name: Build neuron SRPM
     runs-on: fedora
-    needs: rpm
+    needs: check
+    if: startsWith(github.ref, 'refs/tags/v')
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Determine version
+        id: version
+        run: |
+          VERSION="${GITHUB_REF#refs/tags/v}"
+          echo "VERSION=${VERSION}" >> "$GITHUB_OUTPUT"
+
+      - name: Stamp version
+        run: |
+          VERSION="${{ steps.version.outputs.VERSION }}"
+          sed -i '/\[workspace\.package\]/,/\[/{ s/^version = ".*"/version = "'"${VERSION}"'"/ }' Cargo.toml
+          sed -i "s/^Version:.*/Version: ${VERSION}/" neuron.spec
+
+      - name: Generate source tarball
+        run: |
+          set -ex
+          VERSION="${{ steps.version.outputs.VERSION }}"
+          tar czf /tmp/neuron-${VERSION}.tar.gz \
+            --transform "s,^\.,neuron-${VERSION}," \
+            --exclude='./target' \
+            --exclude='./.git' \
+            --exclude='*.tar.gz' \
+            --exclude='*.src.rpm' \
+            .
+          mv /tmp/neuron-${VERSION}.tar.gz .
+
+      - name: Vendor Rust dependencies
+        run: |
+          VERSION="${{ steps.version.outputs.VERSION }}"
+          cargo vendor vendor/
+          tar czf neuron-${VERSION}-vendor.tar.gz vendor/
+          rm -rf vendor/
+
+      - name: Build SRPM
+        run: |
+          rpmbuild -bs neuron.spec \
+            --define "_sourcedir $(pwd)" \
+            --define "_srcrpmdir $(pwd)"
+
+      - name: Upload SRPM artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: srpm-neuron
+          path: '*.src.rpm'
+
+  copr-cortex:
+    name: Publish cortex to COPR
+    runs-on: fedora
+    needs: srpm-cortex
     if: startsWith(github.ref, 'refs/tags/v')
     steps:
       - name: Download SRPM
         uses: actions/download-artifact@v3
         with:
-          name: srpm
+          name: srpm-cortex
 
       - name: Configure copr-cli
         run: |
@@ -97,4 +147,23 @@ jobs:
           echo "${{ secrets.COPR_CONFIG }}" > ~/.config/copr
 
       - name: Submit build to COPR
-        run: copr-cli build cortex *.src.rpm
+        run: copr-cli build helexa/cortex *.src.rpm
+
+  copr-neuron:
+    name: Publish neuron to COPR
+    runs-on: fedora
+    needs: srpm-neuron
+    if: startsWith(github.ref, 'refs/tags/v')
+    steps:
+      - name: Download SRPM
+        uses: actions/download-artifact@v3
+        with:
+          name: srpm-neuron
+
+      - name: Configure copr-cli
+        run: |
+          mkdir -p ~/.config
+          echo "${{ secrets.COPR_CONFIG }}" > ~/.config/copr
+
+      - name: Submit build to COPR
+        run: copr-cli build helexa/neuron *.src.rpm
diff --git a/CLAUDE.md b/CLAUDE.md
index 47b1306..5fad1a8 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -590,30 +590,22 @@ Topology-aware placement (min_devices, min_device_vram_mb) deferred — the
 router currently routes based on polled model status. Catalogue placement
 matching can be added incrementally.
 
-### Phase 10: neuron packaging (RPM)
+### Phase 10: RPM packaging ✅
 
-**Goal:** `neuron` and `cortex` are installable via `dnf` from the
-grenade COPR repo.
+Completed. Both packages have RPM specs, systemd units, and example configs.
+CI builds parallel SRPMs on tag push and publishes to separate COPR repos.
 
-**Steps:**
-1. `neuron.spec` — RPM spec file for the neuron binary. Install to
-   `/usr/libexec/cortex/neuron`. Systemd unit
-   `neuron.service`. Config at `/etc/cortex/neuron.toml`.
-2. Update `cortex.spec` — ensure the cortex binary, config, and
-   `models.toml` are packaged correctly.
-3. Gitea Actions CI job: on tag push, build SRPM, submit to COPR.
-4. Document the install path:
-   ```sh
-   dnf copr enable grenade/cortex
-   # on the gateway host:
-   dnf install cortex
-   # on each GPU node:
-   dnf install neuron
-   ```
+- `cortex.spec` → `helexa/cortex` COPR: binary, systemd unit, config files
+- `neuron.spec` → `helexa/neuron` COPR: binary, systemd unit, config
+- `data/cortex.service`, `data/neuron.service` — systemd units
+- `cortex.example.toml`, `neuron.example.toml`, `models.example.toml`
+- CI: parallel `srpm-cortex` + `srpm-neuron` jobs, then parallel COPR publish
 
-**Done when:** `dnf install neuron` on a Fedora 43 host drops the
-binary, config, and systemd unit. `systemctl start neuron` runs
-discovery and serves `/discovery`.
+Install:
+```sh
+dnf copr enable helexa/cortex && dnf install cortex  # gateway host
+dnf copr enable helexa/neuron && dnf install neuron  # GPU nodes
+```
 
 ### Phase 11: llama.cpp harness stub
 
diff --git a/cortex.spec b/cortex.spec
index 038f86a..10af6bd 100644
--- a/cortex.spec
+++ b/cortex.spec
@@ -1,7 +1,7 @@
 Name:           cortex
 Version:        0.1.0
 Release:        1%{?dist}
-Summary:        Inference gateway for multi-node mistral.rs clusters
+Summary:        Inference gateway for multi-node GPU clusters
 
 License:        GPL-3.0-or-later
 URL:            https://git.lair.cafe/helexa/cortex
@@ -15,11 +15,13 @@ BuildRequires: cargo
 BuildRequires:  gcc
 BuildRequires:  systemd-rpm-macros
 
+Requires(pre):  shadow-utils
+
 %description
-Cortex is a Rust reverse-proxy that sits in front of multiple mistral.rs
-inference nodes and presents a unified OpenAI and Anthropic compatible
-API surface. It handles model routing, lifecycle management, request
-translation, and metrics collection.
+Cortex is a Rust reverse-proxy that sits in front of multiple inference
+nodes (via neuron daemons) and presents a unified OpenAI and Anthropic
+compatible API surface. It handles model routing, lifecycle management,
+request translation, and metrics collection.
 
 %prep
 %autosetup
@@ -38,12 +40,33 @@ cargo build --release -p cortex-cli
 
 %install
 install -Dm755 target/release/cortex %{buildroot}%{_bindir}/cortex
+install -Dm644 data/cortex.service %{buildroot}%{_unitdir}/cortex.service
+install -dm750 %{buildroot}%{_sysconfdir}/cortex
+install -Dm640 cortex.example.toml %{buildroot}%{_sysconfdir}/cortex/cortex.toml
+install -Dm640 models.example.toml %{buildroot}%{_sysconfdir}/cortex/models.toml
+
+%pre
+getent group cortex >/dev/null || groupadd -r cortex
+getent passwd cortex >/dev/null || useradd -r -g cortex -d /var/lib/cortex -s /sbin/nologin cortex
+
+%post
+%systemd_post cortex.service
+
+%preun
+%systemd_preun cortex.service
+
+%postun
+%systemd_postun_with_restart cortex.service
 
 %files
 %license LICENSE
 %doc README.md
 %{_bindir}/cortex
+%{_unitdir}/cortex.service
+%dir %attr(750,root,cortex) %{_sysconfdir}/cortex
+%config(noreplace) %attr(640,root,cortex) %{_sysconfdir}/cortex/cortex.toml
+%config(noreplace) %attr(640,root,cortex) %{_sysconfdir}/cortex/models.toml
 
 %changelog
-* Mon Apr 14 2026 Rob Thijssen - 0.1.0-1
+* Wed Apr 15 2026 Rob Thijssen - 0.1.0-1
 - Initial package
diff --git a/data/cortex.service b/data/cortex.service
new file mode 100644
index 0000000..efad175
--- /dev/null
+++ b/data/cortex.service
@@ -0,0 +1,15 @@
+[Unit]
+Description=Cortex — inference gateway for multi-node GPU clusters
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+Type=simple
+ExecStart=/usr/bin/cortex serve --config /etc/cortex/cortex.toml
+Restart=on-failure
+RestartSec=5
+User=cortex
+Group=cortex
+
+[Install]
+WantedBy=multi-user.target
diff --git a/data/neuron.service b/data/neuron.service
new file mode 100644
index 0000000..ae071ad
--- /dev/null
+++ b/data/neuron.service
@@ -0,0 +1,15 @@
+[Unit]
+Description=Neuron — per-node GPU discovery and harness daemon for cortex
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+Type=simple
+ExecStart=/usr/bin/neuron --config /etc/cortex/neuron.toml
+Restart=on-failure
+RestartSec=5
+User=cortex
+Group=cortex
+
+[Install]
+WantedBy=multi-user.target
diff --git a/models.example.toml b/models.example.toml
new file mode 100644
index 0000000..073ece2
--- /dev/null
+++ b/models.example.toml
@@ -0,0 +1,29 @@
+# models.example.toml — model catalogue
+#
+# Copy to /etc/cortex/models.toml and adjust for your environment.
+# Describes how to serve each model. Cortex matches these profiles
+# against discovered neuron topologies for placement decisions.
+
+[[models]]
+id = "your-org/large-model"
+harness = "mistralrs"
+quant = "Q4_K_M"
+vram_mb = 19000
+min_devices = 2
+min_device_vram_mb = 10000
+pinned_on = ["gpu-large"]
+
+[[models]]
+id = "your-org/medium-model"
+harness = "mistralrs"
+quant = "Q6_K"
+vram_mb = 12000
+min_devices = 1
+pinned_on = ["gpu-medium"]
+
+[[models]]
+id = "your-org/embedding-model"
+harness = "mistralrs"
+quant = "Q8_0"
+vram_mb = 8000
+min_devices = 1
diff --git a/neuron.example.toml b/neuron.example.toml
new file mode 100644
index 0000000..1e9afc2
--- /dev/null
+++ b/neuron.example.toml
@@ -0,0 +1,16 @@
+# neuron.example.toml — example configuration
+#
+# Copy to /etc/cortex/neuron.toml and adjust for your environment.
+#
+# Environment variable overrides use NEURON_ prefix with __ separators:
+#   NEURON_PORT=9090
+
+port = 9090
+
+# -- Harnesses ---------------------------------------------------------------
+# Each [[harnesses]] entry declares an inference engine managed by neuron.
+
+[[harnesses]]
+name = "mistralrs"
+endpoint = "http://localhost:8080"
+systemd_unit = "mistralrs.service"
diff --git a/neuron.spec b/neuron.spec
new file mode 100644
index 0000000..8e96714
--- /dev/null
+++ b/neuron.spec
@@ -0,0 +1,69 @@
+Name:           neuron
+Version:        0.1.0
+Release:        1%{?dist}
+Summary:        Per-node GPU discovery and harness management daemon for cortex
+
+License:        GPL-3.0-or-later
+URL:            https://git.lair.cafe/helexa/cortex
+Source0:        %{name}-%{version}.tar.gz
+Source1:        %{name}-%{version}-vendor.tar.gz
+
+ExclusiveArch:  x86_64
+
+BuildRequires:  rust >= 1.85
+BuildRequires:  cargo
+BuildRequires:  gcc
+BuildRequires:  systemd-rpm-macros
+
+Requires(pre):  shadow-utils
+
+%description
+Neuron is a per-node daemon for cortex inference clusters. It discovers
+local GPU hardware via nvidia-smi, manages inference harnesses (mistral.rs,
+llama.cpp), and exposes an HTTP API for model lifecycle management.
+
+%prep
+%autosetup
+tar xf %{SOURCE1}
+mkdir -p .cargo
+cat > .cargo/config.toml << 'EOF'
+[source.crates-io]
+replace-with = "vendored-sources"
+
+[source.vendored-sources]
+directory = "vendor"
+EOF
+
+%build
+cargo build --release -p neuron
+
+%install
+install -Dm755 target/release/neuron %{buildroot}%{_bindir}/neuron
+install -Dm644 data/neuron.service %{buildroot}%{_unitdir}/neuron.service
+install -dm750 %{buildroot}%{_sysconfdir}/cortex
+install -Dm640 neuron.example.toml %{buildroot}%{_sysconfdir}/cortex/neuron.toml
+
+%pre
+getent group cortex >/dev/null || groupadd -r cortex
+getent passwd cortex >/dev/null || useradd -r -g cortex -d /var/lib/cortex -s /sbin/nologin cortex
+
+%post
+%systemd_post neuron.service
+
+%preun
+%systemd_preun neuron.service
+
+%postun
+%systemd_postun_with_restart neuron.service
+
+%files
+%license LICENSE
+%doc README.md
+%{_bindir}/neuron
+%{_unitdir}/neuron.service
+%dir %attr(750,root,cortex) %{_sysconfdir}/cortex
+%config(noreplace) %attr(640,root,cortex) %{_sysconfdir}/cortex/neuron.toml
+
+%changelog
+* Wed Apr 15 2026 Rob Thijssen - 0.1.0-1
+- Initial package