# helexa/.gitea/workflows/ci.yml

name: CI

# This workflow intentionally skips pushes to main. build-prerelease.yml
# already runs its own lint/test jobs there (they gate publish), and when
# both workflows fired on the same push they queued against each other on
# the same runner labels, adding ~12 minutes of latency per deploy. The
# full gate still runs here for feature branches, PRs to main, and
# release tags.
on:
  pull_request:
    branches:
      - main
  push:
    branches-ignore:
      - main
    tags:
      - 'v*'

# One concurrency group per ref: a newer push to the same ref supersedes
# the run that is still in flight. (There used to be a
# `cortex-runner-pool` group shared with build-prerelease.yml. It was
# dropped because the two workflows no longer trigger on the same refs,
# and ephemeral one-VM-per-job runners removed the shared-workspace race
# that group existed to serialize.)
concurrency:
  cancel-in-progress: true
  group: ci-${{ github.ref }}

env:
  # Every job gets its own cargo target directory. fmt, clippy, and test
  # run in parallel on the same `rust` runner; with a shared
  # /root/.cache/act/<hash>/hostexecutor/target/ they raced on each
  # other's cargo temp files (.tmpXXXXXX) and failed builds mid-compile.
  # The rebuild penalty is small because sccache still backs the actual
  # rustc cache.
  CARGO_TARGET_DIR: target-${{ github.job }}
  CARGO_INCREMENTAL: '0'

  # rustc is wrapped by sccache; the SCCACHE_* values select the bucket
  # and endpoint, and the credentials are read from repository secrets.
  RUSTC_WRAPPER: sccache
  SCCACHE_ENDPOINT: http://caveman.kosherinata.internal:9000
  SCCACHE_BUCKET: sccache
  SCCACHE_REGION: auto
  SCCACHE_S3_USE_SSL: 'false'
  AWS_ACCESS_KEY_ID: ${{ secrets.SCCACHE_S3_ACCESS_KEY }}
  AWS_SECRET_ACCESS_KEY: ${{ secrets.SCCACHE_S3_SECRET_KEY }}

jobs:
  # Formatting gate: runs `cargo fmt` in check mode with `--all`.
  fmt:
    name: Format
    runs-on: rust
    timeout-minutes: 15
    steps:
      - uses: actions/checkout@v4
      - run: cargo fmt --check --all

  # Lint gate: clippy over the whole workspace with `-D warnings`.
  clippy:
    name: Clippy
    runs-on: rust
    timeout-minutes: 25
    steps:
      - uses: actions/checkout@v4
      # The shared script (kept in sync with build-prerelease.yml) does the
      # failure-aware sccache escalation. When rustc dies from a signal
      # (SIGSEGV / OOM-kill) it keeps the cache and fails fast rather than
      # rebuilding uncached; the cache is dropped only for a real sccache
      # fault.
      - run: script/ci-cargo-escalate.sh cargo clippy --workspace -- -D warnings
        name: Clippy (sccache escalation)

  # Test gate: `cargo test` over the whole workspace.
  test:
    name: Test
    runs-on: rust
    timeout-minutes: 25
    steps:
      - uses: actions/checkout@v4
      # The escalation rationale is documented in
      # script/ci-cargo-escalate.sh.
      - run: script/ci-cargo-escalate.sh cargo test --workspace
        name: Test (sccache escalation)

  # Borrow-check the CUDA-only code path. The tests are never run here
  # because the runner has no GPU; the point is to catch a refactor that
  # compiles under the default feature set (all that the `clippy` and
  # `test` jobs exercise) yet breaks inside a `#[cfg(feature = "cuda")]`
  # block. The `cuda-13.0` label selects the runner that ships nvcc and
  # cudarc's build prerequisites. The generic `rust` and `rpm` runners
  # lack them: `rpm` was tried first and tripped cudarc's
  # `nvcc --version` build script (see commit history).
  cuda-check:
    name: CUDA type-check
    runs-on: cuda-13.0
    timeout-minutes: 35
    env:
      # The workflow-level env forces `RUSTC_WRAPPER: sccache`, which
      # hard-fails cargo when the CUDA image doesn't ship sccache. Blank
      # it for this job; the shared script used below probes for sccache
      # itself. The SCCACHE_*/AWS credentials stay set: they are harmless
      # while the wrapper is off and required once it is on.
      RUSTC_WRAPPER: ''
      # Without this, candle-kernels' build script falls back to
      # `nvidia-smi` to detect the compute cap, and the GPU-less builder
      # image doesn't ship nvidia-smi. Any valid cap is enough for a
      # borrow-check; the real per-flavour caps live in
      # build-prerelease.yml's matrix.
      CUDA_COMPUTE_CAP: '86'
    steps:
      - uses: actions/checkout@v4
      # The shared script handles sccache probing and failure
      # classification (build-prerelease.yml's neuron build follows the
      # same pattern): it probes for sccache and, on a rustc SIGSEGV /
      # OOM, keeps the cache and fails fast instead of rebuilding
      # uncached.
      - name: cargo check --features cuda (sccache escalation)
        run: |
          # act launches the step shell without /etc/profile, so the
          # gitea_runner user's inherited PATH lacks /usr/local/cuda-13.0/bin.
          # cudarc's build.rs shells out to `nvcc --version` (the neuron
          # crate enables cuda-version-from-build-system) and panics with
          # ENOENT if nvcc isn't resolvable — keep this export in sync
          # with build-prerelease.yml.
          export PATH="/usr/local/cuda-13.0/bin:${PATH}"
          export LD_LIBRARY_PATH="/usr/local/cuda-13.0/targets/x86_64-linux/lib:/usr/local/cuda-13.0/lib64:${LD_LIBRARY_PATH:-}"
          export LIBRARY_PATH="/usr/local/cuda-13.0/targets/x86_64-linux/lib:/usr/local/cuda-13.0/lib64:${LIBRARY_PATH:-}"
          script/ci-cargo-escalate.sh cargo check -p neuron --features cuda --all-targets

  # Release-tag job: builds the cortex source RPM after fmt, clippy, test
  # and cuda-check succeed, then uploads it as the `srpm-cortex` artifact.
  srpm-cortex:
    name: Build cortex SRPM
    timeout-minutes: 25
    runs-on: rpm
    needs: [fmt, clippy, test, cuda-check]
    if: startsWith(github.ref, 'refs/tags/v')
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # The version is the tag ref with the `refs/tags/v` prefix removed.
      - name: Determine version
        id: version
        run: |
          VERSION="${GITHUB_REF#refs/tags/v}"
          echo "VERSION=${VERSION}" >> "$GITHUB_OUTPUT"

      # In the shell steps of this job VERSION arrives through `env:`
      # instead of being expanded into the script text with `${{ }}`, so a
      # tag name containing shell metacharacters stays data and is never
      # parsed as part of the script.
      - name: Stamp version
        env:
          VERSION: ${{ steps.version.outputs.VERSION }}
        run: |
          # Only the `version = "..."` line inside [workspace.package] is
          # rewritten. The range closes at the next line that starts with
          # `[` (the following table header), so an inline array such as
          # `authors = ["..."]` cannot end it before the version line.
          sed -i '/\[workspace\.package\]/,/^\[/{ s/^version = ".*"/version = "'"${VERSION}"'"/ }' Cargo.toml
          sed -i "s/^Version:.*/Version:        ${VERSION}/" cortex.spec

      - name: Generate changelog entry
        uses: https://git.lair.cafe/actions/rpm-changelog@v1
        with:
          spec: cortex.spec
          version: ${{ steps.version.outputs.VERSION }}

      # The archive is written to /tmp and moved into the checkout
      # afterwards (presumably so tar is not writing into the tree it is
      # reading). Paths are re-rooted under cortex-<version>/.
      - name: Generate source tarball
        env:
          VERSION: ${{ steps.version.outputs.VERSION }}
        run: |
          set -ex
          tar czf "/tmp/cortex-${VERSION}.tar.gz" \
            --transform "s,^\.,cortex-${VERSION}," \
            --exclude='./target' \
            --exclude='./.git' \
            --exclude='*.tar.gz' \
            --exclude='*.src.rpm' \
            .
          mv "/tmp/cortex-${VERSION}.tar.gz" .

      # Vendored crates go into a second tarball; the unpacked vendor/
      # directory is removed once it has been archived.
      - name: Vendor Rust dependencies
        env:
          VERSION: ${{ steps.version.outputs.VERSION }}
        run: |
          cargo vendor vendor/
          tar czf "cortex-${VERSION}-vendor.tar.gz" vendor/
          rm -rf vendor/

      # Sources are read from, and the SRPM is written to, the checkout
      # directory.
      - name: Build SRPM
        run: |
          rpmbuild -bs cortex.spec \
            --define "_sourcedir $(pwd)" \
            --define "_srcrpmdir $(pwd)"

      - name: Upload SRPM artifact
        uses: actions/upload-artifact@v3
        with:
          name: srpm-cortex
          path: "*.src.rpm"

  # Release-tag job: builds the helexa-neuron source RPM after fmt,
  # clippy, test and cuda-check succeed, then uploads it as the
  # `srpm-neuron` artifact.
  srpm-neuron:
    name: Build neuron SRPM
    timeout-minutes: 25
    runs-on: rpm
    needs: [fmt, clippy, test, cuda-check]
    if: startsWith(github.ref, 'refs/tags/v')
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # The version is the tag ref with the `refs/tags/v` prefix removed.
      - name: Determine version
        id: version
        run: |
          VERSION="${GITHUB_REF#refs/tags/v}"
          echo "VERSION=${VERSION}" >> "$GITHUB_OUTPUT"

      # In the shell steps of this job VERSION arrives through `env:`
      # instead of being expanded into the script text with `${{ }}`, so a
      # tag name containing shell metacharacters stays data and is never
      # parsed as part of the script.
      - name: Stamp version
        env:
          VERSION: ${{ steps.version.outputs.VERSION }}
        run: |
          # Only the `version = "..."` line inside [workspace.package] is
          # rewritten. The range closes at the next line that starts with
          # `[` (the following table header), so an inline array such as
          # `authors = ["..."]` cannot end it before the version line.
          sed -i '/\[workspace\.package\]/,/^\[/{ s/^version = ".*"/version = "'"${VERSION}"'"/ }' Cargo.toml
          sed -i "s/^Version:.*/Version:        ${VERSION}/" helexa-neuron.spec

      - name: Generate changelog entry
        uses: https://git.lair.cafe/actions/rpm-changelog@v1
        with:
          spec: helexa-neuron.spec
          version: ${{ steps.version.outputs.VERSION }}

      # The archive is written to /tmp and moved into the checkout
      # afterwards (presumably so tar is not writing into the tree it is
      # reading). Paths are re-rooted under helexa-neuron-<version>/.
      - name: Generate source tarball
        env:
          VERSION: ${{ steps.version.outputs.VERSION }}
        run: |
          set -ex
          tar czf "/tmp/helexa-neuron-${VERSION}.tar.gz" \
            --transform "s,^\.,helexa-neuron-${VERSION}," \
            --exclude='./target' \
            --exclude='./.git' \
            --exclude='*.tar.gz' \
            --exclude='*.src.rpm' \
            .
          mv "/tmp/helexa-neuron-${VERSION}.tar.gz" .

      # Vendored crates go into a second tarball; the unpacked vendor/
      # directory is removed once it has been archived.
      - name: Vendor Rust dependencies
        env:
          VERSION: ${{ steps.version.outputs.VERSION }}
        run: |
          cargo vendor vendor/
          tar czf "helexa-neuron-${VERSION}-vendor.tar.gz" vendor/
          rm -rf vendor/

      # Sources are read from, and the SRPM is written to, the checkout
      # directory.
      - name: Build SRPM
        run: |
          rpmbuild -bs helexa-neuron.spec \
            --define "_sourcedir $(pwd)" \
            --define "_srcrpmdir $(pwd)"

      - name: Upload SRPM artifact
        uses: actions/upload-artifact@v3
        with:
          name: srpm-neuron
          path: "*.src.rpm"

  # Fetches the `srpm-cortex` artifact produced by the srpm-cortex job and
  # hands it to the copr-publish action for the helexa/helexa project.
  copr-cortex:
    name: Publish cortex to COPR
    needs: srpm-cortex
    runs-on: fedora-43
    timeout-minutes: 60
    steps:
      - name: Download SRPM
        uses: actions/download-artifact@v3
        with:
          name: srpm-cortex

      - name: Publish to COPR
        uses: https://git.lair.cafe/actions/copr-publish@v1
        with:
          copr-config: ${{ secrets.COPR_CONFIG }}
          project: helexa/helexa
          srpm: '*.src.rpm'

  # Fetches the `srpm-neuron` artifact produced by the srpm-neuron job and
  # hands it to the copr-publish action for the helexa/helexa project.
  copr-neuron:
    name: Publish neuron to COPR
    needs: srpm-neuron
    runs-on: fedora-43
    timeout-minutes: 60
    steps:
      - name: Download SRPM
        uses: actions/download-artifact@v3
        with:
          name: srpm-neuron

      - name: Publish to COPR
        uses: https://git.lair.cafe/actions/copr-publish@v1
        with:
          copr-config: ${{ secrets.COPR_CONFIG }}
          project: helexa/helexa
          srpm: '*.src.rpm'

  # After both COPR publishes succeed, stamps the released version into
  # Cargo.toml and both spec files, adds changelog entries, and pushes the
  # result to main.
  bump-version:
    name: Bump version in source
    timeout-minutes: 15
    runs-on: rust
    needs: [copr-cortex, copr-neuron]
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # The version is the tag ref with the `refs/tags/v` prefix removed.
      - name: Determine version
        id: version
        run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> "$GITHUB_OUTPUT"

      # In the shell steps of this job VERSION arrives through `env:`
      # instead of being expanded into the script text with `${{ }}`, so a
      # tag name containing shell metacharacters stays data and is never
      # parsed as part of the script.
      - name: Stamp version
        env:
          VERSION: ${{ steps.version.outputs.VERSION }}
        run: |
          # Only the `version = "..."` line inside [workspace.package] is
          # rewritten. The range closes at the next line that starts with
          # `[` (the following table header), so an inline array such as
          # `authors = ["..."]` cannot end it before the version line.
          sed -i '/\[workspace\.package\]/,/^\[/{ s/^version = ".*"/version = "'"${VERSION}"'"/ }' Cargo.toml
          sed -i "s/^Version:.*/Version:        ${VERSION}/" cortex.spec
          sed -i "s/^Version:.*/Version:        ${VERSION}/" helexa-neuron.spec
          # Refresh the workspace members' versions in Cargo.lock.
          # `cargo update --workspace` re-resolves only the workspace's own
          # packages, so nothing is compiled and third-party dependencies
          # already in the lockfile are left alone. Still best-effort: a
          # failure is reported but does not block the bump.
          cargo update --workspace \
            || echo "warning: cargo update --workspace failed; Cargo.lock not refreshed" >&2

      - name: Generate cortex changelog entry
        uses: https://git.lair.cafe/actions/rpm-changelog@v1
        with:
          spec: cortex.spec
          version: ${{ steps.version.outputs.VERSION }}

      - name: Generate helexa-neuron changelog entry
        uses: https://git.lair.cafe/actions/rpm-changelog@v1
        with:
          spec: helexa-neuron.spec
          version: ${{ steps.version.outputs.VERSION }}

      # Commits only when the stamped files actually changed.
      # NOTE(review): HEAD is the checked-out tag commit plus the bump
      # commit; presumably the push is rejected as a non-fast-forward if
      # main has moved past the tag — confirm that is the intended outcome.
      - name: Commit and push
        env:
          GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }}
          VERSION: ${{ steps.version.outputs.VERSION }}
        run: |
          git config user.name "Gitea Actions"
          git config user.email "actions@git.lair.cafe"
          git add Cargo.toml Cargo.lock cortex.spec helexa-neuron.spec
          if git diff --cached --quiet; then
            echo "Nothing to commit for ${VERSION}"
          else
            git commit -m "chore: bump version to ${VERSION}"
            git remote set-url origin "https://gitea-actions:${GITEA_TOKEN}@git.lair.cafe/${{ github.repository }}.git"
            git push origin HEAD:main
          fi