Compare commits
2 Commits
602e8e1471
...
1866b99a89
| Author | SHA1 | Date | |
|---|---|---|---|
|
1866b99a89
|
|||
|
60176e7c2e
|
@@ -8,8 +8,13 @@ name: build-prerelease
|
|||||||
# Optionally provide a `ref` to build from a non-default branch.
|
# Optionally provide a `ref` to build from a non-default branch.
|
||||||
#
|
#
|
||||||
# The published packages are versioned as e.g.
|
# The published packages are versioned as e.g.
|
||||||
# helexa-neuron-blackwell-0.1.16-0.1.20260518gitabcdef0.fc43.x86_64
|
# helexa-neuron-blackwell-0.1.16-0.1.20260518140530.gitabcdef0.fc43.x86_64
|
||||||
# so they sort BELOW the eventual 0.1.16-1 stable release.
|
# ^^^^^^^^^^^^^^^^^^ ^^^^^^^^
|
||||||
|
# commit time (s) commit sha
|
||||||
|
# so they sort BELOW the eventual 0.1.16-1 stable release, and so two
|
||||||
|
# commits on the same day are still strictly ordered by their commit
|
||||||
|
# timestamps (rather than by RPM-vercmp's alpha-vs-digit precedence
|
||||||
|
# on the SHA fragment).
|
||||||
|
|
||||||
on:
|
on:
|
||||||
# Auto-build on every push to main so the unstable channel tracks
|
# Auto-build on every push to main so the unstable channel tracks
|
||||||
@@ -25,10 +30,14 @@ on:
|
|||||||
default: ""
|
default: ""
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
# Coalesce on branch+event so successive pushes don't pile up; the
|
# Share the group with ci.yml so the two workflows can't run
|
||||||
# latest push wins.
|
# concurrently on the same `rust` runner (act reuses the workspace
|
||||||
group: prerelease-build-${{ github.ref }}
|
# cache and races destroy each other's build files mid-compile).
|
||||||
cancel-in-progress: true
|
# cancel-in-progress=false → workflows queue; if a newer push lands,
|
||||||
|
# the in-flight run is not cancelled and the new run waits behind it
|
||||||
|
# in the ref-keyed group shared with ci.yml.
|
||||||
|
group: cortex-runner-pool-${{ github.ref }}
|
||||||
|
cancel-in-progress: false
|
||||||
|
|
||||||
env:
|
env:
|
||||||
CARGO_INCREMENTAL: "0"
|
CARGO_INCREMENTAL: "0"
|
||||||
@@ -41,7 +50,7 @@ jobs:
|
|||||||
version: ${{ steps.info.outputs.version }}
|
version: ${{ steps.info.outputs.version }}
|
||||||
release: ${{ steps.info.outputs.release }}
|
release: ${{ steps.info.outputs.release }}
|
||||||
short_sha: ${{ steps.info.outputs.short_sha }}
|
short_sha: ${{ steps.info.outputs.short_sha }}
|
||||||
commit_date: ${{ steps.info.outputs.commit_date }}
|
commit_timestamp: ${{ steps.info.outputs.commit_timestamp }}
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
@@ -53,13 +62,20 @@ jobs:
|
|||||||
set -eux
|
set -eux
|
||||||
VERSION=$(awk -F\" '/^version[[:space:]]*=/ { print $2; exit }' Cargo.toml)
|
VERSION=$(awk -F\" '/^version[[:space:]]*=/ { print $2; exit }' Cargo.toml)
|
||||||
SHORT_SHA=$(git rev-parse --short=7 HEAD)
|
SHORT_SHA=$(git rev-parse --short=7 HEAD)
|
||||||
COMMIT_DATE=$(git log -1 --format=%cd --date=format:%Y%m%d HEAD)
|
# Second-precise commit timestamp gives the release stamp a
|
||||||
# Prerelease release stamp sorts before "1" (the stable release).
|
# strictly monotonic numeric prefix. The earlier %Y%m%d-only
|
||||||
RELEASE="0.1.${COMMIT_DATE}git${SHORT_SHA}"
|
# form let same-day builds be ordered by RPM's rpmvercmp
|
||||||
|
# rules over the SHA, which is non-chronological — e.g.
|
||||||
|
# "git602e8e1" sorts newer than "gitf9f5fa4" purely because
|
||||||
|
# rpmvercmp ranks digit-prefixed segments above alpha ones.
|
||||||
|
# The SHA stays only as a debug identifier; sort order is
|
||||||
|
# decided entirely by the timestamp.
|
||||||
|
COMMIT_TIMESTAMP=$(git log -1 --format=%cd --date=format:%Y%m%d%H%M%S HEAD)
|
||||||
|
RELEASE="0.1.${COMMIT_TIMESTAMP}.git${SHORT_SHA}"
|
||||||
echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
|
echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
|
||||||
echo "release=${RELEASE}" >> "$GITHUB_OUTPUT"
|
echo "release=${RELEASE}" >> "$GITHUB_OUTPUT"
|
||||||
echo "short_sha=${SHORT_SHA}" >> "$GITHUB_OUTPUT"
|
echo "short_sha=${SHORT_SHA}" >> "$GITHUB_OUTPUT"
|
||||||
echo "commit_date=${COMMIT_DATE}" >> "$GITHUB_OUTPUT"
|
echo "commit_timestamp=${COMMIT_TIMESTAMP}" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
build-cortex:
|
build-cortex:
|
||||||
name: Build cortex binary
|
name: Build cortex binary
|
||||||
|
|||||||
@@ -7,6 +7,16 @@ on:
|
|||||||
pull_request:
|
pull_request:
|
||||||
branches: [main]
|
branches: [main]
|
||||||
|
|
||||||
|
# Share a concurrency group with build-prerelease.yml so the two
|
||||||
|
# workflows don't race on the same `rust` runner workspace (act's
|
||||||
|
# /root/.cache/act/<hash>/hostexecutor/ is shared across concurrent
|
||||||
|
# jobs and one job's checkout step nukes another's in-flight build
|
||||||
|
# files). cancel-in-progress=false → runs in this group queue
|
||||||
|
# behind each other instead of cancelling the in-flight one.
|
||||||
|
concurrency:
|
||||||
|
group: cortex-runner-pool-${{ github.ref }}
|
||||||
|
cancel-in-progress: false
|
||||||
|
|
||||||
env:
|
env:
|
||||||
CARGO_INCREMENTAL: "0"
|
CARGO_INCREMENTAL: "0"
|
||||||
RUSTC_WRAPPER: sccache
|
RUSTC_WRAPPER: sccache
|
||||||
|
|||||||
@@ -6,7 +6,11 @@
|
|||||||
#
|
#
|
||||||
# Required defines at rpmbuild time:
|
# Required defines at rpmbuild time:
|
||||||
# cortex_version e.g. "0.1.16"
|
# cortex_version e.g. "0.1.16"
|
||||||
# cortex_prerelease e.g. "0.1.20260518gitabcdef0" (used as Release)
|
# cortex_prerelease e.g. "0.1.20260518140530.gitabcdef0"
|
||||||
|
# ^^^^^^^^^^^^^^^^^^ ^^^^^^^^
|
||||||
|
# commit time (sec) commit sha
|
||||||
|
# (used as Release; the timestamp prefix
|
||||||
|
# keeps same-day builds strictly ordered.)
|
||||||
|
|
||||||
%global _build_id_links none
|
%global _build_id_links none
|
||||||
%global debug_package %{nil}
|
%global debug_package %{nil}
|
||||||
|
|||||||
@@ -9,7 +9,11 @@
|
|||||||
# neuron_version e.g. "0.1.16"
|
# neuron_version e.g. "0.1.16"
|
||||||
# neuron_flavour e.g. "ada", "blackwell" — matches the CI build
|
# neuron_flavour e.g. "ada", "blackwell" — matches the CI build
|
||||||
# matrix's compute_cap label.
|
# matrix's compute_cap label.
|
||||||
# neuron_prerelease e.g. "0.1.20260518gitabcdef0" (used as Release)
|
# neuron_prerelease e.g. "0.1.20260518140530.gitabcdef0"
|
||||||
|
# ^^^^^^^^^^^^^^^^^^ ^^^^^^^^
|
||||||
|
# commit time (sec) commit sha
|
||||||
|
# (used as Release; the timestamp prefix
|
||||||
|
# keeps same-day builds strictly ordered.)
|
||||||
#
|
#
|
||||||
# One flavour can be installed at a time on a given host; flavour
|
# One flavour can be installed at a time on a given host; flavour
|
||||||
# packages Conflict with each other.
|
# packages Conflict with each other.
|
||||||
|
|||||||
@@ -31,7 +31,10 @@ BASE="http://${HOST}:${PORT}"
|
|||||||
# beyond gibberish.
|
# beyond gibberish.
|
||||||
PROBE_PROMPT='What is the capital of France? Respond with the city name only, no punctuation.'
|
PROBE_PROMPT='What is the capital of France? Respond with the city name only, no punctuation.'
|
||||||
EXPECT_SUBSTR='Paris'
|
EXPECT_SUBSTR='Paris'
|
||||||
MAX_TOKENS=32
|
# Qwen3 prepends <think>...</think> reasoning before the answer when the
|
||||||
|
# chat template enables thinking mode, which eats most of a small token
|
||||||
|
# budget. 256 leaves enough room for thinking + final answer.
|
||||||
|
MAX_TOKENS=256
|
||||||
|
|
||||||
# /models/load is synchronous — neuron blocks the response until the
|
# /models/load is synchronous — neuron blocks the response until the
|
||||||
# hf-hub download + GGUF parse + tensor materialisation is done. A
|
# hf-hub download + GGUF parse + tensor materialisation is done. A
|
||||||
@@ -40,7 +43,10 @@ MAX_TOKENS=32
|
|||||||
LOAD_TIMEOUT=600
|
LOAD_TIMEOUT=600
|
||||||
INFER_TIMEOUT=120
|
INFER_TIMEOUT=120
|
||||||
|
|
||||||
say() { printf '[%s] %s\n' "${HOST}" "$*"; }
|
# Status messages go to stderr so command substitutions like
|
||||||
|
# `raw=$(run_probe)` capture only the function's intended return value
|
||||||
|
# (an HTTP body), not the progress chatter.
|
||||||
|
say() { printf '[%s] %s\n' "${HOST}" "$*" >&2; }
|
||||||
die() { say "FAIL: $*"; exit 1; }
|
die() { say "FAIL: $*"; exit 1; }
|
||||||
|
|
||||||
probe_health() {
|
probe_health() {
|
||||||
@@ -49,7 +55,11 @@ probe_health() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
list_loaded_ids() {
|
list_loaded_ids() {
|
||||||
curl --silent --fail "${BASE}/models" | yq -r '.[].id'
|
# The manifest is YAML and uses yq; HTTP responses are JSON and use
|
||||||
|
# jq directly. pip-yq parses input as YAML by default, which trips
|
||||||
|
# on JSON content that happens to look like YAML aliases (chatcmpl
|
||||||
|
# ids, escaped quotes inside `<think>...</think>` blocks, etc.).
|
||||||
|
curl --silent --fail "${BASE}/models" | jq -r '.[].id'
|
||||||
}
|
}
|
||||||
|
|
||||||
is_loaded() {
|
is_loaded() {
|
||||||
@@ -88,7 +98,7 @@ EOF
|
|||||||
run_probe() {
|
run_probe() {
|
||||||
say "POST /v1/chat/completions (probe: ${PROBE_PROMPT})"
|
say "POST /v1/chat/completions (probe: ${PROBE_PROMPT})"
|
||||||
local payload
|
local payload
|
||||||
payload=$(yq -n -c \
|
payload=$(jq -n -c \
|
||||||
--arg model "${MODEL_ID}" \
|
--arg model "${MODEL_ID}" \
|
||||||
--arg content "${PROBE_PROMPT}" \
|
--arg content "${PROBE_PROMPT}" \
|
||||||
--argjson tokens "${MAX_TOKENS}" \
|
--argjson tokens "${MAX_TOKENS}" \
|
||||||
@@ -124,10 +134,15 @@ fi
|
|||||||
|
|
||||||
raw=$(run_probe)
|
raw=$(run_probe)
|
||||||
echo "---"
|
echo "---"
|
||||||
echo "${raw}" | yq -r '.'
|
# Dump the raw JSON. Don't pipe through `yq -r '.'` — yq's default
|
||||||
|
# YAML output mode chokes on JSON strings that contain `<` (and the
|
||||||
|
# `<think>` markers Qwen3 emits during reasoning are a perfect
|
||||||
|
# example). The targeted `jq -r '.path'` calls below work fine
|
||||||
|
# because jq parses the response as JSON and never re-emits YAML.
|
||||||
|
echo "${raw}"
|
||||||
echo "---"
|
echo "---"
|
||||||
|
|
||||||
content=$(echo "${raw}" | yq -r '.choices[0].message.content // empty')
|
content=$(echo "${raw}" | jq -r '.choices[0].message.content // empty')
|
||||||
if [[ -z "${content}" ]]; then
|
if [[ -z "${content}" ]]; then
|
||||||
die "no content in chat completion response"
|
die "no content in chat completion response"
|
||||||
fi
|
fi
|
||||||
|
|||||||
Reference in New Issue
Block a user