Compare commits
2 Commits
602e8e1471
...
1866b99a89
| Author | SHA1 | Date | |
|---|---|---|---|
|
1866b99a89
|
|||
|
60176e7c2e
|
@@ -8,8 +8,13 @@ name: build-prerelease
|
||||
# Optionally provide a `ref` to build from a non-default branch.
|
||||
#
|
||||
# The published packages are versioned as e.g.
|
||||
# helexa-neuron-blackwell-0.1.16-0.1.20260518gitabcdef0.fc43.x86_64
|
||||
# so they sort BELOW the eventual 0.1.16-1 stable release.
|
||||
# helexa-neuron-blackwell-0.1.16-0.1.20260518140530.gitabcdef0.fc43.x86_64
|
||||
# ^^^^^^^^^^^^^^^^^^ ^^^^^^^^
|
||||
# commit time (s) commit sha
|
||||
# so they sort BELOW the eventual 0.1.16-1 stable release, and so two
|
||||
# commits on the same day are still strictly ordered by their commit
|
||||
# timestamps (rather than by RPM-vercmp's alpha-vs-digit precedence
|
||||
# on the SHA fragment).
|
||||
|
||||
on:
|
||||
# Auto-build on every push to main so the unstable channel tracks
|
||||
@@ -25,10 +30,14 @@ on:
|
||||
default: ""
|
||||
|
||||
concurrency:
|
||||
# Coalesce on branch+event so successive pushes don't pile up; the
|
||||
# latest push wins.
|
||||
group: prerelease-build-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
# Share the group with ci.yml so the two workflows can't run
|
||||
# concurrently on the same `rust` runner (act reuses the workspace
|
||||
# cache and races destroy each other's build files mid-compile).
|
||||
# cancel-in-progress=false → runs queue instead of cancelling each
|
||||
# other; a newer push never kills an in-flight build, it waits in the
|
||||
# same ref-keyed group that ci.yml uses.
|
||||
group: cortex-runner-pool-${{ github.ref }}
|
||||
cancel-in-progress: false
|
||||
|
||||
env:
|
||||
CARGO_INCREMENTAL: "0"
|
||||
@@ -41,7 +50,7 @@ jobs:
|
||||
version: ${{ steps.info.outputs.version }}
|
||||
release: ${{ steps.info.outputs.release }}
|
||||
short_sha: ${{ steps.info.outputs.short_sha }}
|
||||
commit_date: ${{ steps.info.outputs.commit_date }}
|
||||
commit_timestamp: ${{ steps.info.outputs.commit_timestamp }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
@@ -53,13 +62,20 @@ jobs:
|
||||
set -eux
|
||||
VERSION=$(awk -F\" '/^version[[:space:]]*=/ { print $2; exit }' Cargo.toml)
|
||||
SHORT_SHA=$(git rev-parse --short=7 HEAD)
|
||||
COMMIT_DATE=$(git log -1 --format=%cd --date=format:%Y%m%d HEAD)
|
||||
# Prerelease release stamp sorts before "1" (the stable release).
|
||||
RELEASE="0.1.${COMMIT_DATE}git${SHORT_SHA}"
|
||||
# Second-precise commit timestamp gives the release stamp a
|
||||
# strictly monotonic numeric prefix. The earlier %Y%m%d-only
|
||||
# form let same-day builds be ordered by RPM's rpmvercmp
|
||||
# rules over the SHA, which is non-chronological — e.g.
|
||||
# "git602e8e1" sorts newer than "gitf9f5fa4" purely because
|
||||
# rpmvercmp ranks digit-prefixed segments above alpha ones.
|
||||
# The SHA stays only as a debug identifier; sort order is
|
||||
# decided entirely by the timestamp.
|
||||
COMMIT_TIMESTAMP=$(git log -1 --format=%cd --date=format:%Y%m%d%H%M%S HEAD)
|
||||
RELEASE="0.1.${COMMIT_TIMESTAMP}.git${SHORT_SHA}"
|
||||
echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
|
||||
echo "release=${RELEASE}" >> "$GITHUB_OUTPUT"
|
||||
echo "short_sha=${SHORT_SHA}" >> "$GITHUB_OUTPUT"
|
||||
echo "commit_date=${COMMIT_DATE}" >> "$GITHUB_OUTPUT"
|
||||
echo "commit_timestamp=${COMMIT_TIMESTAMP}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
build-cortex:
|
||||
name: Build cortex binary
|
||||
|
||||
@@ -7,6 +7,16 @@ on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
# Share a concurrency group with build-prerelease.yml so the two
|
||||
# workflows don't race on the same `rust` runner workspace (act's
|
||||
# /root/.cache/act/<hash>/hostexecutor/ is shared across concurrent
|
||||
# jobs and one job's checkout step nukes another's in-flight build
|
||||
# files). cancel-in-progress=false → runs in the group wait for each
|
||||
# other; an in-flight build is never cancelled by a newer push.
|
||||
concurrency:
|
||||
group: cortex-runner-pool-${{ github.ref }}
|
||||
cancel-in-progress: false
|
||||
|
||||
env:
|
||||
CARGO_INCREMENTAL: "0"
|
||||
RUSTC_WRAPPER: sccache
|
||||
|
||||
@@ -6,7 +6,11 @@
|
||||
#
|
||||
# Required defines at rpmbuild time:
|
||||
# cortex_version e.g. "0.1.16"
|
||||
# cortex_prerelease e.g. "0.1.20260518gitabcdef0" (used as Release)
|
||||
# cortex_prerelease e.g. "0.1.20260518140530.gitabcdef0"
|
||||
# ^^^^^^^^^^^^^^^^^^ ^^^^^^^^
|
||||
# commit time (sec) commit sha
|
||||
# (used as Release; the timestamp prefix
|
||||
# keeps same-day builds strictly ordered.)
|
||||
|
||||
%global _build_id_links none
|
||||
%global debug_package %{nil}
|
||||
|
||||
@@ -9,7 +9,11 @@
|
||||
# neuron_version e.g. "0.1.16"
|
||||
# neuron_flavour e.g. "ada", "blackwell" — matches the CI build
|
||||
# matrix's compute_cap label.
|
||||
# neuron_prerelease e.g. "0.1.20260518gitabcdef0" (used as Release)
|
||||
# neuron_prerelease e.g. "0.1.20260518140530.gitabcdef0"
|
||||
# ^^^^^^^^^^^^^^^^^^ ^^^^^^^^
|
||||
# commit time (sec) commit sha
|
||||
# (used as Release; the timestamp prefix
|
||||
# keeps same-day builds strictly ordered.)
|
||||
#
|
||||
# One flavour can be installed at a time on a given host; flavour
|
||||
# packages Conflict with each other.
|
||||
|
||||
@@ -31,7 +31,10 @@ BASE="http://${HOST}:${PORT}"
|
||||
# beyond gibberish.
|
||||
PROBE_PROMPT='What is the capital of France? Respond with the city name only, no punctuation.'
|
||||
EXPECT_SUBSTR='Paris'
|
||||
MAX_TOKENS=32
|
||||
# Qwen3 prepends <think>...</think> reasoning before the answer when the
|
||||
# chat template enables thinking mode, which eats most of a small token
|
||||
# budget. 256 leaves enough room for thinking + final answer.
|
||||
MAX_TOKENS=256
|
||||
|
||||
# /models/load is synchronous — neuron blocks the response until the
|
||||
# hf-hub download + GGUF parse + tensor materialisation is done. A
|
||||
@@ -40,7 +43,10 @@ MAX_TOKENS=32
|
||||
LOAD_TIMEOUT=600
|
||||
INFER_TIMEOUT=120
|
||||
|
||||
say() { printf '[%s] %s\n' "${HOST}" "$*"; }
|
||||
# Status messages go to stderr so command substitutions like
|
||||
# `raw=$(run_probe)` capture only the function's intended return value
|
||||
# (an HTTP body), not the progress chatter.
|
||||
say() { printf '[%s] %s\n' "${HOST}" "$*" >&2; }
|
||||
die() { say "FAIL: $*"; exit 1; }
|
||||
|
||||
probe_health() {
|
||||
@@ -49,7 +55,11 @@ probe_health() {
|
||||
}
|
||||
|
||||
list_loaded_ids() {
|
||||
curl --silent --fail "${BASE}/models" | yq -r '.[].id'
|
||||
# The manifest is YAML and uses yq; HTTP responses are JSON and use
|
||||
# jq directly. pip-yq parses input as YAML by default, which trips
|
||||
# on JSON content that happens to look like YAML aliases (chatcmpl
|
||||
# ids, escaped quotes inside `<think>...</think>` blocks, etc.).
|
||||
curl --silent --fail "${BASE}/models" | jq -r '.[].id'
|
||||
}
|
||||
|
||||
is_loaded() {
|
||||
@@ -88,7 +98,7 @@ EOF
|
||||
run_probe() {
|
||||
say "POST /v1/chat/completions (probe: ${PROBE_PROMPT})"
|
||||
local payload
|
||||
payload=$(yq -n -c \
|
||||
payload=$(jq -n -c \
|
||||
--arg model "${MODEL_ID}" \
|
||||
--arg content "${PROBE_PROMPT}" \
|
||||
--argjson tokens "${MAX_TOKENS}" \
|
||||
@@ -124,10 +134,15 @@ fi
|
||||
|
||||
raw=$(run_probe)
|
||||
echo "---"
|
||||
echo "${raw}" | yq -r '.'
|
||||
# Dump the raw JSON. Don't pipe through `yq -r '.'` — yq's default
|
||||
# YAML output mode chokes on JSON strings that contain `<` (and the
|
||||
# `<think>` markers Qwen3 emits during reasoning are a perfect
|
||||
# example). The targeted `jq -r '.path'` calls below work fine
|
||||
# because jq parses the body as JSON and never re-emits it as YAML.
|
||||
echo "${raw}"
|
||||
echo "---"
|
||||
|
||||
content=$(echo "${raw}" | yq -r '.choices[0].message.content // empty')
|
||||
content=$(echo "${raw}" | jq -r '.choices[0].message.content // empty')
|
||||
if [[ -z "${content}" ]]; then
|
||||
die "no content in chat completion response"
|
||||
fi
|
||||
|
||||
Reference in New Issue
Block a user