Remove /dev/null redirects in hg-ingest.sh so errors are visible. cd to work dir before loop to prevent getcwd failures after rm. Use $HOME instead of ~ for proper expansion in default values. Reduce timeline entry title, subtitle, and body font sizes for a more compact activity feed. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
141 lines
3.8 KiB
Bash
Executable File
141 lines
3.8 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# One-shot hg changeset ingestion via local clones.
#
# Clones each hg repo without a working copy (hg clone --noupdate), extracts
# changesets matching the author terms, and inserts them into the moments
# database. Sets poller_state so the worker won't re-scan.
#
# Requirements: hg (mercurial), psql, jq
#
# Environment (all optional):
#   DATABASE_URL  postgres connection string; defaults to the internal
#                 moments database using this host's TLS client certificate
#   HG_HOST       hg server to clone from (default: hg-edge.mozilla.org)
#   HG_WORK_DIR   scratch directory for the clone and cache (default: $HOME/hg)
#
# Usage:
#   DATABASE_URL="postgres://..." ./script/hg-ingest.sh
#
set -euo pipefail

# Only resolve the FQDN when the default connection string is actually needed;
# the certificate and key paths are both named after this host.
if [[ -z "${DATABASE_URL:-}" ]]; then
  host_fqdn=$(hostname -f)
  DATABASE_URL="postgres://moments_rw@magrathea.kosherinata.internal:5432/moments?sslmode=verify-full&sslrootcert=/etc/pki/ca-trust/source/anchors/root-internal.pem&sslcert=/etc/pki/tls/misc/${host_fqdn}.pem&sslkey=/etc/pki/tls/private/${host_fqdn}.pem"
fi
HG_HOST="${HG_HOST:-hg-edge.mozilla.org}"
# $HOME rather than ~ because tilde is not expanded inside a quoted default.
WORK_DIR="${HG_WORK_DIR:-$HOME/hg}"

# Repos to clone (groups are expanded inline)
REPOS=(
  integration/mozilla-inbound
  integration/autoland
  integration/fx-team
  integration/b2g-inbound
  build/puppet
  build/tools
  build/buildbot
  build/buildbot-configs
  build/slave_health
  build/mozharness
  build/braindump
  build/cloud-tools
  build/compare-locales
  build/nagios-core
  build/partner-repacks
  build/preproduction
  build/rpm-sources
  build/talos
  build/tupperware
  build/ash-mozharness
  build/autoland
  build/opsi-package-sources
)

# Author terms — matched case-insensitively against changeset author fields
AUTHOR_TERMS=("rthijssen" "grenade")

# Safety net: aborts with a message if DATABASE_URL somehow ended up empty.
: "${DATABASE_URL:?DATABASE_URL must be set}"
# Prepare the scratch area: one reusable clone directory plus a cache
# directory that holds one extracted .tsv file per repo.
mkdir -p "$WORK_DIR"

# Running total of changesets processed across all repos.
total=0

CLONE_DIR="$WORK_DIR/clone"
CACHE_DIR="$WORK_DIR/cache"
mkdir -p "$CACHE_DIR"

# Work from a directory that is never removed, so that deleting the clone
# directory later cannot leave the shell without a valid cwd (getcwd errors).
cd "$WORK_DIR"
#######################################
# Build an hg revset matching changesets whose author field matches any term.
# Arguments: $@ - author search terms
# Outputs:   "author('t1') or author('t2') ..." on stdout (empty if no terms)
#######################################
build_author_revset() {
  local revset="" term
  for term in "$@"; do
    if [[ -z "$revset" ]]; then
      revset="author('$term')"
    else
      revset="$revset or author('$term')"
    fi
  done
  printf '%s' "$revset"
}

#######################################
# Insert one hg commit event; rows whose id already exists are left untouched.
# Values are passed as psql variables and interpolated with :'name', so psql
# quotes them as SQL literals and arbitrary commit text cannot break out of
# the statement. Variable interpolation is not available with -c, hence the
# statement is fed on stdin; ON_ERROR_STOP makes a failed INSERT exit non-zero.
# Globals:   DATABASE_URL (read)
# Arguments: $1 - event id
#            $2 - occurred_at timestamp (ISO 8601, UTC)
#            $3 - JSON payload (single line)
# Returns:   psql's exit status
#######################################
insert_event() {
  psql "$DATABASE_URL" -q -v ON_ERROR_STOP=1 \
    -v event_id="$1" -v occurred_at="$2" -v payload="$3" <<'SQL'
INSERT INTO events (id, source, action, occurred_at, public, payload)
VALUES (:'event_id', 'hg', 'Commit', :'occurred_at', true, :'payload'::jsonb)
ON CONFLICT (id) DO NOTHING;
SQL
}

# The revset depends only on AUTHOR_TERMS, so build it once for all repos.
revset=$(build_author_revset "${AUTHOR_TERMS[@]}")

for repo in "${REPOS[@]}"; do
  # One cache file per repo; slashes in the repo path become underscores.
  cache_file="$CACHE_DIR/${repo//\//_}.tsv"

  # Skip repos already cached (re-run safe)
  if [[ -f "$cache_file" ]]; then
    echo "[hg-ingest] $repo: using cached results"
  else
    # Remove any previous clone to keep only one on disk
    rm -rf "$CLONE_DIR"

    echo "[hg-ingest] cloning $repo"
    if ! hg clone --noupdate "https://$HG_HOST/$repo" "$CLONE_DIR"; then
      echo "[hg-ingest] clone failed: $repo (skipping)"
      continue
    fi

    # Extract matching changesets to a temporary file and promote it to the
    # cache only on success. A failed extraction must not leave a cache file
    # behind, otherwise later runs would treat the repo as already processed.
    tmp_file="$cache_file.tmp"
    if hg log -R "$CLONE_DIR" -r "$revset" \
      --template '{node}\t{author}\t{date|hgdate}\t{desc|firstline}\n' \
      > "$tmp_file"; then
      mv -f "$tmp_file" "$cache_file"
    else
      echo "[hg-ingest] log extraction failed: $repo (skipping)"
      rm -f "$tmp_file"
      rm -rf "$CLONE_DIR"
      continue
    fi

    # Free disk immediately
    rm -rf "$CLONE_DIR"
  fi

  # Ingest cached results into the database. count is the number of cached
  # rows processed; rows that already existed are still counted.
  count=0
  while IFS=$'\t' read -r node author date_raw desc; do
    if [[ -z "$node" ]]; then
      continue
    fi

    # {date|hgdate} outputs "timestamp offset" — take just the timestamp
    date_ts="${date_raw%% *}"

    # Build ISO timestamp from unix epoch (GNU date syntax)
    occurred_at=$(date -u -d "@${date_ts}" '+%Y-%m-%dT%H:%M:%SZ')

    event_id="hg:${repo}:${node}"

    # Build payload JSON (jq handles all escaping); -c keeps it on one line
    payload=$(jq -cn \
      --arg node "$node" \
      --arg user "$author" \
      --arg desc "$desc" \
      --arg repo "$repo" \
      --arg host "$HG_HOST" \
      '{node: $node, user: $user, desc: $desc, _repo: $repo, _host: $host}')

    insert_event "$event_id" "$occurred_at" "$payload"

    count=$((count + 1))
  done < "$cache_file"

  if [[ "$count" -gt 0 ]]; then
    echo "[hg-ingest] $repo: $count changesets ingested"
  fi
  total=$((total + count))
done
# Mark poller state so the worker skips hg: record (or refresh) the
# last_fetched time for the 'hg' source.
psql "$DATABASE_URL" -q -c "
  INSERT INTO poller_state (source, last_fetched)
  VALUES ('hg', now())
  ON CONFLICT (source) DO UPDATE SET last_fetched = now();
"

echo "[hg-ingest] done. total: $total changesets"