feat(worker): add hg-edge and bugzilla pollers

Wires two historical sources for completeness with the 2019 timeline:

- hg-edge.mozilla.org: scans json-pushes for a configured set of
  build/* repos and matches changeset author client-side, since the
  pushlog `user=` filter targets the pusher (sheriffs/reviewers in
  this case) rather than the author. Polls daily: Mozilla has
  retired hg, so no new events are expected.
- bugzilla.mozilla.org: queries /rest/bug?creator=<email>. Without
  an API key, the unauthenticated endpoint returns only public bugs,
  which is what the public timeline wants anyway.

Reshape renders "<author> committed <short_node> in <repo>" for hg
and "filed bug #<id> in <product>" for bugzilla, both linking back
to the canonical upstream URL via a stamped `_host` payload field.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-03 19:55:41 +03:00
parent f750e8de47
commit 7919a2d9ab
7 changed files with 721 additions and 18 deletions

View File

@@ -4,9 +4,11 @@ use clap::Parser;
use moments_core::{EventSource, run_poller};
use moments_data::{
PgStore,
bugzilla::{BugzillaConfig, BugzillaSource},
gitea::{GiteaConfig, GiteaSource},
github::{GithubConfig, GithubSource},
github_search::{GithubSearchConfig, GithubSearchSource},
hg::{HgConfig, HgSource},
};
use reqwest::Client;
use tracing::info;
@@ -45,6 +47,45 @@ struct Args {
/// Seconds between Gitea activity-feed polls.
#[arg(long, env = "GITEA_POLL_INTERVAL_SECS", default_value = "600")]
gitea_interval_secs: u64,
#[arg(long, env = "HG_HOST", default_value = "hg-edge.mozilla.org")]
hg_host: String,
/// Comma-separated mozilla hg repo paths to scan, e.g. "build/puppet,build/tools".
#[arg(
long,
env = "HG_REPOS",
value_delimiter = ',',
default_value = "build/puppet,build/tools,build/buildbot-configs"
)]
hg_repos: Vec<String>,
/// Comma-separated case-insensitive substrings matched against changeset author fields.
#[arg(
long,
env = "HG_AUTHOR_TERMS",
value_delimiter = ',',
default_value = "thijssen,grenade"
)]
hg_author_terms: Vec<String>,
/// Seconds between hg pushlog scans (defaults to 24h — historical data).
#[arg(long, env = "HG_POLL_INTERVAL_SECS", default_value = "86400")]
hg_interval_secs: u64,
#[arg(long, env = "BUGZILLA_HOST", default_value = "bugzilla.mozilla.org")]
bugzilla_host: String,
#[arg(long, env = "BUGZILLA_EMAIL", default_value = "rthijssen@mozilla.com")]
bugzilla_email: String,
/// Optional bugzilla API key. Without one, only public bugs are returned.
#[arg(long, env = "BUGZILLA_API_KEY")]
bugzilla_api_key: Option<String>,
/// Seconds between bugzilla creator-query polls (defaults to 24h).
#[arg(long, env = "BUGZILLA_POLL_INTERVAL_SECS", default_value = "86400")]
bugzilla_interval_secs: u64,
}
#[tokio::main]
@@ -93,30 +134,66 @@ async fn main() -> anyhow::Result<()> {
},
)) as Arc<dyn EventSource>;
let hg = Arc::new(HgSource::new(
http.clone(),
store.clone(),
store.clone(),
HgConfig {
host: args.hg_host.clone(),
author_terms: args.hg_author_terms.clone(),
repos: args.hg_repos.clone(),
},
)) as Arc<dyn EventSource>;
let bugzilla = Arc::new(BugzillaSource::new(
http.clone(),
store.clone(),
store.clone(),
BugzillaConfig {
host: args.bugzilla_host.clone(),
creator_email: args.bugzilla_email.clone(),
api_key: args.bugzilla_api_key.clone(),
..Default::default()
},
)) as Arc<dyn EventSource>;
info!(
github_user = args.github_user,
gitea_host = args.gitea_host,
gitea_user = args.gitea_user,
hg_host = args.hg_host,
hg_repos = ?args.hg_repos,
bugzilla_host = args.bugzilla_host,
bugzilla_email = args.bugzilla_email,
events_interval_secs = args.interval_secs,
search_interval_secs = args.search_interval_secs,
gitea_interval_secs = args.gitea_interval_secs,
hg_interval_secs = args.hg_interval_secs,
bugzilla_interval_secs = args.bugzilla_interval_secs,
"worker started"
);
let interval = Duration::from_secs(args.interval_secs);
let search_interval = Duration::from_secs(args.search_interval_secs);
let gitea_interval = Duration::from_secs(args.gitea_interval_secs);
let hg_interval = Duration::from_secs(args.hg_interval_secs);
let bugzilla_interval = Duration::from_secs(args.bugzilla_interval_secs);
let github_task = tokio::spawn(async move { run_poller(github, interval).await });
let github_search_task =
tokio::spawn(async move { run_poller(github_search, search_interval).await });
let gitea_task = tokio::spawn(async move { run_poller(gitea, gitea_interval).await });
let hg_task = tokio::spawn(async move { run_poller(hg, hg_interval).await });
let bugzilla_task =
tokio::spawn(async move { run_poller(bugzilla, bugzilla_interval).await });
tokio::signal::ctrl_c().await?;
info!("shutdown signal received");
github_task.abort();
github_search_task.abort();
gitea_task.abort();
hg_task.abort();
bugzilla_task.abort();
Ok(())
}