The new Gitea Actions build gate runs `cargo fmt --check`, `clippy -D warnings`, and `cargo test` — stricter than the old deploy.sh, which only ran `cargo build`. That surfaced pre-existing drift that never compiled under the test/clippy profile:

- apply rustfmt across the workspace (formatting only, no logic changes)
- moments-data: add the missing `prune_events` to the test-only `NoopWriter` stub (the EventWriter trait gained it with the blog-prune feature; a plain `cargo build` never compiles the `#[cfg(test)]` stub, so it went stale)
- moments-api: `.max().min()` -> `.clamp()`, and build `usvg::Options` with struct-update syntax instead of post-Default field assignment

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01X7zF7Kf4JqDwa6M8Qgge9M
279 lines
9.3 KiB
Rust
279 lines
9.3 KiB
Rust
use std::{sync::Arc, time::Duration};
|
|
|
|
use clap::Parser;
|
|
use moments_core::{EventSource, run_poller};
|
|
use moments_data::{
|
|
PgStore,
|
|
blog::{BlogConfig, BlogSource},
|
|
bugzilla::{BugzillaConfig, BugzillaSource},
|
|
gitea::{GiteaConfig, GiteaSource},
|
|
github::{GithubConfig, GithubSource},
|
|
github_repo::{GithubRepoConfig, GithubRepoSource},
|
|
github_search::{GithubSearchConfig, GithubSearchSource},
|
|
hg::{HgConfig, HgSource},
|
|
};
|
|
use reqwest::Client;
|
|
use tracing::info;
|
|
|
|
#[derive(Parser, Debug)]
|
|
#[command(version, about = "moments ingestion worker")]
|
|
struct Args {
|
|
#[arg(long, env = "DATABASE_URL")]
|
|
database_url: String,
|
|
|
|
#[arg(long, env = "GITHUB_USER", default_value = "grenade")]
|
|
github_user: String,
|
|
|
|
/// Optional GitHub token. Higher rate limit and access to private events.
|
|
#[arg(long, env = "GITHUB_TOKEN")]
|
|
github_token: Option<String>,
|
|
|
|
/// Seconds between Events-API polls (live feed, last 90 days).
|
|
#[arg(long, env = "POLL_INTERVAL_SECS", default_value = "600")]
|
|
interval_secs: u64,
|
|
|
|
/// Seconds between Search-API polls (historical issue/PR backfill).
|
|
/// Defaults to 24h — this is a backfill, not a live feed.
|
|
#[arg(long, env = "SEARCH_POLL_INTERVAL_SECS", default_value = "86400")]
|
|
search_interval_secs: u64,
|
|
|
|
/// Seconds between per-repo commit enumeration polls (full history backfill).
|
|
/// Defaults to weekly — expensive initial scan, cheap afterwards.
|
|
#[arg(long, env = "REPO_POLL_INTERVAL_SECS", default_value = "604800")]
|
|
repo_interval_secs: u64,
|
|
|
|
#[arg(long, env = "GITEA_HOST", default_value = "git.lair.cafe")]
|
|
gitea_host: String,
|
|
|
|
#[arg(long, env = "GITEA_USER", default_value = "grenade")]
|
|
gitea_user: String,
|
|
|
|
#[arg(long, env = "GITEA_TOKEN")]
|
|
gitea_token: Option<String>,
|
|
|
|
/// Seconds between Gitea activity-feed polls.
|
|
#[arg(long, env = "GITEA_POLL_INTERVAL_SECS", default_value = "600")]
|
|
gitea_interval_secs: u64,
|
|
|
|
#[arg(long, env = "HG_HOST", default_value = "hg-edge.mozilla.org")]
|
|
hg_host: String,
|
|
|
|
/// Comma-separated repo groups to scan. Repos within each group are
|
|
/// discovered via `/{group}/?style=json`.
|
|
#[arg(
|
|
long,
|
|
env = "HG_GROUPS",
|
|
value_delimiter = ',',
|
|
default_value = "build,integration"
|
|
)]
|
|
hg_groups: Vec<String>,
|
|
|
|
/// Comma-separated individual repos to scan (e.g. `mozilla-central`).
|
|
#[arg(
|
|
long,
|
|
env = "HG_REPOS",
|
|
value_delimiter = ',',
|
|
default_value = "mozilla-central"
|
|
)]
|
|
hg_repos: Vec<String>,
|
|
|
|
/// Comma-separated author substrings for `author()` revset queries.
|
|
#[arg(
|
|
long,
|
|
env = "HG_AUTHOR_TERMS",
|
|
value_delimiter = ',',
|
|
default_value = "rthijssen,grenade"
|
|
)]
|
|
hg_author_terms: Vec<String>,
|
|
|
|
/// Seconds between hg pushlog scans (defaults to 24h — historical data).
|
|
#[arg(long, env = "HG_POLL_INTERVAL_SECS", default_value = "86400")]
|
|
hg_interval_secs: u64,
|
|
|
|
#[arg(long, env = "BUGZILLA_HOST", default_value = "bugzilla.mozilla.org")]
|
|
bugzilla_host: String,
|
|
|
|
#[arg(long, env = "BUGZILLA_EMAIL", default_value = "rthijssen@mozilla.com")]
|
|
bugzilla_email: String,
|
|
|
|
/// Optional bugzilla API key. Without one, only public bugs are returned.
|
|
#[arg(long, env = "BUGZILLA_API_KEY")]
|
|
bugzilla_api_key: Option<String>,
|
|
|
|
/// Seconds between bugzilla creator-query polls (defaults to 24h).
|
|
#[arg(long, env = "BUGZILLA_POLL_INTERVAL_SECS", default_value = "86400")]
|
|
bugzilla_interval_secs: u64,
|
|
|
|
/// Gitea repo holding blog posts (markdown + frontmatter at the repo
|
|
/// root, on `GITEA_HOST`). Empty string disables blog ingestion.
|
|
#[arg(long, env = "BLOG_REPO", default_value = "grenade/blog")]
|
|
blog_repo: String,
|
|
|
|
#[arg(long, env = "BLOG_BRANCH", default_value = "main")]
|
|
blog_branch: String,
|
|
|
|
/// Seconds between blog repo polls (cheap: one branch-tip request when
|
|
/// nothing changed).
|
|
#[arg(long, env = "BLOG_POLL_INTERVAL_SECS", default_value = "600")]
|
|
blog_interval_secs: u64,
|
|
}
|
|
|
|
#[tokio::main]
|
|
async fn main() -> anyhow::Result<()> {
|
|
init_tracing();
|
|
let args = Args::parse();
|
|
|
|
let store = Arc::new(PgStore::connect(&args.database_url).await?);
|
|
store.migrate().await?;
|
|
|
|
let http = Client::builder().timeout(Duration::from_secs(30)).build()?;
|
|
|
|
let github = Arc::new(GithubSource::new(
|
|
http.clone(),
|
|
store.clone(),
|
|
store.clone(),
|
|
GithubConfig {
|
|
user: args.github_user.clone(),
|
|
token: args.github_token.clone(),
|
|
per_page: 100,
|
|
},
|
|
)) as Arc<dyn EventSource>;
|
|
|
|
let github_search = Arc::new(GithubSearchSource::new(
|
|
http.clone(),
|
|
store.clone(),
|
|
store.clone(),
|
|
GithubSearchConfig {
|
|
user: args.github_user.clone(),
|
|
token: args.github_token.clone(),
|
|
..Default::default()
|
|
},
|
|
)) as Arc<dyn EventSource>;
|
|
|
|
let github_repo = Arc::new(GithubRepoSource::new(
|
|
http.clone(),
|
|
store.clone(),
|
|
store.clone(),
|
|
GithubRepoConfig {
|
|
user: args.github_user.clone(),
|
|
token: args.github_token.clone(),
|
|
..Default::default()
|
|
},
|
|
)) as Arc<dyn EventSource>;
|
|
|
|
let gitea = Arc::new(GiteaSource::new(
|
|
http.clone(),
|
|
store.clone(),
|
|
store.clone(),
|
|
GiteaConfig {
|
|
host: args.gitea_host.clone(),
|
|
user: args.gitea_user.clone(),
|
|
token: args.gitea_token.clone(),
|
|
..Default::default()
|
|
},
|
|
)) as Arc<dyn EventSource>;
|
|
|
|
let hg = Arc::new(HgSource::new(
|
|
http.clone(),
|
|
store.clone(),
|
|
store.clone(),
|
|
HgConfig {
|
|
host: args.hg_host.clone(),
|
|
author_terms: args.hg_author_terms.clone(),
|
|
groups: args.hg_groups.clone(),
|
|
repos: args.hg_repos.clone(),
|
|
},
|
|
)) as Arc<dyn EventSource>;
|
|
|
|
let bugzilla = Arc::new(BugzillaSource::new(
|
|
http.clone(),
|
|
store.clone(),
|
|
store.clone(),
|
|
BugzillaConfig {
|
|
host: args.bugzilla_host.clone(),
|
|
creator_email: args.bugzilla_email.clone(),
|
|
api_key: args.bugzilla_api_key.clone(),
|
|
..Default::default()
|
|
},
|
|
)) as Arc<dyn EventSource>;
|
|
|
|
let blog = (!args.blog_repo.is_empty()).then(|| {
|
|
Arc::new(BlogSource::new(
|
|
http.clone(),
|
|
store.clone(),
|
|
store.clone(),
|
|
BlogConfig {
|
|
host: args.gitea_host.clone(),
|
|
repo: args.blog_repo.clone(),
|
|
branch: args.blog_branch.clone(),
|
|
token: args.gitea_token.clone(),
|
|
},
|
|
)) as Arc<dyn EventSource>
|
|
});
|
|
|
|
info!(
|
|
github_user = args.github_user,
|
|
gitea_host = args.gitea_host,
|
|
gitea_user = args.gitea_user,
|
|
hg_host = args.hg_host,
|
|
hg_groups = ?args.hg_groups,
|
|
hg_repos = ?args.hg_repos,
|
|
hg_author_terms = ?args.hg_author_terms,
|
|
bugzilla_host = args.bugzilla_host,
|
|
bugzilla_email = args.bugzilla_email,
|
|
events_interval_secs = args.interval_secs,
|
|
search_interval_secs = args.search_interval_secs,
|
|
repo_interval_secs = args.repo_interval_secs,
|
|
gitea_interval_secs = args.gitea_interval_secs,
|
|
hg_interval_secs = args.hg_interval_secs,
|
|
bugzilla_interval_secs = args.bugzilla_interval_secs,
|
|
blog_repo = args.blog_repo,
|
|
blog_branch = args.blog_branch,
|
|
blog_interval_secs = args.blog_interval_secs,
|
|
"worker started"
|
|
);
|
|
|
|
let interval = Duration::from_secs(args.interval_secs);
|
|
let search_interval = Duration::from_secs(args.search_interval_secs);
|
|
let repo_interval = Duration::from_secs(args.repo_interval_secs);
|
|
let gitea_interval = Duration::from_secs(args.gitea_interval_secs);
|
|
let hg_interval = Duration::from_secs(args.hg_interval_secs);
|
|
let bugzilla_interval = Duration::from_secs(args.bugzilla_interval_secs);
|
|
let blog_interval = Duration::from_secs(args.blog_interval_secs);
|
|
|
|
let github_task = tokio::spawn(async move { run_poller(github, interval).await });
|
|
let github_search_task =
|
|
tokio::spawn(async move { run_poller(github_search, search_interval).await });
|
|
let github_repo_task =
|
|
tokio::spawn(async move { run_poller(github_repo, repo_interval).await });
|
|
let gitea_task = tokio::spawn(async move { run_poller(gitea, gitea_interval).await });
|
|
let hg_task = tokio::spawn(async move { run_poller(hg, hg_interval).await });
|
|
let bugzilla_task = tokio::spawn(async move { run_poller(bugzilla, bugzilla_interval).await });
|
|
let blog_task =
|
|
blog.map(|src| tokio::spawn(async move { run_poller(src, blog_interval).await }));
|
|
|
|
tokio::signal::ctrl_c().await?;
|
|
info!("shutdown signal received");
|
|
github_task.abort();
|
|
github_search_task.abort();
|
|
github_repo_task.abort();
|
|
gitea_task.abort();
|
|
hg_task.abort();
|
|
bugzilla_task.abort();
|
|
if let Some(task) = blog_task {
|
|
task.abort();
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn init_tracing() {
|
|
use tracing_subscriber::{EnvFilter, fmt};
|
|
let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
|
|
let json = std::env::var("JOURNAL_STREAM").is_ok();
|
|
if json {
|
|
fmt().with_env_filter(filter).json().init();
|
|
} else {
|
|
fmt().with_env_filter(filter).init();
|
|
}
|
|
}
|