feat(github): per-repo commit enumeration for full history backfill
Adds a new github-repo EventSource that enumerates all repos via
/user/repos and walks each repo's /commits?author= endpoint, which,
unlike the Search API, has no 1000-result cap. Events use the same
github-commit:{sha} ID scheme as github_search for deduplication.
Per-repo poller state enables a full backfill on the first run and
page-1-only fetches on subsequent polls. The poll interval defaults
to weekly.
Closes #1
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -7,6 +7,7 @@ use moments_data::{
|
||||
bugzilla::{BugzillaConfig, BugzillaSource},
|
||||
gitea::{GiteaConfig, GiteaSource},
|
||||
github::{GithubConfig, GithubSource},
|
||||
github_repo::{GithubRepoConfig, GithubRepoSource},
|
||||
github_search::{GithubSearchConfig, GithubSearchSource},
|
||||
hg::{HgConfig, HgSource},
|
||||
};
|
||||
@@ -35,6 +36,11 @@ struct Args {
|
||||
#[arg(long, env = "SEARCH_POLL_INTERVAL_SECS", default_value = "86400")]
|
||||
search_interval_secs: u64,
|
||||
|
||||
/// Seconds between per-repo commit enumeration polls (full history backfill).
|
||||
/// Defaults to weekly — expensive initial scan, cheap afterwards.
|
||||
#[arg(long, env = "REPO_POLL_INTERVAL_SECS", default_value = "604800")]
|
||||
repo_interval_secs: u64,
|
||||
|
||||
#[arg(long, env = "GITEA_HOST", default_value = "git.lair.cafe")]
|
||||
gitea_host: String,
|
||||
|
||||
@@ -132,6 +138,17 @@ async fn main() -> anyhow::Result<()> {
|
||||
},
|
||||
)) as Arc<dyn EventSource>;
|
||||
|
||||
let github_repo = Arc::new(GithubRepoSource::new(
|
||||
http.clone(),
|
||||
store.clone(),
|
||||
store.clone(),
|
||||
GithubRepoConfig {
|
||||
user: args.github_user.clone(),
|
||||
token: args.github_token.clone(),
|
||||
..Default::default()
|
||||
},
|
||||
)) as Arc<dyn EventSource>;
|
||||
|
||||
let gitea = Arc::new(GiteaSource::new(
|
||||
http.clone(),
|
||||
store.clone(),
|
||||
@@ -180,6 +197,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
bugzilla_email = args.bugzilla_email,
|
||||
events_interval_secs = args.interval_secs,
|
||||
search_interval_secs = args.search_interval_secs,
|
||||
repo_interval_secs = args.repo_interval_secs,
|
||||
gitea_interval_secs = args.gitea_interval_secs,
|
||||
hg_interval_secs = args.hg_interval_secs,
|
||||
bugzilla_interval_secs = args.bugzilla_interval_secs,
|
||||
@@ -188,6 +206,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
let interval = Duration::from_secs(args.interval_secs);
|
||||
let search_interval = Duration::from_secs(args.search_interval_secs);
|
||||
let repo_interval = Duration::from_secs(args.repo_interval_secs);
|
||||
let gitea_interval = Duration::from_secs(args.gitea_interval_secs);
|
||||
let hg_interval = Duration::from_secs(args.hg_interval_secs);
|
||||
let bugzilla_interval = Duration::from_secs(args.bugzilla_interval_secs);
|
||||
@@ -195,6 +214,8 @@ async fn main() -> anyhow::Result<()> {
|
||||
let github_task = tokio::spawn(async move { run_poller(github, interval).await });
|
||||
let github_search_task =
|
||||
tokio::spawn(async move { run_poller(github_search, search_interval).await });
|
||||
let github_repo_task =
|
||||
tokio::spawn(async move { run_poller(github_repo, repo_interval).await });
|
||||
let gitea_task = tokio::spawn(async move { run_poller(gitea, gitea_interval).await });
|
||||
let hg_task = tokio::spawn(async move { run_poller(hg, hg_interval).await });
|
||||
let bugzilla_task =
|
||||
@@ -204,6 +225,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
info!("shutdown signal received");
|
||||
github_task.abort();
|
||||
github_search_task.abort();
|
||||
github_repo_task.abort();
|
||||
gitea_task.abort();
|
||||
hg_task.abort();
|
||||
bugzilla_task.abort();
|
||||
|
||||
Reference in New Issue
Block a user