From f77a8ab48f156e830408b2f93df1a863d9736aae Mon Sep 17 00:00:00 2001 From: rob thijssen Date: Wed, 6 May 2026 05:03:41 +0300 Subject: [PATCH] fix: use since cursor in github-repo polls to prevent missed commits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After initial backfill, scan_repo was fetching only page 1 (100 most recent commits) per repo. If more than 100 commits landed between 7-day polls, older ones in that window were permanently missed. Now stores the newest commit date in poller_state.last_modified and passes it as &since= on subsequent polls, with full pagination, so only genuinely new commits are fetched but none are skipped. On first poll after deploy, last_modified is NULL so no since filter is applied — triggering a full re-backfill that catches any previously missed commits. Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/moments-data/src/github_repo.rs | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/crates/moments-data/src/github_repo.rs b/crates/moments-data/src/github_repo.rs index 807133f..cdbc03a 100644 --- a/crates/moments-data/src/github_repo.rs +++ b/crates/moments-data/src/github_repo.rs @@ -117,19 +117,23 @@ impl GithubRepoSource { Ok(repos) } - /// Fetch commits for a single repo, paginating fully on first run. + /// Fetch commits for a single repo, paginating fully on first run + /// and using `since` on subsequent runs to catch everything new. async fn scan_repo(&self, repo: &Repo) -> Result { let state_key = format!("github-repo:{}", repo.full_name); let prior = self.state.load(&state_key).await?; - let first_run = prior.is_none(); - let max_pages = if first_run { MAX_BACKFILL_PAGES } else { 1 }; + let since = prior.as_ref().and_then(|s| s.last_modified); let mut total = 0usize; - for page in 1..=max_pages { - let url = format!( + let mut newest: Option> = since; + for page in 1..=MAX_BACKFILL_PAGES { + let mut url = format!( "https://api.github.com/repos/{}/commits?author={}&per_page={}&page={}", repo.full_name, self.config.user, self.config.per_page, page ); + if let Some(since_dt) = since { + url.push_str(&format!("&since={}", since_dt.to_rfc3339())); + } let req = self.apply_headers(self.client.get(&url)); let resp = req .send() @@ -165,6 +169,13 @@ impl GithubRepoSource { .iter() .filter_map(|item| parse_commit(item, repo)) .collect(); + for ev in &events { + newest = Some(match newest { + Some(n) if ev.occurred_at > n => ev.occurred_at, + Some(n) => n, + None => ev.occurred_at, + }); + } total += self.writer.upsert_events(&events).await?; if items.len() < self.config.per_page as usize { @@ -172,7 +183,7 @@ impl GithubRepoSource { } } - self.state.touch(&state_key).await?; + self.state.save(&state_key, None, newest).await?; Ok(total) } }