diff --git a/crates/moments-data/src/github_repo.rs b/crates/moments-data/src/github_repo.rs index cdbc03a..bd2dc66 100644 --- a/crates/moments-data/src/github_repo.rs +++ b/crates/moments-data/src/github_repo.rs @@ -1,16 +1,20 @@ //! Per-repo commit enumeration for full GitHub history. //! -//! The Search API caps at 1000 results; this source enumerates all repos -//! the user can access via `/user/repos` and walks each repo's commit -//! history via `/repos/{owner}/{repo}/commits?author={user}` — no cap. +//! Discovers repos via two sources: +//! 1. REST `/user/repos` — repos where the user is owner, collaborator, +//! or org member. +//! 2. GraphQL `repositoriesContributedTo` — repos the user has committed +//! to or opened issues/PRs on, even without collaborator +//! status. No result cap (cursor-paginated). +//! +//! Then walks each repo's commit history via +//! `/repos/{owner}/{repo}/commits?author={user}` with a `since` cursor +//! to avoid re-fetching known commits. //! //! Events use `github-commit:{sha}` as their ID, matching the scheme in //! `github_search`, so duplicates are resolved via idempotent upsert. -//! -//! Per-repo poller state keys (`github-repo:{owner}/{repo}`) track which -//! repos have been fully backfilled. First run paginates the full history; -//! subsequent runs fetch only page 1. +use std::collections::HashSet; use std::sync::Arc; use async_trait::async_trait; @@ -114,6 +118,112 @@ impl GithubRepoSource { break; } } + + // Supplement with repos from GraphQL repositoriesContributedTo. + // This catches repos where the user contributed via PRs but isn't + // an owner, collaborator, or org member — no result cap. 
let mut known: HashSet<String> = repos.iter().map(|r| r.full_name.clone()).collect();
        match self.discover_contributed_repos().await {
            Ok(extra) => {
                // `insert` returns false for names already present, so repos found
                // earlier keep their entry and each repo is listed only once.
                for r in extra {
                    if known.insert(r.full_name.clone()) {
                        repos.push(r);
                    }
                }
            }
            Err(e) => {
                // Best-effort supplement: a GraphQL failure must not discard the
                // repos that were already discovered.
                warn!(error = %e, "GraphQL contributed-repos discovery failed; continuing with known repos");
            }
        }

        Ok(repos)
    }

    /// Discover repos the user has contributed to via the GitHub GraphQL API.
    ///
    /// Pages through `user.repositoriesContributedTo` (100 nodes per request,
    /// contribution types `COMMIT`, `PULL_REQUEST` and `ISSUE`) by following
    /// `pageInfo.endCursor` until `hasNextPage` is false. Nodes without a
    /// `nameWithOwner` string are skipped; a missing `isPrivate` is treated as
    /// `false`.
    ///
    /// Returns an empty list without making any request when no token is
    /// configured.
    ///
    /// # Errors
    ///
    /// * `SourceError::Http` — the request could not be sent, the response
    ///   status was not a success, or the response carried GraphQL `errors`.
    /// * `SourceError::Parse` — the response body was not valid JSON.
    async fn discover_contributed_repos(&self) -> Result<Vec<Repo>, SourceError> {
        // The login and the cursor travel as GraphQL variables rather than being
        // spliced into the query text, so a quote or backslash in either value
        // cannot break or alter the query.
        const QUERY: &str = "query($login: String!, $after: String) { \
            user(login: $login) { \
            repositoriesContributedTo(first: 100, after: $after, \
            contributionTypes: [COMMIT, PULL_REQUEST, ISSUE]) { \
            pageInfo { hasNextPage endCursor } \
            nodes { nameWithOwner isPrivate } } } }";

        let token = match &self.config.token {
            Some(t) => t,
            None => return Ok(Vec::new()),
        };

        let mut repos: Vec<Repo> = Vec::new();
        // `None` serializes to JSON null, which requests the first page.
        let mut cursor: Option<String> = None;

        loop {
            let body = serde_json::json!({
                "query": QUERY,
                "variables": { "login": self.config.user, "after": cursor },
            });

            let resp = self
                .client
                .post("https://api.github.com/graphql")
                .header(header::AUTHORIZATION, format!("Bearer {token}"))
                .header(header::USER_AGENT, USER_AGENT)
                .header(header::CONTENT_TYPE, "application/json")
                .json(&body)
                .send()
                .await
                .map_err(|e| SourceError::Http(e.to_string()))?;

            if !resp.status().is_success() {
                return Err(SourceError::Http(format!(
                    "{} POST graphql",
                    resp.status()
                )));
            }

            let data: Value = resp
                .json()
                .await
                .map_err(|e| SourceError::Parse(e.to_string()))?;

            // GraphQL reports query-level failures in an `errors` array, typically
            // alongside a 200 status. Any entry is treated as a failure, even one
            // that lacks a readable `message`.
            if let Some(first) = data
                .get("errors")
                .and_then(Value::as_array)
                .and_then(|errors| errors.first())
            {
                let msg = first
                    .get("message")
                    .and_then(Value::as_str)
                    .unwrap_or("unknown error");
                return Err(SourceError::Http(format!("GraphQL error: {msg}")));
            }

            // Indexing a `Value` with a missing key yields `Value::Null`, so an
            // unexpected response shape simply produces no nodes and no next page.
            let page = &data["data"]["user"]["repositoriesContributedTo"];

            if let Some(nodes) = page["nodes"].as_array() {
                repos.extend(nodes.iter().filter_map(|node| {
                    let full_name = node.get("nameWithOwner")?.as_str()?;
                    let private = node
                        .get("isPrivate")
                        .and_then(Value::as_bool)
                        .unwrap_or(false);
                    Some(Repo {
                        full_name: full_name.to_owned(),
                        private,
                    })
                }));
            }

            let has_next = page["pageInfo"]["hasNextPage"]
                .as_bool()
                .unwrap_or(false);
            if !has_next {
                break;
            }

            // A further page without a fresh cursor would make the next request
            // identical to an earlier one and loop forever; stop instead.
            match page["pageInfo"]["endCursor"].as_str() {
                Some(next) if cursor.as_deref() != Some(next) => {
                    cursor = Some(next.to_owned());
                }
                _ => {
                    warn!("GraphQL reported another page without a new cursor; stopping pagination");
                    break;
                }
            }
        }

        debug!(repos = repos.len(), "discovered contributed repos via GraphQL");
        Ok(repos)
    }