feat: discover contributed repos via GitHub GraphQL API

The REST /user/repos endpoint only returns repos where the user is
owner, collaborator, or org member. Repos contributed to via PRs
(e.g. polkadot-js/api, zed-industries/zed) were never discovered
and their commits were missing from moments.

Now supplements /user/repos with a GraphQL
repositoriesContributedTo query, which returns all repos the user
has committed to, opened issues/PRs on, or reviewed — with cursor-
based pagination and no result cap.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-06 05:38:57 +03:00
parent 2a20b47a29
commit c66aaeb268

View File

@@ -1,16 +1,20 @@
//! Per-repo commit enumeration for full GitHub history.
//!
//! The Search API caps at 1000 results; this source enumerates all repos
//! the user can access via `/user/repos` and walks each repo's commit
//! history via `/repos/{owner}/{repo}/commits?author={user}` — no cap.
//! Discovers repos via two sources:
//! 1. REST `/user/repos` — repos where the user is owner, collaborator,
//! or org member.
//! 2. GraphQL `repositoriesContributedTo` — repos the user has committed
//! to, opened issues/PRs on, or reviewed, even without collaborator
//! status. No result cap (cursor-paginated).
//!
//! Then walks each repo's commit history via
//! `/repos/{owner}/{repo}/commits?author={user}` with a `since` cursor
//! to avoid re-fetching known commits.
//!
//! Events use `github-commit:{sha}` as their ID, matching the scheme in
//! `github_search`, so duplicates are resolved via idempotent upsert.
//!
//! Per-repo poller state keys (`github-repo:{owner}/{repo}`) track which
//! repos have been fully backfilled. First run paginates the full history;
//! subsequent runs fetch only page 1.
use std::collections::HashSet;
use std::sync::Arc;
use async_trait::async_trait;
@@ -114,6 +118,112 @@ impl GithubRepoSource {
break;
}
}
// Supplement with repos from GraphQL repositoriesContributedTo.
// This catches repos where the user contributed via PRs but isn't
// an owner, collaborator, or org member — no result cap.
let mut known: HashSet<String> = repos.iter().map(|r| r.full_name.clone()).collect();
let contributed = self.discover_contributed_repos().await;
match contributed {
Ok(extra) => {
for r in extra {
if known.insert(r.full_name.clone()) {
repos.push(r);
}
}
}
Err(e) => {
warn!(error = %e, "GraphQL contributed-repos discovery failed; continuing with known repos");
}
}
Ok(repos)
}
/// Discover repos the user has contributed to via GraphQL.
/// Uses cursor-based pagination with no result cap.
async fn discover_contributed_repos(&self) -> Result<Vec<Repo>, SourceError> {
let token = match &self.config.token {
Some(t) => t,
None => return Ok(vec![]),
};
let mut repos = Vec::new();
let mut cursor: Option<String> = None;
loop {
let after = match &cursor {
Some(c) => format!(", after: \"{}\"", c),
None => String::new(),
};
let query = format!(
r#"{{ user(login: "{}") {{ repositoriesContributedTo(first: 100, contributionTypes: [COMMIT, PULL_REQUEST, ISSUE]{}) {{ pageInfo {{ hasNextPage endCursor }} nodes {{ nameWithOwner isPrivate }} }} }} }}"#,
self.config.user, after
);
let body = serde_json::json!({ "query": query });
let resp = self
.client
.post("https://api.github.com/graphql")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.header(header::USER_AGENT, USER_AGENT)
.header(header::CONTENT_TYPE, "application/json")
.json(&body)
.send()
.await
.map_err(|e| SourceError::Http(e.to_string()))?;
if !resp.status().is_success() {
return Err(SourceError::Http(format!(
"{} POST graphql",
resp.status()
)));
}
let data: Value = resp
.json()
.await
.map_err(|e| SourceError::Parse(e.to_string()))?;
// Check for GraphQL-level errors
if let Some(errors) = data.get("errors").and_then(Value::as_array) {
if let Some(msg) = errors.first().and_then(|e| e.get("message")).and_then(Value::as_str) {
return Err(SourceError::Http(format!("GraphQL error: {msg}")));
}
}
let contributed = &data["data"]["user"]["repositoriesContributedTo"];
let nodes = contributed["nodes"].as_array();
if let Some(nodes) = nodes {
for node in nodes {
let full_name = node
.get("nameWithOwner")
.and_then(Value::as_str);
let private = node
.get("isPrivate")
.and_then(Value::as_bool)
.unwrap_or(false);
if let Some(name) = full_name {
repos.push(Repo {
full_name: name.to_string(),
private,
});
}
}
}
let has_next = contributed["pageInfo"]["hasNextPage"]
.as_bool()
.unwrap_or(false);
if !has_next {
break;
}
cursor = contributed["pageInfo"]["endCursor"]
.as_str()
.map(String::from);
}
debug!(repos = repos.len(), "discovered contributed repos via GraphQL");
Ok(repos)
}