feat: discover contributed repos via GitHub GraphQL API
The REST /user/repos endpoint only returns repos where the user is owner, collaborator, or org member. Repos contributed to via PRs (e.g. polkadot-js/api, zed-industries/zed) were never discovered and their commits were missing from moments. Now supplements /user/repos with a GraphQL repositoriesContributedTo query, which returns all repos the user has committed to, opened issues/PRs on, or reviewed — with cursor-based pagination and no result cap. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,16 +1,20 @@
|
||||
//! Per-repo commit enumeration for full GitHub history.
|
||||
//!
|
||||
//! The Search API caps at 1000 results; this source enumerates all repos
|
||||
//! the user can access via `/user/repos` and walks each repo's commit
|
||||
//! history via `/repos/{owner}/{repo}/commits?author={user}` — no cap.
|
||||
//! Discovers repos via two sources:
|
||||
//! 1. REST `/user/repos` — repos where the user is owner, collaborator,
|
||||
//! or org member.
|
||||
//! 2. GraphQL `repositoriesContributedTo` — repos the user has committed
|
||||
//! to, opened issues/PRs on, or reviewed, even without collaborator
|
||||
//! status. No result cap (cursor-paginated).
|
||||
//!
|
||||
//! Then walks each repo's commit history via
|
||||
//! `/repos/{owner}/{repo}/commits?author={user}` with a `since` cursor
|
||||
//! to avoid re-fetching known commits.
|
||||
//!
|
||||
//! Events use `github-commit:{sha}` as their ID, matching the scheme in
|
||||
//! `github_search`, so duplicates are resolved via idempotent upsert.
|
||||
//!
|
||||
//! Per-repo poller state keys (`github-repo:{owner}/{repo}`) track which
|
||||
//! repos have been fully backfilled. First run paginates the full history;
|
||||
//! subsequent runs fetch only page 1.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
@@ -114,6 +118,112 @@ impl GithubRepoSource {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Supplement with repos from GraphQL repositoriesContributedTo.
|
||||
// This catches repos where the user contributed via PRs but isn't
|
||||
// an owner, collaborator, or org member — no result cap.
|
||||
let mut known: HashSet<String> = repos.iter().map(|r| r.full_name.clone()).collect();
|
||||
let contributed = self.discover_contributed_repos().await;
|
||||
match contributed {
|
||||
Ok(extra) => {
|
||||
for r in extra {
|
||||
if known.insert(r.full_name.clone()) {
|
||||
repos.push(r);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(error = %e, "GraphQL contributed-repos discovery failed; continuing with known repos");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(repos)
|
||||
}
|
||||
|
||||
/// Discover repos the user has contributed to via GraphQL.
|
||||
/// Uses cursor-based pagination with no result cap.
|
||||
async fn discover_contributed_repos(&self) -> Result<Vec<Repo>, SourceError> {
|
||||
let token = match &self.config.token {
|
||||
Some(t) => t,
|
||||
None => return Ok(vec![]),
|
||||
};
|
||||
|
||||
let mut repos = Vec::new();
|
||||
let mut cursor: Option<String> = None;
|
||||
|
||||
loop {
|
||||
let after = match &cursor {
|
||||
Some(c) => format!(", after: \"{}\"", c),
|
||||
None => String::new(),
|
||||
};
|
||||
let query = format!(
|
||||
r#"{{ user(login: "{}") {{ repositoriesContributedTo(first: 100, contributionTypes: [COMMIT, PULL_REQUEST, ISSUE]{}) {{ pageInfo {{ hasNextPage endCursor }} nodes {{ nameWithOwner isPrivate }} }} }} }}"#,
|
||||
self.config.user, after
|
||||
);
|
||||
let body = serde_json::json!({ "query": query });
|
||||
|
||||
let resp = self
|
||||
.client
|
||||
.post("https://api.github.com/graphql")
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header(header::USER_AGENT, USER_AGENT)
|
||||
.header(header::CONTENT_TYPE, "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| SourceError::Http(e.to_string()))?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
return Err(SourceError::Http(format!(
|
||||
"{} POST graphql",
|
||||
resp.status()
|
||||
)));
|
||||
}
|
||||
|
||||
let data: Value = resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| SourceError::Parse(e.to_string()))?;
|
||||
|
||||
// Check for GraphQL-level errors
|
||||
if let Some(errors) = data.get("errors").and_then(Value::as_array) {
|
||||
if let Some(msg) = errors.first().and_then(|e| e.get("message")).and_then(Value::as_str) {
|
||||
return Err(SourceError::Http(format!("GraphQL error: {msg}")));
|
||||
}
|
||||
}
|
||||
|
||||
let contributed = &data["data"]["user"]["repositoriesContributedTo"];
|
||||
let nodes = contributed["nodes"].as_array();
|
||||
if let Some(nodes) = nodes {
|
||||
for node in nodes {
|
||||
let full_name = node
|
||||
.get("nameWithOwner")
|
||||
.and_then(Value::as_str);
|
||||
let private = node
|
||||
.get("isPrivate")
|
||||
.and_then(Value::as_bool)
|
||||
.unwrap_or(false);
|
||||
if let Some(name) = full_name {
|
||||
repos.push(Repo {
|
||||
full_name: name.to_string(),
|
||||
private,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let has_next = contributed["pageInfo"]["hasNextPage"]
|
||||
.as_bool()
|
||||
.unwrap_or(false);
|
||||
if !has_next {
|
||||
break;
|
||||
}
|
||||
cursor = contributed["pageInfo"]["endCursor"]
|
||||
.as_str()
|
||||
.map(String::from);
|
||||
}
|
||||
|
||||
debug!(repos = repos.len(), "discovered contributed repos via GraphQL");
|
||||
Ok(repos)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user