feat: discover contributed repos via GitHub GraphQL API
The REST /user/repos endpoint only returns repos where the user is owner, collaborator, or org member. Repos contributed to via PRs (e.g. polkadot-js/api, zed-industries/zed) were never discovered and their commits were missing from moments. Now supplements /user/repos with a GraphQL repositoriesContributedTo query, which returns all repos the user has committed to, opened issues/PRs on, or reviewed — with cursor-based pagination and no result cap. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,16 +1,20 @@
|
|||||||
//! Per-repo commit enumeration for full GitHub history.
|
//! Per-repo commit enumeration for full GitHub history.
|
||||||
//!
|
//!
|
||||||
//! The Search API caps at 1000 results; this source enumerates all repos
|
//! Discovers repos via two sources:
|
||||||
//! the user can access via `/user/repos` and walks each repo's commit
|
//! 1. REST `/user/repos` — repos where the user is owner, collaborator,
|
||||||
//! history via `/repos/{owner}/{repo}/commits?author={user}` — no cap.
|
//! or org member.
|
||||||
|
//! 2. GraphQL `repositoriesContributedTo` — repos the user has committed
|
||||||
|
//! to or opened issues/PRs on, even without collaborator
|
||||||
|
//! status. No result cap (cursor-paginated).
|
||||||
|
//!
|
||||||
|
//! Then walks each repo's commit history via
|
||||||
|
//! `/repos/{owner}/{repo}/commits?author={user}` with a `since` cursor
|
||||||
|
//! to avoid re-fetching known commits.
|
||||||
//!
|
//!
|
||||||
//! Events use `github-commit:{sha}` as their ID, matching the scheme in
|
//! Events use `github-commit:{sha}` as their ID, matching the scheme in
|
||||||
//! `github_search`, so duplicates are resolved via idempotent upsert.
|
//! `github_search`, so duplicates are resolved via idempotent upsert.
|
||||||
//!
|
|
||||||
//! Per-repo poller state keys (`github-repo:{owner}/{repo}`) track which
|
|
||||||
//! repos have been fully backfilled. First run paginates the full history;
|
|
||||||
//! subsequent runs fetch only page 1.
|
|
||||||
|
|
||||||
|
use std::collections::HashSet;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
@@ -114,6 +118,112 @@ impl GithubRepoSource {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Supplement with repos from GraphQL repositoriesContributedTo.
|
||||||
|
// This catches repos where the user contributed via PRs but isn't
|
||||||
|
// an owner, collaborator, or org member — no result cap.
|
||||||
|
let mut known: HashSet<String> = repos.iter().map(|r| r.full_name.clone()).collect();
|
||||||
|
let contributed = self.discover_contributed_repos().await;
|
||||||
|
match contributed {
|
||||||
|
Ok(extra) => {
|
||||||
|
for r in extra {
|
||||||
|
if known.insert(r.full_name.clone()) {
|
||||||
|
repos.push(r);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
warn!(error = %e, "GraphQL contributed-repos discovery failed; continuing with known repos");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(repos)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Discover repos the user has contributed to via GraphQL.
|
||||||
|
/// Uses cursor-based pagination with no result cap.
|
||||||
|
async fn discover_contributed_repos(&self) -> Result<Vec<Repo>, SourceError> {
|
||||||
|
let token = match &self.config.token {
|
||||||
|
Some(t) => t,
|
||||||
|
None => return Ok(vec![]),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut repos = Vec::new();
|
||||||
|
let mut cursor: Option<String> = None;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let after = match &cursor {
|
||||||
|
Some(c) => format!(", after: \"{}\"", c),
|
||||||
|
None => String::new(),
|
||||||
|
};
|
||||||
|
let query = format!(
|
||||||
|
r#"{{ user(login: "{}") {{ repositoriesContributedTo(first: 100, contributionTypes: [COMMIT, PULL_REQUEST, ISSUE]{}) {{ pageInfo {{ hasNextPage endCursor }} nodes {{ nameWithOwner isPrivate }} }} }} }}"#,
|
||||||
|
self.config.user, after
|
||||||
|
);
|
||||||
|
let body = serde_json::json!({ "query": query });
|
||||||
|
|
||||||
|
let resp = self
|
||||||
|
.client
|
||||||
|
.post("https://api.github.com/graphql")
|
||||||
|
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||||
|
.header(header::USER_AGENT, USER_AGENT)
|
||||||
|
.header(header::CONTENT_TYPE, "application/json")
|
||||||
|
.json(&body)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|e| SourceError::Http(e.to_string()))?;
|
||||||
|
|
||||||
|
if !resp.status().is_success() {
|
||||||
|
return Err(SourceError::Http(format!(
|
||||||
|
"{} POST graphql",
|
||||||
|
resp.status()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let data: Value = resp
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(|e| SourceError::Parse(e.to_string()))?;
|
||||||
|
|
||||||
|
// Check for GraphQL-level errors
|
||||||
|
if let Some(errors) = data.get("errors").and_then(Value::as_array) {
|
||||||
|
if let Some(msg) = errors.first().and_then(|e| e.get("message")).and_then(Value::as_str) {
|
||||||
|
return Err(SourceError::Http(format!("GraphQL error: {msg}")));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let contributed = &data["data"]["user"]["repositoriesContributedTo"];
|
||||||
|
let nodes = contributed["nodes"].as_array();
|
||||||
|
if let Some(nodes) = nodes {
|
||||||
|
for node in nodes {
|
||||||
|
let full_name = node
|
||||||
|
.get("nameWithOwner")
|
||||||
|
.and_then(Value::as_str);
|
||||||
|
let private = node
|
||||||
|
.get("isPrivate")
|
||||||
|
.and_then(Value::as_bool)
|
||||||
|
.unwrap_or(false);
|
||||||
|
if let Some(name) = full_name {
|
||||||
|
repos.push(Repo {
|
||||||
|
full_name: name.to_string(),
|
||||||
|
private,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let has_next = contributed["pageInfo"]["hasNextPage"]
|
||||||
|
.as_bool()
|
||||||
|
.unwrap_or(false);
|
||||||
|
if !has_next {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
cursor = contributed["pageInfo"]["endCursor"]
|
||||||
|
.as_str()
|
||||||
|
.map(String::from);
|
||||||
|
}
|
||||||
|
|
||||||
|
debug!(repos = repos.len(), "discovered contributed repos via GraphQL");
|
||||||
Ok(repos)
|
Ok(repos)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user