feat(worker): add commits to github search backfill
Walk back the earlier decision to skip /search/commits. The fork
inflation that worried me isn't misattribution — those commits
really were authored by the user; they just persist in forks after
the original repo went away. Skipping them dropped legitimate
historical work from the timeline.
The duplicate-SHA-across-forks issue is a pure dedup concern:
* keyed `github-commit:<sha>` (SHA only — globally unique by Git's
content addressing; same commit in two forks lands in one row);
* within a single page, dedup by id before INSERT (Postgres raises an
error when an ON CONFLICT DO UPDATE would affect the same row twice
in one statement);
* across pages and runs, last-write-wins via upsert. The repo
association may flip between forks but the commit content is
identical.
Visibility is read inline from `repository.private` on the search
item, so no extra lookup is needed. The commit pass also
opportunistically populates the shared visibility cache so the issue
loop in the same poll skips /repos/{full_name} GETs for any repo it
already saw via commits.
Reshape: presentation/github.rs gains a Commit path — short SHA
linked, repo linked, first line of the commit message as subtitle.
GitCommit icon.
Tests: +3 in github_search (parse uses sha as id, marks private,
rejects non-github URL), +1 in presentation (commit reshape uses
short sha + first message line) — 18 total green.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -6,8 +6,10 @@ use serde_json::Value;
|
||||
pub(crate) fn reshape(event: &Event) -> TimelineItem {
|
||||
// Search-API items have a different payload shape (the search item itself
|
||||
// rather than a wrapped event), so dispatch them through a separate path.
|
||||
if matches!(event.action.as_str(), "Issue" | "PullRequest") {
|
||||
return search_reshape(event);
|
||||
match event.action.as_str() {
|
||||
"Issue" | "PullRequest" => return search_reshape(event),
|
||||
"Commit" => return commit_reshape(event),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
let p = &event.payload;
|
||||
@@ -426,6 +428,58 @@ fn search_reshape(event: &Event) -> TimelineItem {
|
||||
}
|
||||
}
|
||||
|
||||
fn commit_reshape(event: &Event) -> TimelineItem {
|
||||
let p = &event.payload;
|
||||
let sha = p.get("sha").and_then(Value::as_str).unwrap_or("");
|
||||
let short_sha: String = sha.chars().take(7).collect();
|
||||
let html_url = p.get("html_url").and_then(Value::as_str).unwrap_or("");
|
||||
let message_first_line = p
|
||||
.get("commit")
|
||||
.and_then(|c| c.get("message"))
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("")
|
||||
.lines()
|
||||
.next()
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let repo = p
|
||||
.get("repository")
|
||||
.and_then(|r| r.get("full_name"))
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("(unknown repo)");
|
||||
let author_login = p
|
||||
.get("author")
|
||||
.and_then(|a| a.get("login"))
|
||||
.and_then(Value::as_str);
|
||||
|
||||
let mut title = Vec::new();
|
||||
if let Some(actor) = author_login {
|
||||
title.push(TitleSegment::link(
|
||||
actor.to_string(),
|
||||
format!("https://github.com/{actor}"),
|
||||
));
|
||||
title.push(TitleSegment::text(" "));
|
||||
}
|
||||
title.push(TitleSegment::text("committed "));
|
||||
title.push(TitleSegment::link(short_sha, html_url.to_string()));
|
||||
title.push(TitleSegment::text(" in "));
|
||||
title.push(repo_link(repo));
|
||||
|
||||
let subtitle = (!message_first_line.is_empty())
|
||||
.then(|| vec![TitleSegment::text(message_first_line)]);
|
||||
|
||||
TimelineItem {
|
||||
id: event.id.clone(),
|
||||
source: Source::Github,
|
||||
action: event.action.clone(),
|
||||
occurred_at: event.occurred_at,
|
||||
icon: TimelineIcon::GitCommit,
|
||||
title,
|
||||
subtitle,
|
||||
body: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn repo_from_url(url: &str) -> Option<String> {
|
||||
let stripped = url.strip_prefix("https://github.com/")?;
|
||||
let mut parts = stripped.splitn(3, '/');
|
||||
@@ -600,6 +654,35 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn commit_reshape_uses_short_sha_and_first_message_line() {
    // A commit item with a multi-line message, so the test can check that
    // only the first line survives.
    let payload = json!({
        "sha": "a6fcefbe909a97ad5a049b9fa48bc74309af10d9",
        "html_url": "https://github.com/faith1337z/Trade/commit/a6fcefbe909a97ad5a049b9fa48bc74309af10d9",
        "commit": {
            "message": "split multiline message into multiple irc messages\n\nbody body body"
        },
        "repository": { "full_name": "faith1337z/Trade" },
        "author": { "login": "grenade" }
    });

    let item = reshape(&ev("Commit", payload));
    assert_eq!(item.icon, TimelineIcon::GitCommit);

    // Flatten the title to its visible text, ignoring link targets.
    let mut rendered = String::new();
    for segment in &item.title {
        match segment {
            TitleSegment::Text { text } => rendered.push_str(text),
            TitleSegment::Link { text, .. } => rendered.push_str(text),
        }
    }
    assert!(
        rendered.contains("committed a6fcefb in faith1337z/Trade"),
        "got: {rendered}"
    );

    // The body of the commit message is dropped; only the first line
    // reaches the subtitle.
    assert_eq!(
        item.subtitle,
        Some(vec![TitleSegment::text(
            "split multiline message into multiple irc messages"
        )])
    );
}
|
||||
|
||||
#[test]
|
||||
fn unknown_event_falls_back() {
|
||||
let raw = json!({
|
||||
|
||||
Reference in New Issue
Block a user