Files
moments/crates/moments-core/src/presentation/github.rs
rob thijssen 7772393598 feat(worker): add commits to github search backfill
Walk back the earlier decision to skip /search/commits. The fork
inflation that worried me isn't misattribution — those commits
really were authored by the user; they just persist in forks after
the original repo went away. Skipping them dropped legitimate
historical work from the timeline.

The duplicate-SHA-across-forks issue is a pure dedup concern:
  * keyed `github-commit:<sha>` (SHA only — globally unique by Git's
    content addressing; same commit in two forks lands in one row);
  * within a single page, dedup by id before INSERT (postgres ON
    CONFLICT errors when the conflict target appears twice in one
    statement);
  * across pages and runs, last-write-wins via upsert. The repo
    association may flip between forks but the commit content is
    identical.

Visibility is read inline from `repository.private` on the search
item, no extra lookup needed. Also opportunistically populates the
shared visibility cache so the issue loop in the same poll skips
/repos/{full_name} GETs for any repo it already saw via commits.

Reshape: presentation/github.rs gains a Commit path — short SHA
linked, repo linked, first line of the commit message as subtitle.
GitCommit icon.

Tests: +3 in github_search (parse uses sha as id, marks private,
rejects non-github URL), +1 in presentation (commit reshape uses
short sha + first message line) — 18 total green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 18:54:32 +03:00

698 lines
24 KiB
Rust

use moments_entities::{
CommitSummary, Event, Source, TimelineBody, TimelineIcon, TimelineItem, TitleSegment,
};
use serde_json::Value;
/// Turns a stored GitHub [`Event`] into a [`TimelineItem`] ready for display.
///
/// Actions `Issue`, `PullRequest` and `Commit` are search-API items whose
/// payload is the search item itself, so they are routed to dedicated
/// reshapers. Any other action is read as a wrapped event: `repo.name` and the
/// actor login come from the outer object, and the nested `payload` object is
/// passed to the builder matching the action. When an actor login is present
/// it is prepended to the title as a link to the actor's profile.
pub(crate) fn reshape(event: &Event) -> TimelineItem {
    // Search-API items have a different payload shape (the search item itself
    // rather than a wrapped event), so dispatch them through a separate path.
    match event.action.as_str() {
        "Issue" | "PullRequest" => return search_reshape(event),
        "Commit" => return commit_reshape(event),
        _ => {}
    }

    let p = &event.payload;
    let repo_name = p.get("repo").and_then(|r| r.get("name")).and_then(Value::as_str);
    // Convert each candidate to a string before falling back: a
    // `display_login` that is present but not a string (e.g. JSON null) must
    // not prevent `login` from being used.
    let actor_login = p.get("actor").and_then(|a| {
        a.get("display_login")
            .and_then(Value::as_str)
            .or_else(|| a.get("login").and_then(Value::as_str))
    });
    let inner = p.get("payload");

    let (icon, title, subtitle, body) = match event.action.as_str() {
        "PushEvent" => push(repo_name, inner),
        "PullRequestEvent" => pull_request(repo_name, inner),
        "PullRequestReviewEvent" => pull_request_review(repo_name, inner),
        "PullRequestReviewCommentEvent" => pull_request_review_comment(repo_name, inner),
        "IssuesEvent" => issues(repo_name, inner),
        "IssueCommentEvent" => issue_comment(repo_name, inner),
        "CreateEvent" => create(repo_name, inner),
        "DeleteEvent" => delete(repo_name, inner),
        "ForkEvent" => fork(repo_name, inner),
        "WatchEvent" => watch(repo_name),
        "ReleaseEvent" => release(repo_name, inner),
        "CommitCommentEvent" => commit_comment(repo_name, inner),
        "PublicEvent" => public(repo_name),
        _ => fallback(repo_name, &event.action),
    };

    let title = match actor_login {
        Some(actor) => {
            // Two segments (actor link + separating space) precede the
            // event-specific title, so reserve room for both.
            let mut segs = Vec::with_capacity(title.len() + 2);
            segs.push(TitleSegment::link(
                actor.to_string(),
                format!("https://github.com/{actor}"),
            ));
            segs.push(TitleSegment::text(" "));
            segs.extend(title);
            segs
        }
        None => title,
    };

    TimelineItem {
        id: event.id.clone(),
        source: Source::Github,
        action: event.action.clone(),
        occurred_at: event.occurred_at,
        icon,
        title,
        subtitle,
        body,
    }
}
/// Builds a title segment that displays `repo` and links to
/// `https://github.com/<repo>`.
fn repo_link(repo: &str) -> TitleSegment {
    let href = format!("https://github.com/{repo}");
    let label = repo.to_string();
    TitleSegment::link(label, href)
}
/// Returns the web URL of pull request `number` in `repo` (`owner/name`).
fn pr_url(repo: &str, number: i64) -> String {
    let mut url = String::from("https://github.com/");
    url.push_str(repo);
    url.push_str("/pull/");
    url.push_str(&number.to_string());
    url
}
/// Returns the web URL of issue `number` in `repo` (`owner/name`).
fn issue_url(repo: &str, number: i64) -> String {
    let mut url = String::from("https://github.com/");
    url.push_str(repo);
    url.push_str("/issues/");
    url.push_str(&number.to_string());
    url
}
/// Returns the web URL of commit `sha` in `repo` (`owner/name`).
fn commit_url(repo: &str, sha: &str) -> String {
    ["https://github.com/", repo, "/commit/", sha].concat()
}
/// Strips a leading `refs/heads/` from a git ref, returning the bare branch
/// name; any ref without that prefix is returned unchanged.
fn ref_branch(r: &str) -> &str {
    match r.strip_prefix("refs/heads/") {
        Some(branch) => branch,
        None => r,
    }
}
/// What every per-event builder returns, in the order `reshape` destructures
/// it: icon, title segments, optional subtitle segments, optional body.
type Reshaped = (
TimelineIcon,
Vec<TitleSegment>,
Option<Vec<TitleSegment>>,
Option<TimelineBody>,
);
/// Reshapes a `PushEvent` payload into "pushed N commit(s) to <repo>:<branch>"
/// with the pushed commits as the body.
///
/// * The commit count is `distinct_size`, falling back to `size`. When the
///   payload carries neither as an integer the count is unknown and the title
///   reads "pushed to <repo>" instead of claiming zero commits.
/// * The branch is `ref` with any `refs/heads/` prefix removed; the
///   `:<branch>` suffix is omitted when no ref is available.
/// * Each entry of `commits` that has a string `sha` becomes a
///   [`CommitSummary`] holding the first line of its message. The body is
///   `None` when no such entries exist.
fn push(repo: Option<&str>, p: Option<&Value>) -> Reshaped {
    let repo = repo.unwrap_or("(unknown repo)");

    // Resolve each candidate to an integer before falling back, so a
    // `distinct_size` that is present but null does not mask `size`.
    let size = p.and_then(|v| {
        v.get("distinct_size")
            .and_then(Value::as_i64)
            .or_else(|| v.get("size").and_then(Value::as_i64))
    });
    let branch = p
        .and_then(|v| v.get("ref"))
        .and_then(Value::as_str)
        .map(ref_branch)
        .filter(|b| !b.is_empty());

    let lead = match size {
        Some(n) => format!("pushed {n} commit{} to ", if n == 1 { "" } else { "s" }),
        None => String::from("pushed to "),
    };
    let mut title = vec![TitleSegment::text(lead), repo_link(repo)];
    if let Some(branch) = branch {
        // Only append the separator when there is a branch to show after it.
        title.push(TitleSegment::text(format!(":{branch}")));
    }

    let commits: Vec<CommitSummary> = p
        .and_then(|v| v.get("commits"))
        .and_then(Value::as_array)
        .map(|arr| {
            arr.iter()
                .filter_map(|c| {
                    // A commit without a sha cannot be linked; skip it.
                    let sha = c.get("sha").and_then(Value::as_str)?;
                    let message = c
                        .get("message")
                        .and_then(Value::as_str)
                        .unwrap_or("")
                        .lines()
                        .next()
                        .unwrap_or("")
                        .to_string();
                    let author = c
                        .get("author")
                        .and_then(|a| a.get("name"))
                        .and_then(Value::as_str)
                        .map(str::to_string);
                    Some(CommitSummary {
                        short_sha: sha.chars().take(7).collect(),
                        sha: sha.to_string(),
                        message,
                        url: commit_url(repo, sha),
                        author,
                    })
                })
                .collect()
        })
        .unwrap_or_default();

    let body = if commits.is_empty() {
        None
    } else {
        Some(TimelineBody::Commits { commits })
    };
    (TimelineIcon::GitPush, title, None, body)
}
/// Reshapes a `PullRequestEvent` payload into
/// "<verb> pull request #N in <repo>".
///
/// The verb is the payload's `action` ("touched" when absent), except that a
/// `closed` action on a pull request whose `pull_request.merged` flag is true
/// is reported as "merged" and gets the merge icon. A non-empty
/// `pull_request.title` becomes the subtitle.
fn pull_request(repo: Option<&str>, p: Option<&Value>) -> Reshaped {
    let repo = repo.unwrap_or("(unknown repo)");
    let action = p
        .and_then(|v| v.get("action"))
        .and_then(Value::as_str)
        .unwrap_or("touched");
    let pr = p.and_then(|v| v.get("pull_request"));
    // Prefer the payload-level number; fall back to `pull_request.number`
    // (the field the review reshapers in this file read) so the link does not
    // degrade to `#0` when only the nested value is present.
    let number = p
        .and_then(|v| v.get("number"))
        .and_then(Value::as_i64)
        .or_else(|| pr.and_then(|v| v.get("number")).and_then(Value::as_i64))
        .unwrap_or(0);
    let pr_title = pr.and_then(|v| v.get("title")).and_then(Value::as_str).unwrap_or("");
    let merged = pr
        .and_then(|v| v.get("merged"))
        .and_then(Value::as_bool)
        .unwrap_or(false);

    let verb = if action == "closed" && merged {
        "merged"
    } else {
        action
    };
    let icon = if verb == "merged" {
        TimelineIcon::GitMerge
    } else {
        TimelineIcon::PullRequest
    };

    let title = vec![
        TitleSegment::text(format!("{verb} pull request ")),
        TitleSegment::link(format!("#{number}"), pr_url(repo, number)),
        TitleSegment::text(" in "),
        repo_link(repo),
    ];
    let subtitle = (!pr_title.is_empty()).then(|| vec![TitleSegment::text(pr_title.to_string())]);
    (icon, title, subtitle, None)
}
/// Reshapes a `PullRequestReviewEvent` payload into
/// "<state> review on #N in <repo>".
///
/// `<state>` is `review.state`, defaulting to "commented". A non-empty
/// `pull_request.title` becomes the subtitle.
fn pull_request_review(repo: Option<&str>, p: Option<&Value>) -> Reshaped {
    let repo_name = repo.unwrap_or("(unknown repo)");
    let pull = p.and_then(|payload| payload.get("pull_request"));
    let pr_number = pull
        .and_then(|pull| pull.get("number")?.as_i64())
        .unwrap_or_default();
    let review_state = p
        .and_then(|payload| payload.get("review")?.get("state")?.as_str())
        .unwrap_or("commented");

    let mut title = Vec::with_capacity(4);
    title.push(TitleSegment::text(format!("{review_state} review on ")));
    title.push(TitleSegment::link(
        format!("#{pr_number}"),
        pr_url(repo_name, pr_number),
    ));
    title.push(TitleSegment::text(" in "));
    title.push(repo_link(repo_name));

    // An absent title and an empty title both mean "no subtitle".
    let subtitle = pull
        .and_then(|pull| pull.get("title")?.as_str())
        .filter(|t| !t.is_empty())
        .map(|t| vec![TitleSegment::text(t.to_string())]);

    (TimelineIcon::PullRequest, title, subtitle, None)
}
/// Reshapes a `PullRequestReviewCommentEvent` payload into
/// "commented on review of #N in <repo>".
///
/// A non-empty `pull_request.title` becomes the subtitle and a non-empty
/// `comment.body` becomes a Markdown body.
fn pull_request_review_comment(repo: Option<&str>, p: Option<&Value>) -> Reshaped {
    let repo_name = repo.unwrap_or("(unknown repo)");
    let pull = p.and_then(|payload| payload.get("pull_request"));
    let pr_number = pull
        .and_then(|pull| pull.get("number")?.as_i64())
        .unwrap_or_default();

    let mut title = Vec::with_capacity(4);
    title.push(TitleSegment::text("commented on review of "));
    title.push(TitleSegment::link(
        format!("#{pr_number}"),
        pr_url(repo_name, pr_number),
    ));
    title.push(TitleSegment::text(" in "));
    title.push(repo_link(repo_name));

    let subtitle = pull
        .and_then(|pull| pull.get("title")?.as_str())
        .filter(|t| !t.is_empty())
        .map(|t| vec![TitleSegment::text(t.to_string())]);

    // Missing and empty comment bodies are both rendered as "no body".
    let body = match p.and_then(|payload| payload.get("comment")?.get("body")?.as_str()) {
        Some(text) if !text.is_empty() => Some(TimelineBody::Markdown {
            text: text.to_string(),
        }),
        _ => None,
    };

    (TimelineIcon::Comment, title, subtitle, body)
}
/// Reshapes an `IssuesEvent` payload into "<action> issue #N in <repo>".
///
/// `<action>` defaults to "touched" when the payload has none. A non-empty
/// `issue.title` becomes the subtitle.
fn issues(repo: Option<&str>, p: Option<&Value>) -> Reshaped {
    let repo_name = repo.unwrap_or("(unknown repo)");
    let verb = p
        .and_then(|payload| payload.get("action")?.as_str())
        .unwrap_or("touched");
    let issue = p.and_then(|payload| payload.get("issue"));
    let issue_number = issue
        .and_then(|issue| issue.get("number")?.as_i64())
        .unwrap_or_default();

    let mut title = Vec::with_capacity(4);
    title.push(TitleSegment::text(format!("{verb} issue ")));
    title.push(TitleSegment::link(
        format!("#{issue_number}"),
        issue_url(repo_name, issue_number),
    ));
    title.push(TitleSegment::text(" in "));
    title.push(repo_link(repo_name));

    let subtitle = issue
        .and_then(|issue| issue.get("title")?.as_str())
        .filter(|t| !t.is_empty())
        .map(|t| vec![TitleSegment::text(t.to_string())]);

    (TimelineIcon::Issue, title, subtitle, None)
}
/// Reshapes an `IssueCommentEvent` payload into "commented on #N in <repo>".
///
/// A non-empty `issue.title` becomes the subtitle and a non-empty
/// `comment.body` becomes a Markdown body.
fn issue_comment(repo: Option<&str>, p: Option<&Value>) -> Reshaped {
    let repo_name = repo.unwrap_or("(unknown repo)");
    let issue = p.and_then(|payload| payload.get("issue"));
    let issue_number = issue
        .and_then(|issue| issue.get("number")?.as_i64())
        .unwrap_or_default();

    let mut title = Vec::with_capacity(4);
    title.push(TitleSegment::text("commented on "));
    title.push(TitleSegment::link(
        format!("#{issue_number}"),
        issue_url(repo_name, issue_number),
    ));
    title.push(TitleSegment::text(" in "));
    title.push(repo_link(repo_name));

    let subtitle = issue
        .and_then(|issue| issue.get("title")?.as_str())
        .filter(|t| !t.is_empty())
        .map(|t| vec![TitleSegment::text(t.to_string())]);

    let body = match p.and_then(|payload| payload.get("comment")?.get("body")?.as_str()) {
        Some(text) if !text.is_empty() => Some(TimelineBody::Markdown {
            text: text.to_string(),
        }),
        _ => None,
    };

    (TimelineIcon::Comment, title, subtitle, body)
}
/// Reshapes a `CreateEvent` payload into "created <ref_type> <ref> in <repo>".
///
/// `<ref_type>` defaults to "ref". When the payload has no string `ref` the
/// name is left out and the title reads "created <ref_type> in <repo>".
fn create(repo: Option<&str>, p: Option<&Value>) -> Reshaped {
    let repo_name = repo.unwrap_or("(unknown repo)");
    let kind = p
        .and_then(|payload| payload.get("ref_type")?.as_str())
        .unwrap_or("ref");
    let location = match p.and_then(|payload| payload.get("ref")?.as_str()) {
        Some(name) => TitleSegment::text(format!("{name} in ")),
        None => TitleSegment::text("in "),
    };
    let title = vec![
        TitleSegment::text(format!("created {kind} ")),
        location,
        repo_link(repo_name),
    ];
    (TimelineIcon::GitBranchCreate, title, None, None)
}
/// Reshapes a `DeleteEvent` payload into "deleted <ref_type> <ref> in <repo>".
///
/// `<ref_type>` defaults to "ref" and `<ref>` to the empty string.
fn delete(repo: Option<&str>, p: Option<&Value>) -> Reshaped {
    let repo_name = repo.unwrap_or("(unknown repo)");
    let kind = p
        .and_then(|payload| payload.get("ref_type")?.as_str())
        .unwrap_or("ref");
    let name = p
        .and_then(|payload| payload.get("ref")?.as_str())
        .unwrap_or_default();
    let lead = format!("deleted {kind} {name} in ");
    (
        TimelineIcon::GitBranchDelete,
        vec![TitleSegment::text(lead), repo_link(repo_name)],
        None,
        None,
    )
}
/// Reshapes a `ForkEvent` payload into "forked <repo> to <forkee>".
///
/// The " to <forkee>" part is only added when `forkee.full_name` is a string;
/// the forkee is linked to its own GitHub page.
fn fork(repo: Option<&str>, p: Option<&Value>) -> Reshaped {
    let source_repo = repo.unwrap_or("(unknown repo)");
    let destination = p.and_then(|payload| payload.get("forkee")?.get("full_name")?.as_str());

    let mut title = Vec::with_capacity(4);
    title.push(TitleSegment::text("forked "));
    title.push(repo_link(source_repo));
    if let Some(full_name) = destination {
        let href = format!("https://github.com/{full_name}");
        title.push(TitleSegment::text(" to "));
        title.push(TitleSegment::link(full_name.to_string(), href));
    }
    (TimelineIcon::GitFork, title, None, None)
}
/// Builds the "starred <repo>" title for a `WatchEvent`.
fn watch(repo: Option<&str>) -> Reshaped {
    let mut title = Vec::with_capacity(2);
    title.push(TitleSegment::text("starred "));
    title.push(repo_link(repo.unwrap_or("(unknown repo)")));
    (TimelineIcon::Star, title, None, None)
}
/// Reshapes a `ReleaseEvent` payload into "released <name> in <repo>".
///
/// `<name>` is the release's `name`, falling back to its `tag_name` and then
/// to the literal "(release)". It links to `release.html_url` when that is a
/// string and is plain text otherwise.
fn release(repo: Option<&str>, p: Option<&Value>) -> Reshaped {
    let repo = repo.unwrap_or("(unknown repo)");
    let release = p.and_then(|v| v.get("release"));
    // Resolve `name` to a non-empty string before falling back. Checking only
    // for the key's presence would let a null or empty `name` shadow a
    // perfectly good `tag_name`.
    let name = release
        .and_then(|r| {
            r.get("name")
                .and_then(Value::as_str)
                .filter(|n| !n.is_empty())
                .or_else(|| r.get("tag_name").and_then(Value::as_str))
        })
        .unwrap_or("(release)");
    let url = release.and_then(|r| r.get("html_url")).and_then(Value::as_str);
    let label = match url {
        Some(u) => TitleSegment::link(name.to_string(), u.to_string()),
        None => TitleSegment::text(name.to_string()),
    };
    let title = vec![
        TitleSegment::text("released "),
        label,
        TitleSegment::text(" in "),
        repo_link(repo),
    ];
    (TimelineIcon::Release, title, None, None)
}
/// Reshapes a `CommitCommentEvent` payload into
/// "commented on a commit in <repo>", with a non-empty `comment.body` as a
/// Markdown body.
fn commit_comment(repo: Option<&str>, p: Option<&Value>) -> Reshaped {
    let repo_name = repo.unwrap_or("(unknown repo)");
    let body = match p.and_then(|payload| payload.get("comment")?.get("body")?.as_str()) {
        Some(text) if !text.is_empty() => Some(TimelineBody::Markdown {
            text: text.to_string(),
        }),
        _ => None,
    };
    let title = vec![
        TitleSegment::text("commented on a commit in "),
        repo_link(repo_name),
    ];
    (TimelineIcon::Comment, title, None, body)
}
/// Builds the "made <repo> public" title for a `PublicEvent`.
fn public(repo: Option<&str>) -> Reshaped {
    let mut title = Vec::with_capacity(3);
    title.push(TitleSegment::text("made "));
    title.push(repo_link(repo.unwrap_or("(unknown repo)")));
    title.push(TitleSegment::text(" public"));
    (TimelineIcon::Generic, title, None, None)
}
/// Reshapes a search-API issue or pull-request item into
/// "<user> <verb> <kind> #N in <repo>".
///
/// The event payload is read as the search item itself. An item counts as a
/// pull request when it has a `pull_request` key, and as merged when that
/// object's `merged_at` is present and non-null. The repository is derived
/// from `html_url`, which is also the target of the `#N` link. A non-empty
/// `title` becomes the subtitle.
fn search_reshape(event: &Event) -> TimelineItem {
    let item = &event.payload;
    // Indexing a `Value` by key yields `Null` for a missing key, so each
    // lookup below degrades to the same default as a `get` chain would.
    let html_url = item["html_url"].as_str().unwrap_or("");
    let number = item["number"].as_i64().unwrap_or(0);
    let state = item["state"].as_str().unwrap_or("");
    let author = item["user"]["login"].as_str();

    // Presence of the key (not its value) is what marks a pull request.
    let pull = item.get("pull_request");
    let is_pr = pull.is_some();
    let merged = pull.map_or(false, |pr| !pr["merged_at"].is_null());

    let repo = match repo_from_url(html_url) {
        Some(full_name) => full_name,
        None => String::from("(unknown repo)"),
    };

    let closed = state == "closed";
    let verb = if is_pr && closed && merged {
        "merged"
    } else if closed {
        "closed"
    } else {
        "opened"
    };
    let kind = if is_pr { "pull request" } else { "issue" };
    let icon = if !is_pr {
        TimelineIcon::Issue
    } else if verb == "merged" {
        TimelineIcon::GitMerge
    } else {
        TimelineIcon::PullRequest
    };

    let mut title = match author {
        Some(login) => vec![
            TitleSegment::link(login.to_string(), format!("https://github.com/{login}")),
            TitleSegment::text(" "),
        ],
        None => Vec::new(),
    };
    title.extend(vec![
        TitleSegment::text(format!("{verb} {kind} ")),
        TitleSegment::link(format!("#{number}"), html_url.to_string()),
        TitleSegment::text(" in "),
        repo_link(&repo),
    ]);

    let subtitle = item["title"]
        .as_str()
        .filter(|t| !t.is_empty())
        .map(|t| vec![TitleSegment::text(t.to_string())]);

    TimelineItem {
        id: event.id.clone(),
        source: Source::Github,
        action: event.action.clone(),
        occurred_at: event.occurred_at,
        icon,
        title,
        subtitle,
        body: None,
    }
}
/// Reshapes a search-API commit item into
/// "<author> committed <short sha> in <repo>".
///
/// The short SHA is the first seven characters of `sha` and links to the
/// item's `html_url`. The repository comes from `repository.full_name`. The
/// first line of `commit.message`, when non-empty, becomes the subtitle; the
/// rest of the message is dropped.
fn commit_reshape(event: &Event) -> TimelineItem {
    let item = &event.payload;
    // Indexing a `Value` by key yields `Null` for a missing key, so each
    // lookup below degrades to the same default as a `get` chain would.
    let full_sha = item["sha"].as_str().unwrap_or("");
    let commit_href = item["html_url"].as_str().unwrap_or("");
    let repo_name = item["repository"]["full_name"]
        .as_str()
        .unwrap_or("(unknown repo)");
    let author = item["author"]["login"].as_str();

    let abbreviated: String = full_sha.chars().take(7).collect();

    let mut title = match author {
        Some(login) => vec![
            TitleSegment::link(login.to_string(), format!("https://github.com/{login}")),
            TitleSegment::text(" "),
        ],
        None => Vec::new(),
    };
    title.extend(vec![
        TitleSegment::text("committed "),
        TitleSegment::link(abbreviated, commit_href.to_string()),
        TitleSegment::text(" in "),
        repo_link(repo_name),
    ]);

    // Only the summary line is shown; a missing message or an empty first
    // line means no subtitle at all.
    let subtitle = item["commit"]["message"]
        .as_str()
        .and_then(|message| message.lines().next())
        .filter(|line| !line.is_empty())
        .map(|line| vec![TitleSegment::text(line.to_string())]);

    TimelineItem {
        id: event.id.clone(),
        source: Source::Github,
        action: event.action.clone(),
        occurred_at: event.occurred_at,
        icon: TimelineIcon::GitCommit,
        title,
        subtitle,
        body: None,
    }
}
/// Extracts `owner/repo` from a `https://github.com/owner/repo[/...]` URL.
///
/// Returns `None` when the URL does not start with `https://github.com/` or
/// when either the owner or the repo path segment is missing or empty.
fn repo_from_url(url: &str) -> Option<String> {
    let path = url.strip_prefix("https://github.com/")?;
    // No '/' at all means there is an owner segment but no repo segment.
    let (owner, rest) = path.split_once('/')?;
    let repo = rest.split('/').next().unwrap_or("");
    if owner.is_empty() || repo.is_empty() {
        return None;
    }
    Some(format!("{owner}/{repo}"))
}
/// Generic rendering for event types without a dedicated builder:
/// "<action> on <repo>" when the repo is known, otherwise just the action
/// name. Always uses the generic icon and carries no subtitle or body.
fn fallback(repo: Option<&str>, action: &str) -> Reshaped {
    let title = repo.map_or_else(
        || vec![TitleSegment::text(action.to_string())],
        |name| {
            vec![
                TitleSegment::text(format!("{action} on ")),
                repo_link(name),
            ]
        },
    );
    (TimelineIcon::Generic, title, None, None)
}
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{TimeZone, Utc};
    use serde_json::json;

    /// Builds a public GitHub event with a fixed id and timestamp around the
    /// given action and payload.
    fn ev(action: &str, payload: Value) -> Event {
        Event {
            id: "github:1".into(),
            source: Source::Github,
            action: action.into(),
            occurred_at: Utc.with_ymd_and_hms(2026, 4, 14, 10, 0, 0).unwrap(),
            public: true,
            payload,
        }
    }

    /// Flattens an item's title segments into the text a reader would see,
    /// ignoring link targets.
    fn rendered(item: &TimelineItem) -> String {
        item.title
            .iter()
            .map(|s| match s {
                TitleSegment::Text { text } => text.clone(),
                TitleSegment::Link { text, .. } => text.clone(),
            })
            .collect()
    }

    #[test]
    fn push_event_reshape() {
        let raw = json!({
            "actor": { "login": "grenade", "display_login": "grenade" },
            "repo": { "name": "grenade/vortex" },
            "payload": {
                "ref": "refs/heads/main",
                "size": 2,
                "distinct_size": 2,
                "commits": [
                    { "sha": "abcdef1234567890", "message": "fix the thing", "author": { "name": "rob" } },
                    { "sha": "1111111111111111", "message": "and another\nbody", "author": { "name": "rob" } }
                ]
            }
        });
        let item = reshape(&ev("PushEvent", raw));
        assert_eq!(item.icon, TimelineIcon::GitPush);
        // first segment is the actor link, then "pushed N commits to <repo>:<branch>"
        assert!(matches!(item.title[0], TitleSegment::Link { .. }));
        let rendered = rendered(&item);
        assert!(rendered.contains("pushed 2 commits to grenade/vortex:main"), "got: {rendered}");
        match item.body.unwrap() {
            TimelineBody::Commits { commits } => {
                assert_eq!(commits.len(), 2);
                assert_eq!(commits[0].short_sha, "abcdef1");
                // multi-line message gets first line only
                assert_eq!(commits[1].message, "and another");
            }
            _ => panic!("expected Commits body"),
        }
    }

    #[test]
    fn merged_pr_uses_merge_icon() {
        let raw = json!({
            "actor": { "login": "grenade" },
            "repo": { "name": "grenade/moments" },
            "payload": {
                "action": "closed",
                "number": 7,
                "pull_request": { "title": "wire it up", "merged": true }
            }
        });
        let item = reshape(&ev("PullRequestEvent", raw));
        assert_eq!(item.icon, TimelineIcon::GitMerge);
        let rendered = rendered(&item);
        assert!(rendered.contains("merged pull request #7 in grenade/moments"));
        assert_eq!(
            item.subtitle.unwrap(),
            vec![TitleSegment::text("wire it up")]
        );
    }

    #[test]
    fn issue_comment_carries_markdown_body() {
        let raw = json!({
            "actor": { "login": "grenade" },
            "repo": { "name": "Nehliin/vortex" },
            "payload": {
                "issue": { "number": 42, "title": "perf regression" },
                "comment": { "body": "looks like the io_uring batching changed" }
            }
        });
        let item = reshape(&ev("IssueCommentEvent", raw));
        assert_eq!(item.icon, TimelineIcon::Comment);
        match item.body.unwrap() {
            TimelineBody::Markdown { text } => {
                assert!(text.contains("io_uring"));
            }
            _ => panic!("expected Markdown body"),
        }
    }

    #[test]
    fn search_issue_reshape_open() {
        let raw = json!({
            "number": 125,
            "title": "Feature: peer blocklist",
            "state": "open",
            "html_url": "https://github.com/Nehliin/vortex/issues/125",
            "user": { "login": "grenade" }
        });
        let item = reshape(&ev("Issue", raw));
        assert_eq!(item.icon, TimelineIcon::Issue);
        let rendered = rendered(&item);
        assert!(
            rendered.contains("opened issue #125 in Nehliin/vortex"),
            "got: {rendered}"
        );
    }

    #[test]
    fn search_pr_reshape_merged_uses_merge_icon() {
        let raw = json!({
            "number": 42,
            "title": "wire it up",
            "state": "closed",
            "html_url": "https://github.com/grenade/moments/pull/42",
            "user": { "login": "grenade" },
            "pull_request": { "merged_at": "2026-04-15T10:00:00Z" }
        });
        let item = reshape(&ev("PullRequest", raw));
        assert_eq!(item.icon, TimelineIcon::GitMerge);
        let rendered = rendered(&item);
        assert!(
            rendered.contains("merged pull request #42 in grenade/moments"),
            "got: {rendered}"
        );
    }

    #[test]
    fn commit_reshape_uses_short_sha_and_first_message_line() {
        let raw = json!({
            "sha": "a6fcefbe909a97ad5a049b9fa48bc74309af10d9",
            "html_url": "https://github.com/faith1337z/Trade/commit/a6fcefbe909a97ad5a049b9fa48bc74309af10d9",
            "commit": {
                "message": "split multiline message into multiple irc messages\n\nbody body body"
            },
            "repository": { "full_name": "faith1337z/Trade" },
            "author": { "login": "grenade" }
        });
        let item = reshape(&ev("Commit", raw));
        assert_eq!(item.icon, TimelineIcon::GitCommit);
        let rendered = rendered(&item);
        assert!(rendered.contains("committed a6fcefb in faith1337z/Trade"), "got: {rendered}");
        // body of the commit message is dropped; only first line in subtitle
        assert_eq!(
            item.subtitle.unwrap(),
            vec![TitleSegment::text("split multiline message into multiple irc messages")]
        );
    }

    #[test]
    fn unknown_event_falls_back() {
        let raw = json!({
            "actor": { "login": "grenade" },
            "repo": { "name": "grenade/x" },
            "payload": {}
        });
        let item = reshape(&ev("SponsorshipEvent", raw));
        assert_eq!(item.icon, TimelineIcon::Generic);
        assert_eq!(item.action, "SponsorshipEvent");
    }
}