feat(worker): add github search api source for historical backfill

The Events API is hard-capped at 90 days (15 events for grenade
right now). The Search API has its own 1000-result-per-query cap
but reaches the start of the user's GitHub history — for grenade,
430 issues/PRs going back to 2012-08-08.

  GET /search/issues?q=author:<user>&sort=created&order=desc

Polled on a 24h interval by default, since this is backfill, not a
live feed. After the first run most upserts are
no-ops. Stored as Source::Github with action "Issue" or "PullRequest"
(distinguished by the .pull_request field on the search item),
keyed `github-issue:<owner>/<repo>#<n>`.

/search/commits is deliberately not used: GitHub matches the same
commit across every fork that contains it, so grenade's 275k
"commits" are mostly duplicate fork hits in repos he never committed
to. If commit history becomes valuable we should enumerate his repos
and walk per-repo /commits?author= instead.

Visibility: search/issues items don't carry .private, so we look up
/repos/{full_name} once per unique repo encountered (cached for the
duration of the poll). Failure to resolve is treated as private —
better to under-expose than over-expose on the public timeline.

Reshape: presentation/github.rs gains an Issue/PullRequest path that
extracts from the search item shape (html_url, number, title, state,
.pull_request.merged_at) rather than the events-API wrapper. Merged
PRs use the GitMerge icon, mirroring the events-API path.

Worker now spawns two tokio tasks (events + search), aborts both
on SIGINT. New env: SEARCH_POLL_INTERVAL_SECS (default 86400).

Tests: +2 in moments-data (URL parsing), +2 in moments-core
(search Issue + merged-PR reshape) — 14 total green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-03 18:49:06 +03:00
parent 3c0253519f
commit e4052c4c9a
4 changed files with 400 additions and 1 deletions

View File

@@ -4,6 +4,12 @@ use moments_entities::{
use serde_json::Value;
pub(crate) fn reshape(event: &Event) -> TimelineItem {
// Search-API items have a different payload shape (the search item itself
// rather than a wrapped event), so dispatch them through a separate path.
if matches!(event.action.as_str(), "Issue" | "PullRequest") {
return search_reshape(event);
}
let p = &event.payload;
let repo_name = p.get("repo").and_then(|r| r.get("name")).and_then(Value::as_str);
let actor_login = p
@@ -360,6 +366,74 @@ fn public(repo: Option<&str>) -> Reshaped {
(TimelineIcon::Generic, title, None, None)
}
/// Reshapes a GitHub Search API item (an issue or a pull request) into a
/// [`TimelineItem`].
///
/// The payload is read as the search item itself: `html_url`, `number`,
/// `title`, `state`, `user.login` and the optional `pull_request` object.
/// Missing fields degrade to placeholders (`""`, `0`, `"(unknown repo)"`)
/// instead of failing.
fn search_reshape(event: &Event) -> TimelineItem {
    let item = &event.payload;

    let link = item.get("html_url").and_then(Value::as_str).unwrap_or("");
    let issue_number = item.get("number").and_then(Value::as_i64).unwrap_or(0);
    let summary = item.get("title").and_then(Value::as_str).unwrap_or("");
    let status = item.get("state").and_then(Value::as_str).unwrap_or("");
    let author = item
        .get("user")
        .and_then(|user| user.get("login"))
        .and_then(Value::as_str);

    // The presence of `pull_request` marks the item as a PR; a non-null
    // `merged_at` inside it marks that PR as merged.
    let pull = item.get("pull_request");
    let was_merged = matches!(
        pull.and_then(|pr| pr.get("merged_at")),
        Some(stamp) if !stamp.is_null()
    );

    let repo = match repo_from_url(link) {
        Some(full_name) => full_name,
        None => String::from("(unknown repo)"),
    };

    // Any state other than "closed" reads as "opened". `was_merged` can only
    // be true for a PR, so a closed issue always lands on "closed".
    let verb = if status != "closed" {
        "opened"
    } else if was_merged {
        "merged"
    } else {
        "closed"
    };

    let (kind, icon) = match pull {
        None => ("issue", TimelineIcon::Issue),
        Some(_) if verb == "merged" => ("pull request", TimelineIcon::GitMerge),
        Some(_) => ("pull request", TimelineIcon::PullRequest),
    };

    // Title layout: "<author> <verb> <kind> #<n> in <repo>", where the author
    // prefix is omitted when the payload carries no login.
    let mut title = Vec::with_capacity(6);
    if let Some(login) = author {
        title.extend([
            TitleSegment::link(login.to_string(), format!("https://github.com/{login}")),
            TitleSegment::text(" "),
        ]);
    }
    title.extend([
        TitleSegment::text(format!("{verb} {kind} ")),
        TitleSegment::link(format!("#{issue_number}"), link.to_string()),
        TitleSegment::text(" in "),
        repo_link(&repo),
    ]);

    // The issue/PR title becomes the subtitle, but only when non-empty.
    let subtitle = if summary.is_empty() {
        None
    } else {
        Some(vec![TitleSegment::text(summary.to_string())])
    };

    TimelineItem {
        id: event.id.clone(),
        source: Source::Github,
        action: event.action.clone(),
        occurred_at: event.occurred_at,
        icon,
        title,
        subtitle,
        body: None,
    }
}
/// Extracts `owner/repo` from a `https://github.com/<owner>/<repo>/...` URL.
///
/// Returns `None` when the URL does not start with `https://github.com/`, or
/// when either of the first two path segments is missing or empty.
fn repo_from_url(url: &str) -> Option<String> {
    let path = url.strip_prefix("https://github.com/")?;
    // Only the first two path segments matter; anything after them is ignored.
    let mut segments = path.split('/');
    match (segments.next(), segments.next()) {
        (Some(owner), Some(repo)) if !owner.is_empty() && !repo.is_empty() => {
            Some(format!("{owner}/{repo}"))
        }
        _ => None,
    }
}
fn fallback(repo: Option<&str>, action: &str) -> Reshaped {
let title = match repo {
Some(r) => vec![
@@ -475,6 +549,57 @@ mod tests {
}
}
#[test]
fn search_issue_reshape_open() {
    // Search-API issue fixture: state "open" and no `pull_request` key, so it
    // must take the issue branch.
    let payload = json!({
        "number": 125,
        "title": "Feature: peer blocklist",
        "state": "open",
        "html_url": "https://github.com/Nehliin/vortex/issues/125",
        "user": { "login": "grenade" }
    });
    let timeline_item = reshape(&ev("Issue", payload));
    assert_eq!(timeline_item.icon, TimelineIcon::Issue);

    // Flatten the title segments to their visible text, ignoring link targets.
    let mut rendered = String::new();
    for segment in timeline_item.title.iter() {
        let visible = match segment {
            TitleSegment::Text { text } => text.clone(),
            TitleSegment::Link { text, .. } => text.clone(),
        };
        rendered.push_str(&visible);
    }

    let expected = "opened issue #125 in Nehliin/vortex";
    assert!(rendered.contains(expected), "got: {rendered}");
}
#[test]
fn search_pr_reshape_merged_uses_merge_icon() {
    // Search-API PR fixture: state "closed" with a non-null
    // `pull_request.merged_at`, which should read as "merged".
    let payload = json!({
        "number": 42,
        "title": "wire it up",
        "state": "closed",
        "html_url": "https://github.com/grenade/moments/pull/42",
        "user": { "login": "grenade" },
        "pull_request": { "merged_at": "2026-04-15T10:00:00Z" }
    });
    let timeline_item = reshape(&ev("PullRequest", payload));
    assert_eq!(timeline_item.icon, TimelineIcon::GitMerge);

    // Flatten the title segments to their visible text, ignoring link targets.
    let mut rendered = String::new();
    for segment in timeline_item.title.iter() {
        let visible = match segment {
            TitleSegment::Text { text } => text.clone(),
            TitleSegment::Link { text, .. } => text.clone(),
        };
        rendered.push_str(&visible);
    }

    let expected = "merged pull request #42 in grenade/moments";
    assert!(rendered.contains(expected), "got: {rendered}");
}
#[test]
fn unknown_event_falls_back() {
let raw = json!({