feat(worker): add hg-edge and bugzilla pollers
Wires two historical sources for completeness with the 2019 timeline: - hg-edge.mozilla.org: scans json-pushes for a configured set of build/* repos and matches changeset author client-side, since the pushlog `user=` filter targets the pusher (sheriffs/reviewers in this case) rather than the author. Daily poll cadence — mozilla retired hg, no new events expected. - bugzilla.mozilla.org: queries /rest/bug?creator=<email>. Without an api key the unauthenticated endpoint only returns public bugs, which is what the public timeline wants anyway. Reshape renders "<author> committed <short_node> in <repo>" for hg and "filed bug #<id> in <product>" for bugzilla, both linking back to the canonical upstream URL via a stamped `_host` payload field. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -3,31 +3,18 @@
|
|||||||
//! Storage holds the upstream payload verbatim; transformation lives here so
|
//! Storage holds the upstream payload verbatim; transformation lives here so
|
||||||
//! the rendering can evolve without re-fetching upstream data.
|
//! the rendering can evolve without re-fetching upstream data.
|
||||||
|
|
||||||
use moments_entities::{Event, Source, TimelineIcon, TimelineItem, TitleSegment};
|
use moments_entities::{Event, Source, TimelineItem};
|
||||||
|
|
||||||
|
mod bugzilla;
|
||||||
mod gitea;
|
mod gitea;
|
||||||
mod github;
|
mod github;
|
||||||
|
mod hg;
|
||||||
|
|
||||||
pub fn reshape(event: &Event) -> TimelineItem {
|
pub fn reshape(event: &Event) -> TimelineItem {
|
||||||
match event.source {
|
match event.source {
|
||||||
Source::Github => github::reshape(event),
|
Source::Github => github::reshape(event),
|
||||||
Source::Gitea => gitea::reshape(event),
|
Source::Gitea => gitea::reshape(event),
|
||||||
Source::Hg | Source::Bugzilla => generic_fallback(event),
|
Source::Hg => hg::reshape(event),
|
||||||
}
|
Source::Bugzilla => bugzilla::reshape(event),
|
||||||
}
|
|
||||||
|
|
||||||
fn generic_fallback(event: &Event) -> TimelineItem {
|
|
||||||
TimelineItem {
|
|
||||||
id: event.id.clone(),
|
|
||||||
source: event.source,
|
|
||||||
action: event.action.clone(),
|
|
||||||
occurred_at: event.occurred_at,
|
|
||||||
icon: TimelineIcon::Generic,
|
|
||||||
title: vec![
|
|
||||||
TitleSegment::text(format!("{} from ", event.action)),
|
|
||||||
TitleSegment::text(event.source.as_str().to_string()),
|
|
||||||
],
|
|
||||||
subtitle: None,
|
|
||||||
body: None,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
79
crates/moments-core/src/presentation/bugzilla.rs
Normal file
79
crates/moments-core/src/presentation/bugzilla.rs
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
use moments_entities::{Event, Source, TimelineIcon, TimelineItem, TitleSegment};
|
||||||
|
use serde_json::Value;
|
||||||
|
|
||||||
|
const FALLBACK_HOST: &str = "bugzilla.mozilla.org";
|
||||||
|
|
||||||
|
pub(crate) fn reshape(event: &Event) -> TimelineItem {
|
||||||
|
let p = &event.payload;
|
||||||
|
let host = p
|
||||||
|
.get("_host")
|
||||||
|
.and_then(Value::as_str)
|
||||||
|
.unwrap_or(FALLBACK_HOST);
|
||||||
|
let id = p.get("id").and_then(Value::as_i64).unwrap_or(0);
|
||||||
|
let summary = p.get("summary").and_then(Value::as_str).unwrap_or("");
|
||||||
|
let product = p.get("product").and_then(Value::as_str);
|
||||||
|
|
||||||
|
let mut title = vec![
|
||||||
|
TitleSegment::text("filed bug "),
|
||||||
|
TitleSegment::link(
|
||||||
|
format!("#{id}"),
|
||||||
|
format!("https://{host}/show_bug.cgi?id={id}"),
|
||||||
|
),
|
||||||
|
];
|
||||||
|
if let Some(prod) = product {
|
||||||
|
title.push(TitleSegment::text(format!(" in {prod}")));
|
||||||
|
}
|
||||||
|
|
||||||
|
let subtitle = (!summary.is_empty()).then(|| vec![TitleSegment::text(summary.to_string())]);
|
||||||
|
|
||||||
|
TimelineItem {
|
||||||
|
id: event.id.clone(),
|
||||||
|
source: Source::Bugzilla,
|
||||||
|
action: event.action.clone(),
|
||||||
|
occurred_at: event.occurred_at,
|
||||||
|
icon: TimelineIcon::Bug,
|
||||||
|
title,
|
||||||
|
subtitle,
|
||||||
|
body: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use chrono::{TimeZone, Utc};
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn reshape_bug_create() {
|
||||||
|
let raw = json!({
|
||||||
|
"_host": "bugzilla.mozilla.org",
|
||||||
|
"id": 1158879,
|
||||||
|
"summary": "Commit Access (Level 1) for Rob Thijssen",
|
||||||
|
"product": "mozilla.org"
|
||||||
|
});
|
||||||
|
let event = Event {
|
||||||
|
id: "bugzilla:1158879".into(),
|
||||||
|
source: Source::Bugzilla,
|
||||||
|
action: "BugCreate".into(),
|
||||||
|
occurred_at: Utc.with_ymd_and_hms(2015, 4, 27, 16, 29, 59).unwrap(),
|
||||||
|
public: true,
|
||||||
|
payload: raw,
|
||||||
|
};
|
||||||
|
let item = reshape(&event);
|
||||||
|
assert_eq!(item.icon, TimelineIcon::Bug);
|
||||||
|
let r: String = item
|
||||||
|
.title
|
||||||
|
.iter()
|
||||||
|
.map(|s| match s {
|
||||||
|
TitleSegment::Text { text } => text.clone(),
|
||||||
|
TitleSegment::Link { text, .. } => text.clone(),
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
assert!(r.contains("filed bug #1158879 in mozilla.org"), "got: {r}");
|
||||||
|
assert_eq!(
|
||||||
|
item.subtitle.unwrap(),
|
||||||
|
vec![TitleSegment::text("Commit Access (Level 1) for Rob Thijssen")]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
126
crates/moments-core/src/presentation/hg.rs
Normal file
126
crates/moments-core/src/presentation/hg.rs
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
use moments_entities::{Event, Source, TimelineIcon, TimelineItem, TitleSegment};
|
||||||
|
use serde_json::Value;
|
||||||
|
|
||||||
|
const FALLBACK_HOST: &str = "hg-edge.mozilla.org";
|
||||||
|
|
||||||
|
pub(crate) fn reshape(event: &Event) -> TimelineItem {
|
||||||
|
let p = &event.payload;
|
||||||
|
let host = p
|
||||||
|
.get("_host")
|
||||||
|
.and_then(Value::as_str)
|
||||||
|
.unwrap_or(FALLBACK_HOST);
|
||||||
|
let repo = p
|
||||||
|
.get("_repo")
|
||||||
|
.and_then(Value::as_str)
|
||||||
|
.unwrap_or("(unknown repo)");
|
||||||
|
let node = p.get("node").and_then(Value::as_str).unwrap_or("");
|
||||||
|
let short_node: String = node.chars().take(12).collect();
|
||||||
|
let desc = p
|
||||||
|
.get("desc")
|
||||||
|
.and_then(Value::as_str)
|
||||||
|
.unwrap_or("")
|
||||||
|
.lines()
|
||||||
|
.next()
|
||||||
|
.unwrap_or("")
|
||||||
|
.to_string();
|
||||||
|
let author = p
|
||||||
|
.get("author")
|
||||||
|
.and_then(Value::as_str)
|
||||||
|
.map(author_name);
|
||||||
|
|
||||||
|
let mut title = Vec::new();
|
||||||
|
if let Some(name) = author {
|
||||||
|
title.push(TitleSegment::text(format!("{name} ")));
|
||||||
|
}
|
||||||
|
title.push(TitleSegment::text("committed "));
|
||||||
|
title.push(TitleSegment::link(
|
||||||
|
short_node,
|
||||||
|
format!("https://{host}/{repo}/rev/{node}"),
|
||||||
|
));
|
||||||
|
title.push(TitleSegment::text(" in "));
|
||||||
|
title.push(TitleSegment::link(
|
||||||
|
repo.to_string(),
|
||||||
|
format!("https://{host}/{repo}"),
|
||||||
|
));
|
||||||
|
|
||||||
|
let subtitle = (!desc.is_empty()).then(|| vec![TitleSegment::text(desc)]);
|
||||||
|
|
||||||
|
TimelineItem {
|
||||||
|
id: event.id.clone(),
|
||||||
|
source: Source::Hg,
|
||||||
|
action: event.action.clone(),
|
||||||
|
occurred_at: event.occurred_at,
|
||||||
|
icon: TimelineIcon::GitCommit,
|
||||||
|
title,
|
||||||
|
subtitle,
|
||||||
|
body: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Drop the `<email>` portion of an hg author string ("Name <email>") and
|
||||||
|
/// trim — leaves just the display name. If there's no email, return the
|
||||||
|
/// trimmed input.
|
||||||
|
fn author_name(s: &str) -> String {
|
||||||
|
if let Some(idx) = s.find('<') {
|
||||||
|
s[..idx].trim().to_string()
|
||||||
|
} else {
|
||||||
|
s.trim().to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use chrono::{TimeZone, Utc};
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
fn ev(payload: Value) -> Event {
|
||||||
|
Event {
|
||||||
|
id: "hg:build/puppet:abc".into(),
|
||||||
|
source: Source::Hg,
|
||||||
|
action: "Commit".into(),
|
||||||
|
occurred_at: Utc.with_ymd_and_hms(2018, 5, 1, 12, 0, 0).unwrap(),
|
||||||
|
public: true,
|
||||||
|
payload,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn render(item: &TimelineItem) -> String {
|
||||||
|
item.title
|
||||||
|
.iter()
|
||||||
|
.map(|s| match s {
|
||||||
|
TitleSegment::Text { text } => text.clone(),
|
||||||
|
TitleSegment::Link { text, .. } => text.clone(),
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn reshape_hg_commit() {
|
||||||
|
let raw = json!({
|
||||||
|
"_host": "hg-edge.mozilla.org",
|
||||||
|
"_repo": "build/puppet",
|
||||||
|
"node": "abcdef1234567890abcdef",
|
||||||
|
"desc": "Bug 1234 - fix something\n\nlonger body",
|
||||||
|
"author": "Rob Thijssen <rthijssen@mozilla.com>"
|
||||||
|
});
|
||||||
|
let item = reshape(&ev(raw));
|
||||||
|
assert_eq!(item.icon, TimelineIcon::GitCommit);
|
||||||
|
let r = render(&item);
|
||||||
|
assert!(
|
||||||
|
r.contains("Rob Thijssen committed abcdef123456 in build/puppet"),
|
||||||
|
"got: {r}"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
item.subtitle.unwrap(),
|
||||||
|
vec![TitleSegment::text("Bug 1234 - fix something")]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn drops_email_from_author() {
|
||||||
|
assert_eq!(author_name("Rob Thijssen <rob@example>"), "Rob Thijssen");
|
||||||
|
assert_eq!(author_name("nobody"), "nobody");
|
||||||
|
assert_eq!(author_name(" spaced "), "spaced");
|
||||||
|
}
|
||||||
|
}
|
||||||
176
crates/moments-data/src/bugzilla.rs
Normal file
176
crates/moments-data/src/bugzilla.rs
Normal file
@@ -0,0 +1,176 @@
|
|||||||
|
//! Bugzilla REST API ingestion.
|
||||||
|
//!
|
||||||
|
//! Hits `/rest/bug?creator=<email>` to pull bugs the user filed. Without
|
||||||
|
//! an api key, only public bugs are returned, which is what we want for
|
||||||
|
//! the public timeline anyway.
|
||||||
|
//!
|
||||||
|
//! No incremental story for v1 — each tick refetches the full list and
|
||||||
|
//! relies on idempotent upsert by `bugzilla:<id>`. Comments and bug
|
||||||
|
//! changes (status flips, assignment, etc.) aren't ingested for v1
|
||||||
|
//! because they require per-bug API calls; revisit if that history
|
||||||
|
//! becomes desirable.
|
||||||
|
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use chrono::{DateTime, Utc};
|
||||||
|
use moments_core::{EventSource, EventWriter, PollerStateStore, SourceError};
|
||||||
|
use moments_entities::{Event, Source};
|
||||||
|
use reqwest::{Client, header};
|
||||||
|
use serde_json::Value;
|
||||||
|
use tracing::debug;
|
||||||
|
|
||||||
|
const SOURCE_NAME: &str = "bugzilla";
|
||||||
|
const USER_AGENT: &str = concat!(
|
||||||
|
"moments/",
|
||||||
|
env!("CARGO_PKG_VERSION"),
|
||||||
|
" (+https://rob.tn)"
|
||||||
|
);
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct BugzillaConfig {
|
||||||
|
pub host: String,
|
||||||
|
pub creator_email: String,
|
||||||
|
pub api_key: Option<String>,
|
||||||
|
/// Bugs requested per page; bugzilla allows up to 1000.
|
||||||
|
pub limit: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for BugzillaConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
host: "bugzilla.mozilla.org".into(),
|
||||||
|
creator_email: "rthijssen@mozilla.com".into(),
|
||||||
|
api_key: None,
|
||||||
|
limit: 500,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct BugzillaSource {
|
||||||
|
client: Client,
|
||||||
|
writer: Arc<dyn EventWriter>,
|
||||||
|
state: Arc<dyn PollerStateStore>,
|
||||||
|
config: BugzillaConfig,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BugzillaSource {
|
||||||
|
pub fn new(
|
||||||
|
client: Client,
|
||||||
|
writer: Arc<dyn EventWriter>,
|
||||||
|
state: Arc<dyn PollerStateStore>,
|
||||||
|
config: BugzillaConfig,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
client,
|
||||||
|
writer,
|
||||||
|
state,
|
||||||
|
config,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl EventSource for BugzillaSource {
|
||||||
|
fn name(&self) -> &'static str {
|
||||||
|
SOURCE_NAME
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn poll(&self) -> Result<usize, SourceError> {
|
||||||
|
let url = format!(
|
||||||
|
"https://{}/rest/bug?creator={}&limit={}\
|
||||||
|
&include_fields=id,summary,creation_time,last_change_time,status,resolution,product,component",
|
||||||
|
self.config.host, self.config.creator_email, self.config.limit
|
||||||
|
);
|
||||||
|
let mut req = self
|
||||||
|
.client
|
||||||
|
.get(&url)
|
||||||
|
.header(header::USER_AGENT, USER_AGENT)
|
||||||
|
.header(header::ACCEPT, "application/json");
|
||||||
|
if let Some(key) = &self.config.api_key {
|
||||||
|
req = req.header("X-BUGZILLA-API-KEY", key);
|
||||||
|
}
|
||||||
|
|
||||||
|
let resp = req
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|e| SourceError::Http(e.to_string()))?;
|
||||||
|
if !resp.status().is_success() {
|
||||||
|
return Err(SourceError::Http(format!("{} GET {}", resp.status(), url)));
|
||||||
|
}
|
||||||
|
let body: Value = resp
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(|e| SourceError::Parse(e.to_string()))?;
|
||||||
|
|
||||||
|
let bugs = body
|
||||||
|
.get("bugs")
|
||||||
|
.and_then(Value::as_array)
|
||||||
|
.cloned()
|
||||||
|
.unwrap_or_default();
|
||||||
|
let events: Vec<Event> = bugs
|
||||||
|
.iter()
|
||||||
|
.filter_map(|b| parse_bug(b, &self.config.host))
|
||||||
|
.collect();
|
||||||
|
let n = self.writer.upsert_events(&events).await?;
|
||||||
|
self.state.touch(SOURCE_NAME).await?;
|
||||||
|
debug!(ingested = n, total = bugs.len(), "bugzilla poll complete");
|
||||||
|
Ok(n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_bug(bug: &Value, host: &str) -> Option<Event> {
|
||||||
|
let id = bug.get("id").and_then(Value::as_i64)?;
|
||||||
|
let creation_time = bug.get("creation_time").and_then(Value::as_str)?;
|
||||||
|
let occurred_at = DateTime::parse_from_rfc3339(creation_time)
|
||||||
|
.ok()?
|
||||||
|
.with_timezone(&Utc);
|
||||||
|
|
||||||
|
let mut payload = bug.clone();
|
||||||
|
if let Some(obj) = payload.as_object_mut() {
|
||||||
|
obj.insert("_host".into(), Value::String(host.into()));
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(Event {
|
||||||
|
id: format!("bugzilla:{id}"),
|
||||||
|
source: Source::Bugzilla,
|
||||||
|
action: "BugCreate".into(),
|
||||||
|
occurred_at,
|
||||||
|
// The unauth REST API only returns publicly visible bugs.
|
||||||
|
public: true,
|
||||||
|
payload,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parses_bug_with_creation_time() {
|
||||||
|
let raw = json!({
|
||||||
|
"id": 1158879,
|
||||||
|
"summary": "Commit Access (Level 1) for Rob Thijssen",
|
||||||
|
"creation_time": "2015-04-27T16:29:59Z",
|
||||||
|
"last_change_time": "2015-04-28T14:06:48Z",
|
||||||
|
"status": "RESOLVED",
|
||||||
|
"product": "mozilla.org"
|
||||||
|
});
|
||||||
|
let ev = parse_bug(&raw, "bugzilla.mozilla.org").expect("parses");
|
||||||
|
assert_eq!(ev.id, "bugzilla:1158879");
|
||||||
|
assert_eq!(ev.action, "BugCreate");
|
||||||
|
assert_eq!(ev.source, Source::Bugzilla);
|
||||||
|
assert!(ev.public);
|
||||||
|
assert_eq!(
|
||||||
|
ev.payload.get("_host").and_then(|v| v.as_str()),
|
||||||
|
Some("bugzilla.mozilla.org")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn rejects_bug_missing_id_or_creation_time() {
|
||||||
|
let raw = json!({ "summary": "x" });
|
||||||
|
assert!(parse_bug(&raw, "bugzilla.mozilla.org").is_none());
|
||||||
|
}
|
||||||
|
}
|
||||||
256
crates/moments-data/src/hg.rs
Normal file
256
crates/moments-data/src/hg.rs
Normal file
@@ -0,0 +1,256 @@
|
|||||||
|
//! hg-edge.mozilla.org pushlog ingestion.
|
||||||
|
//!
|
||||||
|
//! mozilla's hg pushlog filter `user=` matches the *pusher*, not the
|
||||||
|
//! changeset author. As a community-level contributor whose code was
|
||||||
|
//! reviewed and pushed by sheriffs/reviewers, the user filter returns 0
|
||||||
|
//! results. So this source pulls full pushlogs from a configured set of
|
||||||
|
//! repos and filters changeset authors client-side by substring match.
|
||||||
|
//!
|
||||||
|
//! The result set is historical (mozilla retired hg) — no new events
|
||||||
|
//! expected after the initial backfill. Daily polling keeps the upserts
|
||||||
|
//! cheap and idempotent.
|
||||||
|
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use chrono::{DateTime, TimeZone, Utc};
|
||||||
|
use moments_core::{EventSource, EventWriter, PollerStateStore, SourceError};
|
||||||
|
use moments_entities::{Event, Source};
|
||||||
|
use reqwest::{Client, header};
|
||||||
|
use serde_json::Value;
|
||||||
|
use tracing::{debug, warn};
|
||||||
|
|
||||||
|
const SOURCE_NAME: &str = "hg";
|
||||||
|
const USER_AGENT: &str = concat!(
|
||||||
|
"moments/",
|
||||||
|
env!("CARGO_PKG_VERSION"),
|
||||||
|
" (+https://rob.tn)"
|
||||||
|
);
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct HgConfig {
|
||||||
|
pub host: String,
|
||||||
|
/// Case-insensitive substrings matched against changeset author fields.
|
||||||
|
pub author_terms: Vec<String>,
|
||||||
|
/// Repo paths under host, e.g. "build/puppet".
|
||||||
|
pub repos: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for HgConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
host: "hg-edge.mozilla.org".into(),
|
||||||
|
author_terms: vec!["thijssen".into(), "grenade".into()],
|
||||||
|
repos: vec![
|
||||||
|
"build/puppet".into(),
|
||||||
|
"build/tools".into(),
|
||||||
|
"build/buildbot-configs".into(),
|
||||||
|
],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct HgSource {
|
||||||
|
client: Client,
|
||||||
|
writer: Arc<dyn EventWriter>,
|
||||||
|
state: Arc<dyn PollerStateStore>,
|
||||||
|
config: HgConfig,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl HgSource {
|
||||||
|
pub fn new(
|
||||||
|
client: Client,
|
||||||
|
writer: Arc<dyn EventWriter>,
|
||||||
|
state: Arc<dyn PollerStateStore>,
|
||||||
|
config: HgConfig,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
client,
|
||||||
|
writer,
|
||||||
|
state,
|
||||||
|
config,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn pushlog_url(&self, repo: &str) -> String {
|
||||||
|
format!(
|
||||||
|
"https://{}/{}/json-pushes?version=2&full=1",
|
||||||
|
self.config.host, repo
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn matches_author(&self, author: &str) -> bool {
|
||||||
|
let lower = author.to_lowercase();
|
||||||
|
self.config
|
||||||
|
.author_terms
|
||||||
|
.iter()
|
||||||
|
.any(|t| lower.contains(&t.to_lowercase()))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn scan_repo(&self, repo: &str) -> Result<usize, SourceError> {
|
||||||
|
let url = self.pushlog_url(repo);
|
||||||
|
let resp = self
|
||||||
|
.client
|
||||||
|
.get(&url)
|
||||||
|
.header(header::USER_AGENT, USER_AGENT)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|e| SourceError::Http(e.to_string()))?;
|
||||||
|
if !resp.status().is_success() {
|
||||||
|
return Err(SourceError::Http(format!("{} GET {}", resp.status(), url)));
|
||||||
|
}
|
||||||
|
let body: Value = resp
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(|e| SourceError::Parse(e.to_string()))?;
|
||||||
|
|
||||||
|
let mut events = Vec::new();
|
||||||
|
if let Some(pushes) = body.get("pushes").and_then(Value::as_object) {
|
||||||
|
for (pushid, push) in pushes {
|
||||||
|
let pushed_at_secs = push.get("date").and_then(Value::as_i64).unwrap_or(0);
|
||||||
|
let pushed_at = Utc
|
||||||
|
.timestamp_opt(pushed_at_secs, 0)
|
||||||
|
.single()
|
||||||
|
.unwrap_or_else(Utc::now);
|
||||||
|
let pusher = push
|
||||||
|
.get("user")
|
||||||
|
.and_then(Value::as_str)
|
||||||
|
.unwrap_or("")
|
||||||
|
.to_string();
|
||||||
|
if let Some(changesets) = push.get("changesets").and_then(Value::as_array) {
|
||||||
|
for cs in changesets {
|
||||||
|
let author = cs.get("author").and_then(Value::as_str).unwrap_or("");
|
||||||
|
if !self.matches_author(author) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let node = cs.get("node").and_then(Value::as_str).unwrap_or("");
|
||||||
|
if node.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let occurred_at = cs
|
||||||
|
.get("date")
|
||||||
|
.and_then(Value::as_array)
|
||||||
|
.and_then(|a| parse_hg_date(a))
|
||||||
|
.unwrap_or(pushed_at);
|
||||||
|
|
||||||
|
let mut payload = cs.clone();
|
||||||
|
if let Some(obj) = payload.as_object_mut() {
|
||||||
|
obj.insert("_repo".into(), Value::String(repo.into()));
|
||||||
|
obj.insert(
|
||||||
|
"_host".into(),
|
||||||
|
Value::String(self.config.host.clone()),
|
||||||
|
);
|
||||||
|
obj.insert("_pusher".into(), Value::String(pusher.clone()));
|
||||||
|
obj.insert(
|
||||||
|
"_pushid".into(),
|
||||||
|
Value::String(pushid.clone()),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
events.push(Event {
|
||||||
|
id: format!("hg:{repo}:{node}"),
|
||||||
|
source: Source::Hg,
|
||||||
|
action: "Commit".into(),
|
||||||
|
occurred_at,
|
||||||
|
// mozilla hg-edge is exclusively public.
|
||||||
|
public: true,
|
||||||
|
payload,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(self.writer.upsert_events(&events).await?)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl EventSource for HgSource {
|
||||||
|
fn name(&self) -> &'static str {
|
||||||
|
SOURCE_NAME
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn poll(&self) -> Result<usize, SourceError> {
|
||||||
|
let mut total = 0usize;
|
||||||
|
for repo in &self.config.repos {
|
||||||
|
match self.scan_repo(repo).await {
|
||||||
|
Ok(n) => {
|
||||||
|
debug!(repo, ingested = n, "hg repo scan complete");
|
||||||
|
total += n;
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
warn!(repo, error = %e, "hg repo scan failed; continuing");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.state.touch(SOURCE_NAME).await?;
|
||||||
|
Ok(total)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a pushlog date array `[seconds, tz_offset_secs]` into UTC.
|
||||||
|
fn parse_hg_date(arr: &[Value]) -> Option<DateTime<Utc>> {
|
||||||
|
let secs = arr.first()?.as_f64()? as i64;
|
||||||
|
Utc.timestamp_opt(secs, 0).single()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn matches_author_substring_case_insensitive() {
|
||||||
|
let s = HgSource {
|
||||||
|
client: Client::new(),
|
||||||
|
writer: Arc::new(NoopWriter),
|
||||||
|
state: Arc::new(NoopState),
|
||||||
|
config: HgConfig {
|
||||||
|
author_terms: vec!["thijssen".into(), "grenade".into()],
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
};
|
||||||
|
assert!(s.matches_author("Rob Thijssen <rob@example.com>"));
|
||||||
|
assert!(s.matches_author("grenade@example"));
|
||||||
|
assert!(s.matches_author("THIJSSEN"));
|
||||||
|
assert!(!s.matches_author("Other Person <other@example>"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_hg_date_handles_seconds() {
|
||||||
|
let arr = vec![Value::from(1_700_000_000_f64), Value::from(0_f64)];
|
||||||
|
let dt = parse_hg_date(&arr).expect("parses");
|
||||||
|
assert_eq!(dt.timestamp(), 1_700_000_000);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tiny stub impls just so we can construct an HgSource for unit tests.
|
||||||
|
struct NoopWriter;
|
||||||
|
#[async_trait]
|
||||||
|
impl EventWriter for NoopWriter {
|
||||||
|
async fn upsert_events(
|
||||||
|
&self,
|
||||||
|
_events: &[Event],
|
||||||
|
) -> Result<usize, moments_core::StoreError> {
|
||||||
|
Ok(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
struct NoopState;
|
||||||
|
#[async_trait]
|
||||||
|
impl PollerStateStore for NoopState {
|
||||||
|
async fn load(
|
||||||
|
&self,
|
||||||
|
_source: &str,
|
||||||
|
) -> Result<Option<moments_core::PollerState>, moments_core::StoreError> {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
async fn save(
|
||||||
|
&self,
|
||||||
|
_source: &str,
|
||||||
|
_etag: Option<&str>,
|
||||||
|
_last_modified: Option<DateTime<Utc>>,
|
||||||
|
) -> Result<(), moments_core::StoreError> {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
async fn touch(&self, _source: &str) -> Result<(), moments_core::StoreError> {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,6 +1,8 @@
|
|||||||
|
pub mod bugzilla;
|
||||||
pub mod gitea;
|
pub mod gitea;
|
||||||
pub mod github;
|
pub mod github;
|
||||||
pub mod github_search;
|
pub mod github_search;
|
||||||
|
pub mod hg;
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
|
|||||||
@@ -4,9 +4,11 @@ use clap::Parser;
|
|||||||
use moments_core::{EventSource, run_poller};
|
use moments_core::{EventSource, run_poller};
|
||||||
use moments_data::{
|
use moments_data::{
|
||||||
PgStore,
|
PgStore,
|
||||||
|
bugzilla::{BugzillaConfig, BugzillaSource},
|
||||||
gitea::{GiteaConfig, GiteaSource},
|
gitea::{GiteaConfig, GiteaSource},
|
||||||
github::{GithubConfig, GithubSource},
|
github::{GithubConfig, GithubSource},
|
||||||
github_search::{GithubSearchConfig, GithubSearchSource},
|
github_search::{GithubSearchConfig, GithubSearchSource},
|
||||||
|
hg::{HgConfig, HgSource},
|
||||||
};
|
};
|
||||||
use reqwest::Client;
|
use reqwest::Client;
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
@@ -45,6 +47,45 @@ struct Args {
|
|||||||
/// Seconds between Gitea activity-feed polls.
|
/// Seconds between Gitea activity-feed polls.
|
||||||
#[arg(long, env = "GITEA_POLL_INTERVAL_SECS", default_value = "600")]
|
#[arg(long, env = "GITEA_POLL_INTERVAL_SECS", default_value = "600")]
|
||||||
gitea_interval_secs: u64,
|
gitea_interval_secs: u64,
|
||||||
|
|
||||||
|
#[arg(long, env = "HG_HOST", default_value = "hg-edge.mozilla.org")]
|
||||||
|
hg_host: String,
|
||||||
|
|
||||||
|
/// Comma-separated mozilla hg repo paths to scan, e.g. "build/puppet,build/tools".
|
||||||
|
#[arg(
|
||||||
|
long,
|
||||||
|
env = "HG_REPOS",
|
||||||
|
value_delimiter = ',',
|
||||||
|
default_value = "build/puppet,build/tools,build/buildbot-configs"
|
||||||
|
)]
|
||||||
|
hg_repos: Vec<String>,
|
||||||
|
|
||||||
|
/// Comma-separated case-insensitive substrings matched against changeset author fields.
|
||||||
|
#[arg(
|
||||||
|
long,
|
||||||
|
env = "HG_AUTHOR_TERMS",
|
||||||
|
value_delimiter = ',',
|
||||||
|
default_value = "thijssen,grenade"
|
||||||
|
)]
|
||||||
|
hg_author_terms: Vec<String>,
|
||||||
|
|
||||||
|
/// Seconds between hg pushlog scans (defaults to 24h — historical data).
|
||||||
|
#[arg(long, env = "HG_POLL_INTERVAL_SECS", default_value = "86400")]
|
||||||
|
hg_interval_secs: u64,
|
||||||
|
|
||||||
|
#[arg(long, env = "BUGZILLA_HOST", default_value = "bugzilla.mozilla.org")]
|
||||||
|
bugzilla_host: String,
|
||||||
|
|
||||||
|
#[arg(long, env = "BUGZILLA_EMAIL", default_value = "rthijssen@mozilla.com")]
|
||||||
|
bugzilla_email: String,
|
||||||
|
|
||||||
|
/// Optional bugzilla API key. Without one, only public bugs are returned.
|
||||||
|
#[arg(long, env = "BUGZILLA_API_KEY")]
|
||||||
|
bugzilla_api_key: Option<String>,
|
||||||
|
|
||||||
|
/// Seconds between bugzilla creator-query polls (defaults to 24h).
|
||||||
|
#[arg(long, env = "BUGZILLA_POLL_INTERVAL_SECS", default_value = "86400")]
|
||||||
|
bugzilla_interval_secs: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
@@ -93,30 +134,66 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
},
|
},
|
||||||
)) as Arc<dyn EventSource>;
|
)) as Arc<dyn EventSource>;
|
||||||
|
|
||||||
|
let hg = Arc::new(HgSource::new(
|
||||||
|
http.clone(),
|
||||||
|
store.clone(),
|
||||||
|
store.clone(),
|
||||||
|
HgConfig {
|
||||||
|
host: args.hg_host.clone(),
|
||||||
|
author_terms: args.hg_author_terms.clone(),
|
||||||
|
repos: args.hg_repos.clone(),
|
||||||
|
},
|
||||||
|
)) as Arc<dyn EventSource>;
|
||||||
|
|
||||||
|
let bugzilla = Arc::new(BugzillaSource::new(
|
||||||
|
http.clone(),
|
||||||
|
store.clone(),
|
||||||
|
store.clone(),
|
||||||
|
BugzillaConfig {
|
||||||
|
host: args.bugzilla_host.clone(),
|
||||||
|
creator_email: args.bugzilla_email.clone(),
|
||||||
|
api_key: args.bugzilla_api_key.clone(),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
)) as Arc<dyn EventSource>;
|
||||||
|
|
||||||
info!(
|
info!(
|
||||||
github_user = args.github_user,
|
github_user = args.github_user,
|
||||||
gitea_host = args.gitea_host,
|
gitea_host = args.gitea_host,
|
||||||
gitea_user = args.gitea_user,
|
gitea_user = args.gitea_user,
|
||||||
|
hg_host = args.hg_host,
|
||||||
|
hg_repos = ?args.hg_repos,
|
||||||
|
bugzilla_host = args.bugzilla_host,
|
||||||
|
bugzilla_email = args.bugzilla_email,
|
||||||
events_interval_secs = args.interval_secs,
|
events_interval_secs = args.interval_secs,
|
||||||
search_interval_secs = args.search_interval_secs,
|
search_interval_secs = args.search_interval_secs,
|
||||||
gitea_interval_secs = args.gitea_interval_secs,
|
gitea_interval_secs = args.gitea_interval_secs,
|
||||||
|
hg_interval_secs = args.hg_interval_secs,
|
||||||
|
bugzilla_interval_secs = args.bugzilla_interval_secs,
|
||||||
"worker started"
|
"worker started"
|
||||||
);
|
);
|
||||||
|
|
||||||
let interval = Duration::from_secs(args.interval_secs);
|
let interval = Duration::from_secs(args.interval_secs);
|
||||||
let search_interval = Duration::from_secs(args.search_interval_secs);
|
let search_interval = Duration::from_secs(args.search_interval_secs);
|
||||||
let gitea_interval = Duration::from_secs(args.gitea_interval_secs);
|
let gitea_interval = Duration::from_secs(args.gitea_interval_secs);
|
||||||
|
let hg_interval = Duration::from_secs(args.hg_interval_secs);
|
||||||
|
let bugzilla_interval = Duration::from_secs(args.bugzilla_interval_secs);
|
||||||
|
|
||||||
let github_task = tokio::spawn(async move { run_poller(github, interval).await });
|
let github_task = tokio::spawn(async move { run_poller(github, interval).await });
|
||||||
let github_search_task =
|
let github_search_task =
|
||||||
tokio::spawn(async move { run_poller(github_search, search_interval).await });
|
tokio::spawn(async move { run_poller(github_search, search_interval).await });
|
||||||
let gitea_task = tokio::spawn(async move { run_poller(gitea, gitea_interval).await });
|
let gitea_task = tokio::spawn(async move { run_poller(gitea, gitea_interval).await });
|
||||||
|
let hg_task = tokio::spawn(async move { run_poller(hg, hg_interval).await });
|
||||||
|
let bugzilla_task =
|
||||||
|
tokio::spawn(async move { run_poller(bugzilla, bugzilla_interval).await });
|
||||||
|
|
||||||
tokio::signal::ctrl_c().await?;
|
tokio::signal::ctrl_c().await?;
|
||||||
info!("shutdown signal received");
|
info!("shutdown signal received");
|
||||||
github_task.abort();
|
github_task.abort();
|
||||||
github_search_task.abort();
|
github_search_task.abort();
|
||||||
gitea_task.abort();
|
gitea_task.abort();
|
||||||
|
hg_task.abort();
|
||||||
|
bugzilla_task.abort();
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user