feat(worker): add hg-edge and bugzilla pollers
Wires two historical sources for completeness with the 2019 timeline: - hg-edge.mozilla.org: scans json-pushes for a configured set of build/* repos and matches changeset author client-side, since the pushlog `user=` filter targets the pusher (sheriffs/reviewers in this case) rather than the author. Daily poll cadence — mozilla retired hg, no new events expected. - bugzilla.mozilla.org: queries /rest/bug?creator=<email>. Without an api key the unauthenticated endpoint only returns public bugs, which is what the public timeline wants anyway. Reshape renders "<author> committed <short_node> in <repo>" for hg and "filed bug #<id> in <product>" for bugzilla, both linking back to the canonical upstream URL via a stamped `_host` payload field. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
256
crates/moments-data/src/hg.rs
Normal file
256
crates/moments-data/src/hg.rs
Normal file
@@ -0,0 +1,256 @@
|
||||
//! hg-edge.mozilla.org pushlog ingestion.
|
||||
//!
|
||||
//! mozilla's hg pushlog filter `user=` matches the *pusher*, not the
|
||||
//! changeset author. As a community-level contributor whose code was
|
||||
//! reviewed and pushed by sheriffs/reviewers, the user filter returns 0
|
||||
//! results. So this source pulls full pushlogs from a configured set of
|
||||
//! repos and filters changeset authors client-side by substring match.
|
||||
//!
|
||||
//! The result set is historical (mozilla retired hg) — no new events
|
||||
//! expected after the initial backfill. Daily polling keeps the upserts
|
||||
//! cheap and idempotent.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, TimeZone, Utc};
|
||||
use moments_core::{EventSource, EventWriter, PollerStateStore, SourceError};
|
||||
use moments_entities::{Event, Source};
|
||||
use reqwest::{Client, header};
|
||||
use serde_json::Value;
|
||||
use tracing::{debug, warn};
|
||||
|
||||
const SOURCE_NAME: &str = "hg";
|
||||
const USER_AGENT: &str = concat!(
|
||||
"moments/",
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
" (+https://rob.tn)"
|
||||
);
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct HgConfig {
|
||||
pub host: String,
|
||||
/// Case-insensitive substrings matched against changeset author fields.
|
||||
pub author_terms: Vec<String>,
|
||||
/// Repo paths under host, e.g. "build/puppet".
|
||||
pub repos: Vec<String>,
|
||||
}
|
||||
|
||||
impl Default for HgConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
host: "hg-edge.mozilla.org".into(),
|
||||
author_terms: vec!["thijssen".into(), "grenade".into()],
|
||||
repos: vec![
|
||||
"build/puppet".into(),
|
||||
"build/tools".into(),
|
||||
"build/buildbot-configs".into(),
|
||||
],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct HgSource {
|
||||
client: Client,
|
||||
writer: Arc<dyn EventWriter>,
|
||||
state: Arc<dyn PollerStateStore>,
|
||||
config: HgConfig,
|
||||
}
|
||||
|
||||
impl HgSource {
|
||||
pub fn new(
|
||||
client: Client,
|
||||
writer: Arc<dyn EventWriter>,
|
||||
state: Arc<dyn PollerStateStore>,
|
||||
config: HgConfig,
|
||||
) -> Self {
|
||||
Self {
|
||||
client,
|
||||
writer,
|
||||
state,
|
||||
config,
|
||||
}
|
||||
}
|
||||
|
||||
fn pushlog_url(&self, repo: &str) -> String {
|
||||
format!(
|
||||
"https://{}/{}/json-pushes?version=2&full=1",
|
||||
self.config.host, repo
|
||||
)
|
||||
}
|
||||
|
||||
fn matches_author(&self, author: &str) -> bool {
|
||||
let lower = author.to_lowercase();
|
||||
self.config
|
||||
.author_terms
|
||||
.iter()
|
||||
.any(|t| lower.contains(&t.to_lowercase()))
|
||||
}
|
||||
|
||||
async fn scan_repo(&self, repo: &str) -> Result<usize, SourceError> {
|
||||
let url = self.pushlog_url(repo);
|
||||
let resp = self
|
||||
.client
|
||||
.get(&url)
|
||||
.header(header::USER_AGENT, USER_AGENT)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| SourceError::Http(e.to_string()))?;
|
||||
if !resp.status().is_success() {
|
||||
return Err(SourceError::Http(format!("{} GET {}", resp.status(), url)));
|
||||
}
|
||||
let body: Value = resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| SourceError::Parse(e.to_string()))?;
|
||||
|
||||
let mut events = Vec::new();
|
||||
if let Some(pushes) = body.get("pushes").and_then(Value::as_object) {
|
||||
for (pushid, push) in pushes {
|
||||
let pushed_at_secs = push.get("date").and_then(Value::as_i64).unwrap_or(0);
|
||||
let pushed_at = Utc
|
||||
.timestamp_opt(pushed_at_secs, 0)
|
||||
.single()
|
||||
.unwrap_or_else(Utc::now);
|
||||
let pusher = push
|
||||
.get("user")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
if let Some(changesets) = push.get("changesets").and_then(Value::as_array) {
|
||||
for cs in changesets {
|
||||
let author = cs.get("author").and_then(Value::as_str).unwrap_or("");
|
||||
if !self.matches_author(author) {
|
||||
continue;
|
||||
}
|
||||
let node = cs.get("node").and_then(Value::as_str).unwrap_or("");
|
||||
if node.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let occurred_at = cs
|
||||
.get("date")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|a| parse_hg_date(a))
|
||||
.unwrap_or(pushed_at);
|
||||
|
||||
let mut payload = cs.clone();
|
||||
if let Some(obj) = payload.as_object_mut() {
|
||||
obj.insert("_repo".into(), Value::String(repo.into()));
|
||||
obj.insert(
|
||||
"_host".into(),
|
||||
Value::String(self.config.host.clone()),
|
||||
);
|
||||
obj.insert("_pusher".into(), Value::String(pusher.clone()));
|
||||
obj.insert(
|
||||
"_pushid".into(),
|
||||
Value::String(pushid.clone()),
|
||||
);
|
||||
}
|
||||
events.push(Event {
|
||||
id: format!("hg:{repo}:{node}"),
|
||||
source: Source::Hg,
|
||||
action: "Commit".into(),
|
||||
occurred_at,
|
||||
// mozilla hg-edge is exclusively public.
|
||||
public: true,
|
||||
payload,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(self.writer.upsert_events(&events).await?)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl EventSource for HgSource {
|
||||
fn name(&self) -> &'static str {
|
||||
SOURCE_NAME
|
||||
}
|
||||
|
||||
async fn poll(&self) -> Result<usize, SourceError> {
|
||||
let mut total = 0usize;
|
||||
for repo in &self.config.repos {
|
||||
match self.scan_repo(repo).await {
|
||||
Ok(n) => {
|
||||
debug!(repo, ingested = n, "hg repo scan complete");
|
||||
total += n;
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(repo, error = %e, "hg repo scan failed; continuing");
|
||||
}
|
||||
}
|
||||
}
|
||||
self.state.touch(SOURCE_NAME).await?;
|
||||
Ok(total)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a pushlog date array `[seconds, tz_offset_secs]` into UTC.
|
||||
fn parse_hg_date(arr: &[Value]) -> Option<DateTime<Utc>> {
|
||||
let secs = arr.first()?.as_f64()? as i64;
|
||||
Utc.timestamp_opt(secs, 0).single()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn matches_author_substring_case_insensitive() {
|
||||
let s = HgSource {
|
||||
client: Client::new(),
|
||||
writer: Arc::new(NoopWriter),
|
||||
state: Arc::new(NoopState),
|
||||
config: HgConfig {
|
||||
author_terms: vec!["thijssen".into(), "grenade".into()],
|
||||
..Default::default()
|
||||
},
|
||||
};
|
||||
assert!(s.matches_author("Rob Thijssen <rob@example.com>"));
|
||||
assert!(s.matches_author("grenade@example"));
|
||||
assert!(s.matches_author("THIJSSEN"));
|
||||
assert!(!s.matches_author("Other Person <other@example>"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_hg_date_handles_seconds() {
|
||||
let arr = vec![Value::from(1_700_000_000_f64), Value::from(0_f64)];
|
||||
let dt = parse_hg_date(&arr).expect("parses");
|
||||
assert_eq!(dt.timestamp(), 1_700_000_000);
|
||||
}
|
||||
|
||||
// Tiny stub impls just so we can construct an HgSource for unit tests.
|
||||
struct NoopWriter;
|
||||
#[async_trait]
|
||||
impl EventWriter for NoopWriter {
|
||||
async fn upsert_events(
|
||||
&self,
|
||||
_events: &[Event],
|
||||
) -> Result<usize, moments_core::StoreError> {
|
||||
Ok(0)
|
||||
}
|
||||
}
|
||||
struct NoopState;
|
||||
#[async_trait]
|
||||
impl PollerStateStore for NoopState {
|
||||
async fn load(
|
||||
&self,
|
||||
_source: &str,
|
||||
) -> Result<Option<moments_core::PollerState>, moments_core::StoreError> {
|
||||
Ok(None)
|
||||
}
|
||||
async fn save(
|
||||
&self,
|
||||
_source: &str,
|
||||
_etag: Option<&str>,
|
||||
_last_modified: Option<DateTime<Utc>>,
|
||||
) -> Result<(), moments_core::StoreError> {
|
||||
Ok(())
|
||||
}
|
||||
async fn touch(&self, _source: &str) -> Result<(), moments_core::StoreError> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user