feat(worker): add github events poller
Adds the first ingestion source. Page-1 polling is ETag-conditional
(304s don't count against rate limit); the very first run paginates
back through Link "next" pages up to a 10-page safety cap so the
table starts populated rather than waiting for new activity.
Hits /users/{user}/events/public — works without auth, returns the
right scope for a public timeline. Token (GITHUB_TOKEN) is optional;
when present it raises the rate limit from 60 to 5000/hr.
New plumbing:
moments-core::sources
- EventSource trait (poll() -> count)
- PollerStateStore trait (etag persistence port)
- run_poller driver: tokio interval + jittered exponential backoff
moments-data::github
- GithubSource impl, raw payload preserved as JSONB
- parse_link_next for pagination
- 4 unit tests covering parser + Link parsing
migration 0002_poller_state.sql
- one row per source: source, etag, last_modified, last_fetched
Worker binary spawns one tokio task per source (just github for now)
and aborts on SIGINT. Verified by smoke-curling the upstream endpoint:
ETag and Link headers are present; payload shape matches the parser.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -12,4 +12,6 @@ serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
chrono.workspace = true
|
||||
thiserror.workspace = true
|
||||
async-trait = "0.1"
|
||||
async-trait.workspace = true
|
||||
tokio = { workspace = true, features = ["rt", "time"] }
|
||||
tracing.workspace = true
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
pub mod sources;
|
||||
|
||||
pub use sources::{EventSource, PollerState, PollerStateStore, SourceError, run_poller};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use moments_entities::{Event, EventQuery, SourceSummary};
|
||||
|
||||
|
||||
96
crates/moments-core/src/sources.rs
Normal file
96
crates/moments-core/src/sources.rs
Normal file
@@ -0,0 +1,96 @@
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Utc};
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::StoreError;
|
||||
|
||||
/// A pollable upstream activity feed (github, gitea, hg, bugzilla).
|
||||
///
|
||||
/// Implementations are responsible for: fetching from upstream, persisting
|
||||
/// any incremental-fetch bookkeeping (etag, since-cursor), transforming
|
||||
/// raw payloads into [`moments_entities::Event`] rows, and writing them.
|
||||
/// `poll` returns the count of rows ingested on this tick (0 if nothing
|
||||
/// changed upstream).
|
||||
#[async_trait]
|
||||
pub trait EventSource: Send + Sync {
|
||||
fn name(&self) -> &'static str;
|
||||
async fn poll(&self) -> Result<usize, SourceError>;
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum SourceError {
|
||||
#[error("http: {0}")]
|
||||
Http(String),
|
||||
#[error("parse: {0}")]
|
||||
Parse(String),
|
||||
#[error("storage: {0}")]
|
||||
Storage(#[from] StoreError),
|
||||
}
|
||||
|
||||
/// Persisted per-source bookkeeping for incremental fetch (etag, last-modified).
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PollerState {
|
||||
pub source: String,
|
||||
pub etag: Option<String>,
|
||||
pub last_modified: Option<DateTime<Utc>>,
|
||||
pub last_fetched: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait PollerStateStore: Send + Sync {
|
||||
async fn load(&self, source: &str) -> Result<Option<PollerState>, StoreError>;
|
||||
async fn save(
|
||||
&self,
|
||||
source: &str,
|
||||
etag: Option<&str>,
|
||||
last_modified: Option<DateTime<Utc>>,
|
||||
) -> Result<(), StoreError>;
|
||||
async fn touch(&self, source: &str) -> Result<(), StoreError>;
|
||||
}
|
||||
|
||||
/// Drive a single source on a fixed interval until cancelled. Backs off
|
||||
/// (with jitter) on consecutive failures up to a 64-second ceiling.
|
||||
pub async fn run_poller(source: Arc<dyn EventSource>, interval: Duration) {
|
||||
let mut ticker = tokio::time::interval(interval);
|
||||
ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
|
||||
|
||||
let mut consecutive_failures: u32 = 0;
|
||||
loop {
|
||||
ticker.tick().await;
|
||||
match source.poll().await {
|
||||
Ok(0) => {
|
||||
consecutive_failures = 0;
|
||||
debug!(source = source.name(), "no new events");
|
||||
}
|
||||
Ok(count) => {
|
||||
consecutive_failures = 0;
|
||||
info!(source = source.name(), count, "ingested");
|
||||
}
|
||||
Err(e) => {
|
||||
consecutive_failures = consecutive_failures.saturating_add(1);
|
||||
let backoff = backoff_with_jitter(consecutive_failures);
|
||||
warn!(
|
||||
source = source.name(),
|
||||
error = %e,
|
||||
attempt = consecutive_failures,
|
||||
backoff_ms = backoff.as_millis() as u64,
|
||||
"poll failed; backing off"
|
||||
);
|
||||
tokio::time::sleep(backoff).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Computes the sleep applied after a failed poll: an exponential base plus
/// just under one second of jitter.
///
/// The base is `1s << attempt` with the exponent clamped to 6, so `attempt`
/// values 0, 1, 2, … map to 1s, 2s, 4s, … and every value from 6 upward maps
/// to 64s. The jitter adds 0–999 ms on top of the base.
fn backoff_with_jitter(attempt: u32) -> Duration {
    /// Largest exponent applied to the one-second base (2^6 s = 64 s).
    const MAX_EXPONENT: u32 = 6;

    // 1_000 << 6 == 64_000, so the shift cannot overflow a u64.
    let base_ms: u64 = 1_000u64 << attempt.min(MAX_EXPONENT);

    // Cheap pseudo-random jitter taken from the wall clock — fine for backoff
    // smoothing. The microsecond digits are used rather than the
    // sub-microsecond nanosecond digits because `SystemTime` precision is
    // platform-dependent (e.g. 100 ns intervals on Windows); on a coarse
    // clock the lowest nanosecond digits are constant or take only a handful
    // of values, which would collapse the jitter.
    let jitter_ms = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|since_epoch| u64::from(since_epoch.subsec_micros() % 1_000))
        // A clock set before the epoch just means no jitter this time.
        .unwrap_or(0);

    Duration::from_millis(base_ms + jitter_ms)
}
|
||||
Reference in New Issue
Block a user