feat(worker): add github events poller
Adds the first ingestion source. Page-1 polling is ETag-conditional
(304s don't count against rate limit); the very first run paginates
back through Link "next" pages up to a 10-page safety cap so the
table starts populated rather than waiting for new activity.
Hits /users/{user}/events/public — works without auth, returns the
right scope for a public timeline. Token (GITHUB_TOKEN) is optional;
when present it raises the rate limit from 60 to 5000/hr.
New plumbing:
moments-core::sources
- EventSource trait (poll() -> count)
- PollerStateStore trait (etag persistence port)
- run_poller driver: tokio interval + jittered exponential backoff
moments-data::github
- GithubSource impl, raw payload preserved as JSONB
- parse_link_next for pagination
- 4 unit tests covering parser + Link parsing
migration 0002_poller_state.sql
- one row per source: source, etag, last_modified, last_fetched
Worker binary spawns one tokio task per source (just github for now)
and aborts on SIGINT. Verified by smoke-curling the upstream endpoint:
ETag and Link headers are present; payload shape matches the parser.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,12 @@
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use clap::Parser;
|
||||
use moments_data::PgStore;
|
||||
use moments_core::{EventSource, run_poller};
|
||||
use moments_data::{
|
||||
PgStore,
|
||||
github::{GithubConfig, GithubSource},
|
||||
};
|
||||
use reqwest::Client;
|
||||
use tracing::info;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
@@ -7,6 +14,17 @@ use tracing::info;
|
||||
struct Args {
|
||||
#[arg(long, env = "DATABASE_URL")]
|
||||
database_url: String,
|
||||
|
||||
#[arg(long, env = "GITHUB_USER", default_value = "grenade")]
|
||||
github_user: String,
|
||||
|
||||
/// Optional GitHub token. Higher rate limit and access to private events.
|
||||
#[arg(long, env = "GITHUB_TOKEN")]
|
||||
github_token: Option<String>,
|
||||
|
||||
/// Seconds between poll attempts per source.
|
||||
#[arg(long, env = "POLL_INTERVAL_SECS", default_value = "600")]
|
||||
interval_secs: u64,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
@@ -14,14 +32,36 @@ async fn main() -> anyhow::Result<()> {
|
||||
init_tracing();
|
||||
let args = Args::parse();
|
||||
|
||||
let store = PgStore::connect(&args.database_url).await?;
|
||||
let store = Arc::new(PgStore::connect(&args.database_url).await?);
|
||||
store.migrate().await?;
|
||||
|
||||
info!("worker started — pollers will land in step 2");
|
||||
let http = Client::builder()
|
||||
.timeout(Duration::from_secs(30))
|
||||
.build()?;
|
||||
|
||||
// Pollers (github, gitea, hg, bugzilla) land in subsequent steps.
|
||||
// For now this binary only verifies it can reach the database.
|
||||
let _ = store;
|
||||
let github = Arc::new(GithubSource::new(
|
||||
http.clone(),
|
||||
store.clone(),
|
||||
store.clone(),
|
||||
GithubConfig {
|
||||
user: args.github_user.clone(),
|
||||
token: args.github_token.clone(),
|
||||
per_page: 100,
|
||||
},
|
||||
)) as Arc<dyn EventSource>;
|
||||
|
||||
info!(
|
||||
github_user = args.github_user,
|
||||
interval_secs = args.interval_secs,
|
||||
"worker started"
|
||||
);
|
||||
|
||||
let interval = Duration::from_secs(args.interval_secs);
|
||||
let github_task = tokio::spawn(async move { run_poller(github, interval).await });
|
||||
|
||||
tokio::signal::ctrl_c().await?;
|
||||
info!("shutdown signal received");
|
||||
github_task.abort();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user