Initial commit: rbv workspace with ingest, API, UI, and ML client

Rust workspace with crates for entity types, hashing, database access,
ML client (immich-ml compatible), ingest pipeline, clustering, auth,
search, CLI, and axum API server. Vite/React UI. SQL migrations.
Includes retry/backoff on transient ML API connection errors.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-22 16:51:50 +02:00
commit a27d636b88
114 changed files with 11434 additions and 0 deletions

23
.gitignore vendored Normal file
View File

@@ -0,0 +1,23 @@
# Rust build artifacts
/target/
# Environment / secrets
.env
.env.local
.env.*.local
# TLS certificates and private keys
*.pem
*.key
*.crt
*.p12
*.pfx
# UI production build
/dist/
# Editor
.idea/
.vscode/
*.swp
*~

3823
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

79
Cargo.toml Normal file
View File

@@ -0,0 +1,79 @@
# Workspace manifest: all member crates share this version/edition/license
# and pull dependency versions from the single [workspace.dependencies] table.
[workspace]
resolver = "2"
members = [
    "crates/rbv-entity",
    "crates/rbv-hash",
    "crates/rbv-data",
    "crates/rbv-ml",
    "crates/rbv-cluster",
    "crates/rbv-ingest",
    "crates/rbv-auth",
    "crates/rbv-search",
    "crates/rbv-cli",
    "crates/rbv-api",
]

[workspace.package]
edition = "2024"
version = "0.1.0"
license = "MIT"

[workspace.dependencies]
# Serialization
serde = { version = "1", features = ["derive"] }
serde_json = "1"
# Async runtime
tokio = { version = "1", features = ["full"] }
# HTTP client (for ML API)
reqwest = { version = "0.12", default-features = false, features = ["multipart", "json", "rustls-tls"] }
# Web framework
axum = { version = "0.8", features = ["macros"] }
tower = "0.5"
tower-http = { version = "0.6", features = ["fs", "cors", "trace"] }
# TLS
rustls = { version = "0.23", features = ["ring"] }
rustls-pemfile = "2"
tokio-rustls = "0.26"
x509-parser = "0.16"
# Database
sqlx = { version = "0.8", features = ["runtime-tokio-rustls", "postgres", "uuid", "chrono", "migrate"] }
pgvector = { version = "0.4", features = ["sqlx"] }
# Hashing
blake3 = "1"
argon2 = "0.5"
# CLI
clap = { version = "4", features = ["derive"] }
# Logging / tracing
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
# Utilities
uuid = { version = "1", features = ["v4", "serde"] }
chrono = { version = "0.4", features = ["serde"] }
thiserror = "2"
anyhow = "1"
glob = "0.3"
rand = "0.8"
hyper-util = { version = "0.1", features = ["tokio", "server", "server-auto", "http1", "http2"] }
axum-server = { version = "0.7", features = ["tls-rustls"] }
# Image decoding — the feature list here is the set of formats the API can
# decode and therefore should serve with a proper Content-Type.
image = { version = "0.25", default-features = false, features = ["jpeg", "png", "webp", "tiff", "gif", "bmp"] }
# Internal crates
rbv-entity = { path = "crates/rbv-entity" }
rbv-hash = { path = "crates/rbv-hash" }
rbv-data = { path = "crates/rbv-data" }
rbv-ml = { path = "crates/rbv-ml" }
rbv-cluster = { path = "crates/rbv-cluster" }
rbv-ingest = { path = "crates/rbv-ingest" }
rbv-auth = { path = "crates/rbv-auth" }
rbv-search = { path = "crates/rbv-search" }

View File

@@ -0,0 +1,20 @@
# Podman Quadlet unit: PostgreSQL with the vectorchord / pgvecto.rs vector
# extensions, published on host port 4432 (the port the API's default DSN uses).
[Unit]
Description=rbv postgresql (vectorchord)
After=network-online.target
Wants=network-online.target

[Container]
# Image from the immich project, pinned to specific extension versions.
Image=ghcr.io/immich-app/postgres:14-vectorchord0.4.3-pgvectors0.2.0
ContainerName=rbv-postgres
# Database credentials are read from this env file, not stored in the unit.
EnvironmentFile=/tank/containers/rbv/.env
Volume=/tank/containers/rbv/db:/var/lib/postgresql/data:Z
PublishPort=4432:5432
ShmSize=128m
AutoUpdate=registry

[Service]
Restart=always
TimeoutStartSec=120

[Install]
WantedBy=multi-user.target default.target

15
asset/systemd/rbv.service Normal file
View File

@@ -0,0 +1,15 @@
# systemd unit for the rbv API binary. %H expands to the host name, so the
# server cert/key are expected at /etc/pki/tls/{misc,private}/<hostname>.pem.
[Unit]
Description=rbv-api
After=network.target

[Service]
# NOTE(review): the database password is inline in this unit file; consider
# moving the DSN into an EnvironmentFile with restricted permissions.
ExecStart=/usr/local/bin/rbv-api \
    --ca-cert /etc/pki/ca-trust/source/anchors/root-internal.pem \
    --server-cert /etc/pki/tls/misc/%H.pem \
    --server-key /etc/pki/tls/private/%H.pem \
    --database postgres://rbv:password@localhost:4432/rbv \
    --ml-uri http://127.0.0.1:3003
Restart=always

[Install]
WantedBy=multi-user.target

38
crates/rbv-api/Cargo.toml Normal file
View File

@@ -0,0 +1,38 @@
# rbv-api: the axum HTTPS API server binary. All versions and feature flags
# come from the workspace dependency table.
[package]
name = "rbv-api"
version.workspace = true
edition.workspace = true
license.workspace = true

[[bin]]
name = "rbv-api"
path = "src/main.rs"

[dependencies]
# Internal crates
rbv-entity = { workspace = true }
rbv-hash = { workspace = true }
rbv-data = { workspace = true }
rbv-ml = { workspace = true }
rbv-auth = { workspace = true }
rbv-search = { workspace = true }
# Database / web / TLS stack
sqlx = { workspace = true }
axum = { workspace = true }
tower = { workspace = true }
tower-http = { workspace = true }
tokio = { workspace = true }
tokio-rustls = { workspace = true }
rustls = { workspace = true }
rustls-pemfile = { workspace = true }
x509-parser = { workspace = true }
clap = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
uuid = { workspace = true }
chrono = { workspace = true }
anyhow = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }
image = { workspace = true }
hyper-util = { workspace = true }
axum-server = { workspace = true }

View File

@@ -0,0 +1,42 @@
use std::path::PathBuf;
use clap::Parser;
// Command-line arguments for the API server binary. Field doc comments
// double as `--help` text via clap's derive.
#[derive(Parser)]
#[command(name = "rbv-api", about = "RBV HTTPS API server")]
pub struct ApiArgs {
    /// Path to the CA certificate PEM file
    // NOTE(review): not currently consumed by tls::build_rustls_config, so
    // client certificates are not verified by the server yet — confirm
    // whether rbv_auth::build_tls_config was meant to be wired in.
    #[arg(long)]
    pub ca_cert: PathBuf,
    /// Path to the server certificate PEM file
    #[arg(long)]
    pub server_cert: PathBuf,
    /// Path to the server private key PEM file
    #[arg(long)]
    pub server_key: PathBuf,
    /// Allowed client CNs. If unset, any valid client cert is accepted.
    #[arg(long)]
    pub client_cn: Option<Vec<String>>,
    /// PostgreSQL connection string
    #[arg(long)]
    pub database: String,
    /// Base URL of the machine learning API
    #[arg(long)]
    pub ml_uri: String,
    /// Address to listen on
    #[arg(long, default_value = "0.0.0.0:8443")]
    pub listen: String,
    /// Path to the built UI static files directory
    #[arg(long)]
    pub ui_dir: Option<PathBuf>,
    /// Directory for caching face crop thumbnails. Created automatically if it doesn't exist.
    #[arg(long)]
    pub face_cache: Option<PathBuf>,
}

View File

@@ -0,0 +1,55 @@
use axum::http::StatusCode;
use axum::response::{IntoResponse, Response};
use axum::Json;
use serde_json::json;
#[derive(Debug)]
pub struct ApiError {
pub status: StatusCode,
pub code: &'static str,
pub message: String,
}
impl ApiError {
pub fn not_found(msg: impl Into<String>) -> Self {
Self { status: StatusCode::NOT_FOUND, code: "not_found", message: msg.into() }
}
pub fn bad_request(msg: impl Into<String>) -> Self {
Self { status: StatusCode::BAD_REQUEST, code: "bad_request", message: msg.into() }
}
pub fn unauthorized(code: &'static str, msg: impl Into<String>) -> Self {
Self { status: StatusCode::UNAUTHORIZED, code, message: msg.into() }
}
#[allow(dead_code)]
pub fn forbidden(code: &'static str, msg: impl Into<String>) -> Self {
Self { status: StatusCode::FORBIDDEN, code, message: msg.into() }
}
pub fn internal(msg: impl Into<String>) -> Self {
Self { status: StatusCode::INTERNAL_SERVER_ERROR, code: "internal_error", message: msg.into() }
}
}
impl IntoResponse for ApiError {
fn into_response(self) -> Response {
let body = json!({ "error": self.code, "message": self.message });
(self.status, Json(body)).into_response()
}
}
impl From<anyhow::Error> for ApiError {
fn from(e: anyhow::Error) -> Self {
Self::internal(e.to_string())
}
}
impl From<sqlx::Error> for ApiError {
fn from(e: sqlx::Error) -> Self {
Self::internal(e.to_string())
}
}
pub type ApiResult<T> = Result<T, ApiError>;

View File

@@ -0,0 +1,26 @@
mod args;
mod middleware;
mod server;
mod tls;
mod state;
mod error;
mod routes;
use anyhow::Result;
use clap::Parser;
use args::ApiArgs;
#[tokio::main]
async fn main() -> Result<()> {
    // The ring-backed crypto provider must be installed before any rustls
    // configuration is built; a failure here is a programming error.
    rustls::crypto::ring::default_provider()
        .install_default()
        .expect("failed to install rustls ring crypto provider");

    // Honour RUST_LOG when set; otherwise log at `info`.
    let filter = tracing_subscriber::EnvFilter::try_from_default_env()
        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"));
    tracing_subscriber::fmt().with_env_filter(filter).init();

    server::run(ApiArgs::parse()).await
}

View File

@@ -0,0 +1,49 @@
use axum::{
extract::{Request, State},
middleware::Next,
response::Response,
};
use crate::{error::ApiError, state::AppState};
/// Validated session identity, inserted into request extensions by `require_session`.
#[derive(Clone)]
pub struct AuthUser {
    pub id: uuid::Uuid,
    pub username: String,
}

/// Axum middleware: reads the `session` cookie, validates it against the DB,
/// and inserts `AuthUser` into request extensions. Returns 401 on failure.
pub async fn require_session(
    State(state): State<AppState>,
    mut req: Request,
    next: Next,
) -> Result<Response, ApiError> {
    // No cookie at all -> not authenticated.
    let Some(token) = extract_session_cookie(req.headers()) else {
        return Err(ApiError::unauthorized("no_session", "not authenticated"));
    };

    // Token must map to a live (non-expired) session row.
    let maybe_session = rbv_data::user::validate_session(&state.pool, &token)
        .await
        .map_err(|e| ApiError::internal(e.to_string()))?;
    let Some(session) = maybe_session else {
        return Err(ApiError::unauthorized("invalid_session", "session expired or invalid"));
    };

    // The session's user must still exist.
    let maybe_user = rbv_data::user::get_user_by_id(&state.pool, session.user_id)
        .await
        .map_err(|e| ApiError::internal(e.to_string()))?;
    let Some(user) = maybe_user else {
        return Err(ApiError::unauthorized("invalid_session", "user not found"));
    };

    req.extensions_mut().insert(AuthUser { id: user.id, username: user.username });
    Ok(next.run(req).await)
}
/// Parse `session=<token>` out of the Cookie request header.
pub fn extract_session_cookie(headers: &axum::http::HeaderMap) -> Option<String> {
    headers
        .get("cookie")?
        .to_str()
        .ok()?
        .split(';')
        .find_map(|pair| pair.trim().strip_prefix("session="))
        .map(str::to_string)
}

View File

@@ -0,0 +1,125 @@
use axum::{
extract::State,
http::{header::SET_COOKIE, HeaderMap, StatusCode},
routing::{get, post},
Extension, Json, Router,
};
use chrono::Utc;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use rbv_entity::{Session, User};
use rbv_auth::{hash_password, verify_password, generate_session_token};
use crate::{
error::{ApiError, ApiResult},
middleware::{extract_session_cookie, AuthUser},
state::AppState,
};
// Session lifetime in days; used for both the cookie Max-Age and the DB row.
const SESSION_TTL_DAYS: i64 = 7;

/// Public routes: login, register, logout — no session required.
pub fn router() -> Router<AppState> {
    Router::new()
        .route("/register", post(register))
        .route("/login", post(login))
        .route("/logout", post(logout))
}

/// Protected route: /me — requires a valid session (injected by middleware).
pub fn me_router() -> Router<AppState> {
    Router::new()
        .route("/me", get(me))
}
/// Request body shared by register and login.
#[derive(Deserialize)]
pub struct AuthBody {
    pub username: String,
    pub password: String,
}

/// Public view of a user; never exposes the password hash.
#[derive(Serialize)]
pub struct UserResponse {
    // UUID rendered as a string for JSON clients.
    pub id: String,
    pub username: String,
}
/// POST /api/auth/register — create an account and log it in immediately.
///
/// Validation: username at least 3 bytes, password at least 8 bytes,
/// username not already taken. On success returns 201 with the new user
/// and a Set-Cookie header carrying the fresh session.
async fn register(
    State(state): State<AppState>,
    Json(body): Json<AuthBody>,
) -> ApiResult<(StatusCode, HeaderMap, Json<UserResponse>)> {
    if body.username.len() < 3 {
        return Err(ApiError::bad_request("username too short"));
    }
    if body.password.len() < 8 {
        return Err(ApiError::bad_request("password must be at least 8 characters"));
    }
    if rbv_data::user::get_user_by_username(&state.pool, &body.username).await?.is_some() {
        return Err(ApiError::bad_request("username already taken"));
    }
    // Argon2 hashing failure is unexpected; surface it as a 500.
    let password_hash = hash_password(&body.password)
        .map_err(|e| ApiError::internal(e.to_string()))?;
    let user = User {
        id: Uuid::new_v4(),
        username: body.username.clone(),
        password_hash,
        created_at: Utc::now(),
    };
    rbv_data::user::create_user(&state.pool, &user).await?;
    // Only the cookie header is needed here; discard the raw token via the
    // `_token` pattern (replaces a needless bind + explicit drop, and
    // matches how `login` does it).
    let (_token, headers) = issue_session_cookie(&state, user.id).await?;
    Ok((StatusCode::CREATED, headers, Json(UserResponse { id: user.id.to_string(), username: user.username })))
}
/// POST /api/auth/login — verify credentials and start a session.
async fn login(
    State(state): State<AppState>,
    Json(body): Json<AuthBody>,
) -> ApiResult<(HeaderMap, Json<UserResponse>)> {
    // Same error for unknown user and wrong password (avoids user enumeration).
    let invalid = || ApiError::unauthorized("invalid_credentials", "invalid username or password");
    let user = rbv_data::user::get_user_by_username(&state.pool, &body.username)
        .await?
        .ok_or_else(invalid)?;
    verify_password(&body.password, &user.password_hash).map_err(|_| invalid())?;
    let (_token, headers) = issue_session_cookie(&state, user.id).await?;
    Ok((headers, Json(UserResponse { id: user.id.to_string(), username: user.username })))
}
/// POST /api/auth/logout — delete the session row (if any) and clear the cookie.
///
/// Best-effort: a missing or unknown cookie still yields 204.
async fn logout(
    State(state): State<AppState>,
    req_headers: HeaderMap,
) -> ApiResult<(StatusCode, HeaderMap)> {
    if let Some(token) = extract_session_cookie(&req_headers) {
        rbv_data::user::delete_session(&state.pool, &token).await?;
    }
    let mut headers = HeaderMap::new();
    // Max-Age=0 expires the cookie immediately. `Secure` added: the server
    // only listens over TLS (axum_server::bind_rustls), so the session
    // cookie should never be sent over plain HTTP.
    headers.insert(SET_COOKIE, "session=; HttpOnly; Secure; SameSite=Strict; Path=/; Max-Age=0".parse().unwrap());
    Ok((StatusCode::NO_CONTENT, headers))
}
async fn me(Extension(auth): Extension<AuthUser>) -> Json<UserResponse> {
Json(UserResponse { id: auth.id.to_string(), username: auth.username })
}
/// Create a session row for `user_id` and build the matching Set-Cookie header.
///
/// Returns the raw token (callers may ignore it) and a HeaderMap containing
/// the cookie. The cookie carries `HttpOnly` (no JS access), `Secure`
/// (HTTPS only — the server exclusively serves TLS via bind_rustls, so this
/// flag was missing before and is the fix here), `SameSite=Strict`, and a
/// Max-Age matching the DB-side expiry.
async fn issue_session_cookie(state: &AppState, user_id: Uuid) -> ApiResult<(String, HeaderMap)> {
    let token = generate_session_token();
    let session = Session {
        token: token.clone(),
        user_id,
        created_at: Utc::now(),
        expires_at: Utc::now() + chrono::Duration::days(SESSION_TTL_DAYS),
    };
    rbv_data::user::create_session(&state.pool, &session).await?;
    let cookie = format!(
        "session={}; HttpOnly; Secure; SameSite=Strict; Path=/; Max-Age={}",
        token,
        SESSION_TTL_DAYS * 86400,
    );
    let mut headers = HeaderMap::new();
    headers.insert(SET_COOKIE, cookie.parse().unwrap());
    Ok((token, headers))
}

View File

@@ -0,0 +1,86 @@
use axum::{body::Body, extract::{Path, State}, http::{header, StatusCode}, response::Response, routing::get, Router};
use sqlx::Row;
use crate::{error::{ApiError, ApiResult}, state::AppState};
/// Face routes: currently just the cropped-face JPEG thumbnail.
pub fn router() -> Router<AppState> {
    Router::new()
        .route("/{id}/crop", get(serve_face_crop))
}
async fn serve_face_crop(
State(state): State<AppState>,
Path(id): Path<String>,
) -> ApiResult<Response> {
let bytes = rbv_hash::from_hex(&id).map_err(|_| ApiError::bad_request("invalid face id"))?;
let fid = rbv_entity::FaceId(bytes);
// Serve from disk cache if available.
if let Some(cache_dir) = &state.face_cache {
let cache_path = cache_dir.join(format!("{id}.jpg"));
if let Ok(data) = tokio::fs::read(&cache_path).await {
return Ok(jpeg_response(data));
}
}
let row = sqlx::query(
"SELECT image_id, x1, y1, x2, y2 FROM face_detections WHERE id = $1",
)
.bind(fid.as_bytes())
.fetch_optional(&state.pool)
.await
.map_err(|e| ApiError::internal(e.to_string()))?
.ok_or_else(|| ApiError::not_found("face not found"))?;
let iid_bytes: Vec<u8> = row.get("image_id");
let iid = rbv_entity::ImageId(iid_bytes.try_into().expect("32-byte id"));
let (gallery_path, filename) = rbv_data::image::find_image_gallery_path(&state.pool, &iid)
.await?
.ok_or_else(|| ApiError::not_found("source image not found"))?;
let file_path = std::path::PathBuf::from(&gallery_path).join(&filename);
let raw = tokio::fs::read(&file_path)
.await
.map_err(|_| ApiError::not_found("image file not found on disk"))?;
let x1: i32 = row.get("x1");
let y1: i32 = row.get("y1");
let x2: i32 = row.get("x2");
let y2: i32 = row.get("y2");
let img = image::load_from_memory(&raw)
.map_err(|e| ApiError::internal(format!("image decode: {e}")))?;
let x = x1.max(0) as u32;
let y = y1.max(0) as u32;
let w = (x2 - x1).max(1) as u32;
let h = (y2 - y1).max(1) as u32;
let cropped = img.crop_imm(x, y, w, h);
let resized = cropped.resize(256, 256, image::imageops::FilterType::Lanczos3);
let mut out_bytes: Vec<u8> = Vec::new();
resized
.write_to(&mut std::io::Cursor::new(&mut out_bytes), image::ImageFormat::Jpeg)
.map_err(|e| ApiError::internal(format!("encode: {e}")))?;
// Write to disk cache asynchronously — don't fail the request if this errors.
if let Some(cache_dir) = &state.face_cache {
let cache_path = cache_dir.join(format!("{id}.jpg"));
let data = out_bytes.clone();
tokio::spawn(async move {
let _ = tokio::fs::write(&cache_path, &data).await;
});
}
Ok(jpeg_response(out_bytes))
}
/// Wrap encoded JPEG bytes in a 200 response with a long-lived immutable
/// cache header (face ids are content-addressed, so the crop never changes).
fn jpeg_response(data: Vec<u8>) -> Response {
    let builder = Response::builder()
        .status(StatusCode::OK)
        .header(header::CONTENT_TYPE, "image/jpeg")
        .header(header::CACHE_CONTROL, "public, max-age=31536000, immutable");
    // Static status + header names cannot fail to build.
    builder.body(Body::from(data)).unwrap()
}

View File

@@ -0,0 +1,103 @@
use axum::{extract::{Path, Query, State}, routing::get, Json, Router};
use serde::{Deserialize, Serialize};
use rbv_entity::Gallery;
use crate::{error::ApiResult, state::AppState};
/// Gallery routes: listing, random sample, detail, and image manifest.
/// The static `/random` segment takes priority over the `{id}` capture.
pub fn router() -> Router<AppState> {
    Router::new()
        .route("/", get(list_galleries))
        .route("/random", get(random_galleries))
        .route("/{id}", get(get_gallery))
        .route("/{id}/images", get(get_gallery_images))
}
/// 1-based pagination query parameters (`?page=&per_page=`).
#[derive(Deserialize)]
pub struct Pagination {
    #[serde(default = "default_page")]
    pub page: i64,
    #[serde(default = "default_per_page")]
    pub per_page: i64,
}

// serde `default = "..."` takes a function path, hence these tiny fns.
fn default_page() -> i64 { 1 }
fn default_per_page() -> i64 { 24 }
/// JSON projection of a `Gallery`: the binary id rendered as hex and the
/// filesystem path rendered as a string.
#[derive(Serialize)]
pub struct GalleryResponse {
    pub id: String,
    pub source_id: u64,
    pub collection: String,
    pub source_name: String,
    pub source_url: String,
    pub subjects: Vec<String>,
    pub tags: Vec<String>,
    pub path: String,
}

impl From<Gallery> for GalleryResponse {
    fn from(g: Gallery) -> Self {
        Self {
            id: g.id.to_hex(),
            source_id: g.source_id,
            collection: g.collection,
            source_name: g.source_name,
            source_url: g.source_url,
            subjects: g.subjects,
            tags: g.tags,
            // Lossy: non-UTF8 path bytes become U+FFFD in the JSON output.
            path: g.path.to_string_lossy().into_owned(),
        }
    }
}
async fn list_galleries(
State(state): State<AppState>,
Query(q): Query<Pagination>,
) -> ApiResult<Json<Vec<GalleryResponse>>> {
let galleries = rbv_data::gallery::list_galleries(&state.pool, q.page, q.per_page).await?;
Ok(Json(galleries.into_iter().map(Into::into).collect()))
}
#[derive(Deserialize)]
pub struct RandomQuery {
#[serde(default = "default_random_count")]
pub count: i64,
}
fn default_random_count() -> i64 { 20 }
async fn random_galleries(
State(state): State<AppState>,
Query(q): Query<RandomQuery>,
) -> ApiResult<Json<Vec<GalleryResponse>>> {
let galleries = rbv_data::gallery::random_galleries(&state.pool, q.count).await?;
Ok(Json(galleries.into_iter().map(Into::into).collect()))
}
async fn get_gallery(
State(state): State<AppState>,
Path(id): Path<String>,
) -> ApiResult<Json<GalleryResponse>> {
let bytes = rbv_hash::from_hex(&id)
.map_err(|_| crate::error::ApiError::bad_request("invalid gallery id"))?;
let gid = rbv_entity::GalleryId(bytes);
let gallery = rbv_data::gallery::get_gallery(&state.pool, &gid)
.await?
.ok_or_else(|| crate::error::ApiError::not_found("gallery not found"))?;
Ok(Json(gallery.into()))
}
/// GET /api/galleries/{id}/images — the gallery's image manifest
/// (id, filename, ordering, dimensions) as loose JSON objects.
async fn get_gallery_images(
    State(state): State<AppState>,
    Path(id): Path<String>,
) -> ApiResult<Json<Vec<serde_json::Value>>> {
    let gid = rbv_hash::from_hex(&id)
        .map(rbv_entity::GalleryId)
        .map_err(|_| crate::error::ApiError::bad_request("invalid gallery id"))?;
    let mut out = Vec::new();
    for (gi, img) in rbv_data::image::list_gallery_images(&state.pool, &gid).await? {
        out.push(serde_json::json!({
            "image_id": img.id.to_hex(),
            "filename": gi.filename,
            "ordering": gi.ordering,
            "width": img.width,
            "height": img.height,
        }));
    }
    Ok(Json(out))
}

View File

@@ -0,0 +1,80 @@
use axum::{body::Body, extract::{Path, State}, http::{header, StatusCode}, response::Response, routing::get, Router};
use crate::{error::{ApiError, ApiResult}, state::AppState};
/// Image routes: metadata, original file, and pre-generated thumbnail.
pub fn router() -> Router<AppState> {
    Router::new()
        .route("/{id}", get(get_image_meta))
        .route("/{id}/file", get(serve_image_file))
        .route("/{id}/thumbnail", get(serve_thumbnail))
}
/// GET /api/images/{id} — basic metadata (dimensions) for one image.
async fn get_image_meta(
    State(state): State<AppState>,
    Path(id): Path<String>,
) -> ApiResult<axum::Json<serde_json::Value>> {
    let iid = rbv_hash::from_hex(&id)
        .map(rbv_entity::ImageId)
        .map_err(|_| ApiError::bad_request("invalid image id"))?;
    let Some(image) = rbv_data::image::get_image(&state.pool, &iid).await? else {
        return Err(ApiError::not_found("image not found"));
    };
    let body = serde_json::json!({
        "id": image.id.to_hex(),
        "width": image.width,
        "height": image.height,
    });
    Ok(axum::Json(body))
}
/// GET /api/images/{id}/file — the original full-size file.
async fn serve_image_file(
    State(state): State<AppState>,
    Path(id): Path<String>,
) -> ApiResult<Response> {
    serve_file(&state, &id, false).await
}

/// GET /api/images/{id}/thumbnail — the pre-generated file under `tn/`.
async fn serve_thumbnail(
    State(state): State<AppState>,
    Path(id): Path<String>,
) -> ApiResult<Response> {
    serve_file(&state, &id, true).await
}
/// Shared implementation for both file routes. Thumbnails live in a `tn/`
/// subdirectory of the gallery directory, under the same filename.
async fn serve_file(state: &AppState, id: &str, thumbnail: bool) -> ApiResult<Response> {
    let iid = rbv_hash::from_hex(id)
        .map(rbv_entity::ImageId)
        .map_err(|_| ApiError::bad_request("invalid image id"))?;
    let Some((gallery_path, filename)) =
        rbv_data::image::find_image_gallery_path(&state.pool, &iid).await?
    else {
        return Err(ApiError::not_found("image not found"));
    };
    let mut file_path = std::path::PathBuf::from(&gallery_path);
    if thumbnail {
        file_path.push("tn");
    }
    file_path.push(&filename);
    let data = tokio::fs::read(&file_path)
        .await
        .map_err(|_| ApiError::not_found("file not found on disk"))?;
    let mime = mime_from_extension(&filename);
    let resp = Response::builder()
        .status(StatusCode::OK)
        .header(header::CONTENT_TYPE, mime)
        .body(Body::from(data))
        .unwrap();
    Ok(resp)
}
/// Best-effort Content-Type from a filename extension (case-insensitive).
///
/// Now also covers TIFF and BMP, which the workspace's `image` dependency
/// enables for decoding but which were previously served as opaque
/// `application/octet-stream`. Unknown or missing extensions still fall
/// back to the octet-stream default.
fn mime_from_extension(filename: &str) -> &'static str {
    match std::path::Path::new(filename)
        .extension()
        .and_then(|e| e.to_str())
        .map(str::to_lowercase)
        .as_deref()
    {
        Some("jpg" | "jpeg") => "image/jpeg",
        Some("png") => "image/png",
        Some("gif") => "image/gif",
        Some("webp") => "image/webp",
        Some("tif" | "tiff") => "image/tiff",
        Some("bmp") => "image/bmp",
        _ => "application/octet-stream",
    }
}

View File

@@ -0,0 +1,26 @@
pub mod gallery;
pub mod image;
pub mod person;
pub mod face;
pub mod search;
pub mod auth;
use axum::Router;
use crate::state::AppState;
/// Routes that require a valid session cookie (protected by `require_session` middleware).
pub fn protected_routes() -> Router<AppState> {
Router::new()
.nest("/api/galleries", gallery::router())
.nest("/api/images", image::router())
.nest("/api/persons", person::router())
.nest("/api/faces", face::router())
.nest("/api/search", search::router())
.nest("/api/auth", auth::me_router())
}
/// Routes that are always public (login, register, logout).
pub fn public_routes() -> Router<AppState> {
Router::new()
.nest("/api/auth", auth::router())
}

View File

@@ -0,0 +1,157 @@
use axum::{extract::{Path, Query, State}, routing::{get, post, put}, Json, Router};
use serde::{Deserialize, Serialize};
use rbv_entity::{PersonId, Person};
use crate::{error::{ApiError, ApiResult}, routes::gallery::GalleryResponse, state::AppState};
/// Person routes: listing, merging, detail, naming, faces, and galleries.
/// The static `/merge` segment takes priority over the `{id}` capture.
pub fn router() -> Router<AppState> {
    Router::new()
        .route("/", get(list_persons))
        .route("/merge", post(merge_persons))
        .route("/{id}", get(get_person))
        .route("/{id}/name", put(set_name))
        .route("/{id}/alias", post(add_alias))
        .route("/{id}/faces", get(get_person_faces))
        .route("/{id}/galleries", get(get_person_galleries))
}

/// 1-based pagination; mirrors the gallery module's `Pagination`.
#[derive(Deserialize)]
pub struct Pagination {
    #[serde(default = "d_page")] pub page: i64,
    #[serde(default = "d_per")] pub per_page: i64,
}

fn d_page() -> i64 { 1 }
fn d_per() -> i64 { 24 }

/// JSON view of a person together with their known names.
#[derive(Serialize)]
pub struct PersonResponse {
    pub id: String,
    // The name flagged is_primary in the DB, if one exists.
    pub primary_name: Option<String>,
    // All names, primary included.
    pub names: Vec<String>,
    // RFC 3339 timestamp.
    pub created_at: String,
}
/// Assemble the API view of a person, joining in their name rows.
/// The primary name (the first row flagged is_primary) also appears in `names`.
async fn person_response(pool: &sqlx::PgPool, person: &Person) -> ApiResult<PersonResponse> {
    let name_rows = rbv_data::person::get_person_names(pool, &person.id).await?;
    let mut primary = None;
    let mut names = Vec::with_capacity(name_rows.len());
    for row in name_rows {
        if row.is_primary && primary.is_none() {
            primary = Some(row.name.clone());
        }
        names.push(row.name);
    }
    Ok(PersonResponse {
        id: person.id.to_string(),
        primary_name: primary,
        names,
        created_at: person.created_at.to_rfc3339(),
    })
}
/// GET /api/persons — paginated person listing with names attached.
async fn list_persons(
    State(state): State<AppState>,
    Query(q): Query<Pagination>,
) -> ApiResult<Json<Vec<PersonResponse>>> {
    let persons = rbv_data::person::get_all_persons_paged(&state.pool, q.page, q.per_page).await?;
    // person_response performs a DB round-trip per person, so build sequentially.
    let mut out = Vec::with_capacity(persons.len());
    for person in &persons {
        out.push(person_response(&state.pool, person).await?);
    }
    Ok(Json(out))
}
/// GET /api/persons/{id} — one person by UUID.
async fn get_person(
    State(state): State<AppState>,
    Path(id): Path<String>,
) -> ApiResult<Json<PersonResponse>> {
    let pid = parse_person_id(&id)?;
    let Some(person) = rbv_data::person::get_person(&state.pool, &pid).await? else {
        return Err(ApiError::not_found("person not found"));
    };
    let body = person_response(&state.pool, &person).await?;
    Ok(Json(body))
}
/// Body shared by PUT /{id}/name and POST /{id}/alias.
#[derive(Deserialize)]
pub struct SetNameBody {
    pub name: String,
    pub is_primary: Option<bool>,
}

/// PUT /api/persons/{id}/name — record a name; primary unless stated otherwise.
async fn set_name(
    State(state): State<AppState>,
    Path(id): Path<String>,
    Json(body): Json<SetNameBody>,
) -> ApiResult<axum::http::StatusCode> {
    validate_person_name(&body.name)?;
    let pid = parse_person_id(&id)?;
    let primary = body.is_primary.unwrap_or(true);
    rbv_data::person::set_person_name(&state.pool, &pid, &body.name, primary).await?;
    Ok(axum::http::StatusCode::NO_CONTENT)
}

/// POST /api/persons/{id}/alias — always records a non-primary name
/// (any `is_primary` in the body is ignored).
async fn add_alias(
    State(state): State<AppState>,
    Path(id): Path<String>,
    Json(body): Json<SetNameBody>,
) -> ApiResult<axum::http::StatusCode> {
    validate_person_name(&body.name)?;
    let pid = parse_person_id(&id)?;
    rbv_data::person::set_person_name(&state.pool, &pid, &body.name, false).await?;
    Ok(axum::http::StatusCode::NO_CONTENT)
}
/// Body for the merge endpoint; both fields are person UUIDs as strings.
#[derive(Deserialize)]
pub struct MergeBody {
    /// The person to keep (all faces will be assigned to this person)
    pub target: String,
    /// The person to merge in (will be deleted)
    pub source: String,
}

/// POST /api/persons/merge — reassign all of `source`'s faces to `target`
/// and delete `source`. There is no server-side confirmation step.
async fn merge_persons(
    State(state): State<AppState>,
    Json(body): Json<MergeBody>,
) -> ApiResult<axum::http::StatusCode> {
    let target = parse_person_id(&body.target)?;
    let source = parse_person_id(&body.source)?;
    rbv_data::face::merge_persons(&state.pool, &target, &source).await?;
    Ok(axum::http::StatusCode::NO_CONTENT)
}
/// GET /api/persons/{id}/faces — paginated face detections for a person.
async fn get_person_faces(
    State(state): State<AppState>,
    Path(id): Path<String>,
    Query(q): Query<Pagination>,
) -> ApiResult<Json<Vec<serde_json::Value>>> {
    let pid = parse_person_id(&id)?;
    let faces = rbv_data::face::get_faces_for_person(&state.pool, &pid, q.page, q.per_page).await?;
    let mut out = Vec::with_capacity(faces.len());
    for f in faces {
        let bb = &f.bounding_box;
        out.push(serde_json::json!({
            "id": f.id.to_hex(),
            "image_id": f.image_id.to_hex(),
            "bounding_box": { "x1": bb.x1, "y1": bb.y1, "x2": bb.x2, "y2": bb.y2 },
            "score": f.score,
        }));
    }
    Ok(Json(out))
}
/// GET /api/persons/{id}/galleries — galleries this person appears in.
async fn get_person_galleries(
    State(state): State<AppState>,
    Path(id): Path<String>,
    Query(q): Query<Pagination>,
) -> ApiResult<Json<Vec<GalleryResponse>>> {
    let pid = parse_person_id(&id)?;
    let found =
        rbv_search::face::get_galleries_for_person(&state.pool, &pid, q.page, q.per_page).await?;
    let out: Vec<GalleryResponse> = found.into_iter().map(GalleryResponse::from).collect();
    Ok(Json(out))
}
fn parse_person_id(s: &str) -> ApiResult<PersonId> {
uuid::Uuid::parse_str(s)
.map(PersonId)
.map_err(|_| ApiError::bad_request("invalid person id"))
}
fn validate_person_name(name: &str) -> ApiResult<()> {
if name.len() < 2 {
return Err(ApiError::bad_request("name too short"));
}
if !name.chars().all(|c| c.is_ascii_lowercase() || c == '-') {
return Err(ApiError::bad_request("name must be lowercase alpha and hyphens only"));
}
if name.starts_with('-') || name.ends_with('-') {
return Err(ApiError::bad_request("name must not start or end with a hyphen"));
}
Ok(())
}

View File

@@ -0,0 +1,52 @@
use axum::{extract::State, routing::post, Json, Router};
use serde::{Deserialize, Serialize};
use rbv_entity::PersonId;
use crate::{error::ApiResult, routes::gallery::GalleryResponse, state::AppState};
/// Search routes: a single combined POST endpoint.
pub fn router() -> Router<AppState> {
    Router::new().route("/", post(search))
}
/// Search request: free text and/or a set of person UUIDs; both optional.
#[derive(Deserialize)]
pub struct SearchRequest {
    pub text: Option<String>,
    pub person_ids: Option<Vec<String>>,
    #[serde(default = "default_limit")]
    pub limit: i64,
}

fn default_limit() -> i64 { 24 }

/// One ranked result. NOTE(review): assumes higher `score` means a better
/// match — confirm against rbv_search::combined's ordering contract.
#[derive(Serialize)]
pub struct SearchHit {
    pub gallery_id: String,
    pub gallery: GalleryResponse,
    pub score: f64,
}
/// POST /api/search — combined text + person search delegated to rbv-search.
async fn search(
    State(state): State<AppState>,
    Json(req): Json<SearchRequest>,
) -> ApiResult<Json<Vec<SearchHit>>> {
    // Any person id that fails UUID parsing is silently dropped.
    let person_ids: Option<Vec<PersonId>> = req.person_ids.map(|ids| {
        ids.iter()
            .filter_map(|s| uuid::Uuid::parse_str(s).ok())
            .map(PersonId)
            .collect()
    });
    let results = rbv_search::combined::search_combined(
        &state.pool,
        &state.ml,
        req.text.as_deref(),
        person_ids.as_deref(),
        req.limit,
    )
    .await?;
    let mut hits = Vec::with_capacity(results.len());
    for r in results {
        // Read the hex id before `r.gallery` is moved into the response.
        let gallery_id = r.gallery.id.to_hex();
        hits.push(SearchHit {
            gallery_id,
            gallery: r.gallery.into(),
            score: r.score,
        });
    }
    Ok(Json(hits))
}

View File

@@ -0,0 +1,64 @@
use std::sync::Arc;
use anyhow::Result;
use axum::Router;
use tower_http::services::ServeDir;
use tracing::{info, warn};
use rbv_ml::MlClient;
use crate::{args::ApiArgs, middleware, routes, state::AppState, tls::build_rustls_config};
/// Wire up DB, state, background jobs, routing, and TLS, then serve forever.
pub async fn run(args: ApiArgs) -> Result<()> {
    let pool = rbv_data::connect(&args.database).await?;
    // Apply pending SQL migrations before accepting any traffic.
    rbv_data::run_migrations(&pool).await?;
    let ml = Arc::new(MlClient::new(&args.ml_uri));
    let allowed_cns = args.client_cn.clone();
    // Ensure face cache directory exists if configured.
    if let Some(dir) = &args.face_cache {
        tokio::fs::create_dir_all(dir).await?;
    }
    let state = AppState {
        pool: pool.clone(),
        ml,
        allowed_cns,
        face_cache: args.face_cache.clone(),
    };
    // Background task: purge expired sessions once per hour.
    // (Detached on purpose — it runs for the life of the process.)
    tokio::spawn(async move {
        let mut interval = tokio::time::interval(std::time::Duration::from_secs(3600));
        loop {
            interval.tick().await;
            match rbv_data::user::purge_expired_sessions(&pool).await {
                Ok(n) if n > 0 => info!("Purged {n} expired sessions"),
                Err(e) => warn!("Session cleanup failed: {e}"),
                _ => {}
            }
        }
    });
    // The session check is attached with route_layer BEFORE merging the
    // public routes, so only the protected routes are guarded; the public
    // auth routes (login/register/logout) bypass it.
    let auth_mw = axum::middleware::from_fn_with_state(state.clone(), middleware::require_session);
    let mut router: Router = routes::protected_routes()
        .route_layer(auth_mw)
        .merge(routes::public_routes())
        .with_state(state);
    if let Some(ui_dir) = &args.ui_dir {
        // Static UI assets are served without authentication.
        router = router.nest_service(
            "/",
            ServeDir::new(ui_dir).append_index_html_on_directories(true),
        );
    }
    let rustls_config = build_rustls_config(&args).await?;
    let addr: std::net::SocketAddr = args.listen.parse()?;
    info!("Listening on https://{addr}");
    axum_server::bind_rustls(addr, rustls_config)
        .serve(router.into_make_service())
        .await?;
    Ok(())
}

View File

@@ -0,0 +1,12 @@
use std::{path::PathBuf, sync::Arc};
use rbv_ml::MlClient;
use sqlx::PgPool;
/// Shared application state, cloned into every handler via axum's `State`.
#[derive(Clone)]
#[allow(dead_code)]
pub struct AppState {
    // PostgreSQL connection pool.
    pub pool: PgPool,
    // Client for the immich-ml-compatible inference service.
    pub ml: Arc<MlClient>,
    // mTLS client-CN allowlist from --client-cn; None = accept any valid cert.
    pub allowed_cns: Option<Vec<String>>,
    // Directory for cached face-crop JPEGs (--face-cache), if configured.
    pub face_cache: Option<PathBuf>,
}

View File

@@ -0,0 +1,8 @@
use anyhow::Result;
use axum_server::tls_rustls::RustlsConfig;
use crate::args::ApiArgs;
/// Build the axum-server TLS config from the server cert/key PEM files.
///
/// NOTE(review): `args.ca_cert` and `args.client_cn` are not used here, so
/// this config neither requests nor verifies client certificates — confirm
/// whether rbv_auth::mtls::build_tls_config was meant to be wired in.
pub async fn build_rustls_config(args: &ApiArgs) -> Result<RustlsConfig> {
    let config = RustlsConfig::from_pem_file(&args.server_cert, &args.server_key).await?;
    Ok(config)
}

View File

@@ -0,0 +1,16 @@
# rbv-auth: mTLS client-cert validation, password hashing, session tokens.
[package]
name = "rbv-auth"
version.workspace = true
edition.workspace = true
license.workspace = true

[dependencies]
rbv-entity = { workspace = true }
# Password hashing
argon2 = { workspace = true }
# mTLS: TLS server config, PEM loading, client-cert parsing
rustls = { workspace = true }
rustls-pemfile = { workspace = true }
x509-parser = { workspace = true }
# Session-token generation
rand = { workspace = true }
anyhow = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }

View File

@@ -0,0 +1,7 @@
//! rbv-auth: authentication building blocks — mTLS certificate validation,
//! password hashing, and session-token generation.
pub mod mtls;
pub mod password;
pub mod session;

// Flat re-exports so dependents can use `rbv_auth::hash_password` etc.
pub use mtls::{build_tls_config, validate_client_cert, ClientIdentity, MtlsConfig, MtlsError};
pub use password::{hash_password, verify_password};
pub use session::generate_session_token;

105
crates/rbv-auth/src/mtls.rs Normal file
View File

@@ -0,0 +1,105 @@
use std::path::PathBuf;
use std::sync::Arc;
use std::io::BufReader;
use thiserror::Error;
use rustls::pki_types::{CertificateDer, PrivateKeyDer};
use rustls::ServerConfig;
use rustls_pemfile::{certs, private_key};
use x509_parser::prelude::*;
/// File paths and policy inputs for building the mTLS server config.
pub struct MtlsConfig {
    pub ca_cert_path: PathBuf,
    pub server_cert_path: PathBuf,
    pub server_key_path: PathBuf,
    // None = accept any cert the CA trusts; Some = restrict to these CNs.
    pub allowed_client_cns: Option<Vec<String>>,
}

/// Identity extracted from a validated client certificate.
#[derive(Debug, Clone)]
pub struct ClientIdentity {
    pub common_name: String,
}

/// Reasons a client certificate is rejected.
#[derive(Debug, Error)]
pub enum MtlsError {
    #[error("no client certificate presented")]
    NoCertificate,
    #[error("client certificate has expired")]
    Expired,
    #[error("client certificate is not trusted by the CA")]
    UntrustedCa,
    #[error("client CN '{0}' is not in the allowed list")]
    CnNotAllowed(String),
    #[error("certificate parse error: {0}")]
    ParseError(String),
}
/// Build a rustls `ServerConfig` with optional client-certificate auth.
///
/// The CA bundle becomes the root store used to verify client certificates.
/// The verifier is built with `allow_unauthenticated`, so the handshake
/// itself never rejects a missing cert — presence and CN checks are
/// enforced later (see `validate_client_cert`).
pub fn build_tls_config(config: &MtlsConfig) -> anyhow::Result<Arc<ServerConfig>> {
    let server_certs = load_certs(&config.server_cert_path)?;
    let server_key = load_key(&config.server_key_path)?;
    let ca_cert = load_certs(&config.ca_cert_path)?;
    let mut root_store = rustls::RootCertStore::empty();
    for cert in ca_cert {
        root_store.add(cert)?;
    }
    let client_verifier = rustls::server::WebPkiClientVerifier::builder(Arc::new(root_store))
        .allow_unauthenticated() // allow no cert; enforce CN in middleware
        .build()?;
    let tls = ServerConfig::builder()
        .with_client_cert_verifier(client_verifier)
        .with_single_cert(server_certs, server_key)?;
    Ok(Arc::new(tls))
}
/// Validate a client certificate chain and extract the CN.
/// Called from axum middleware after the TLS handshake.
///
/// Checks performed here:
/// 1. the DER parses as an X.509 certificate;
/// 2. the certificate is inside its validity window (against the current time);
/// 3. if an allowlist is configured, the subject CN must be listed.
///
/// NOTE(review): when the subject has no CN the name defaults to `""`, which
/// passes when `allowed_cns` is `None` — confirm that is intended.
pub fn validate_client_cert(
    cert_der: &CertificateDer<'_>,
    allowed_cns: &Option<Vec<String>>,
) -> Result<ClientIdentity, MtlsError> {
    let (_, cert) = X509Certificate::from_der(cert_der.as_ref())
        .map_err(|e| MtlsError::ParseError(e.to_string()))?;
    // Check expiry
    let validity = cert.validity();
    let now = x509_parser::time::ASN1Time::now();
    if !validity.is_valid_at(now) {
        return Err(MtlsError::Expired);
    }
    // Extract CN (first CN attribute of the subject, if any)
    let cn = cert
        .subject()
        .iter_common_name()
        .next()
        .and_then(|attr| attr.as_str().ok())
        .unwrap_or("")
        .to_string();
    // Check CN allowlist
    if let Some(allowed) = allowed_cns {
        if !allowed.iter().any(|a| a == &cn) {
            return Err(MtlsError::CnNotAllowed(cn));
        }
    }
    Ok(ClientIdentity { common_name: cn })
}
/// Read all PEM-encoded certificates from `path`.
///
/// Takes `&Path` rather than `&PathBuf` (clippy::ptr_arg); existing
/// `&PathBuf` call sites coerce unchanged.
///
/// # Errors
/// Fails if the file cannot be opened (with the path in the message) or a
/// PEM block is malformed.
fn load_certs(path: &std::path::Path) -> anyhow::Result<Vec<CertificateDer<'static>>> {
    let file = std::fs::File::open(path)
        .map_err(|e| anyhow::anyhow!("opening {}: {e}", path.display()))?;
    let mut reader = BufReader::new(file);
    let certs: Vec<_> = certs(&mut reader).collect::<Result<_, _>>()?;
    Ok(certs)
}
/// Read the first PEM-encoded private key from `path`.
///
/// Takes `&Path` rather than `&PathBuf` (clippy::ptr_arg); existing
/// `&PathBuf` call sites coerce unchanged.
///
/// # Errors
/// Fails if the file cannot be opened (with the path in the message), the
/// PEM is malformed, or no private key block is present.
fn load_key(path: &std::path::Path) -> anyhow::Result<PrivateKeyDer<'static>> {
    let file = std::fs::File::open(path)
        .map_err(|e| anyhow::anyhow!("opening {}: {e}", path.display()))?;
    let mut reader = BufReader::new(file);
    let key = private_key(&mut reader)?
        .ok_or_else(|| anyhow::anyhow!("no private key found in {}", path.display()))?;
    Ok(key)
}

View File

@@ -0,0 +1,28 @@
use argon2::{Argon2, PasswordHash, PasswordHasher, PasswordVerifier};
use argon2::password_hash::{rand_core::OsRng, SaltString};
use thiserror::Error;
/// Errors from password hashing and verification.
#[derive(Debug, Error)]
pub enum PasswordError {
    #[error("hashing failed: {0}")]
    HashFailed(String),
    #[error("invalid password hash")]
    InvalidHash,
    #[error("incorrect password")]
    Incorrect,
}
/// Hash `password` with Argon2 (default parameters) and a fresh random
/// salt, returning the PHC-format hash string.
pub fn hash_password(password: &str) -> Result<String, PasswordError> {
    let salt = SaltString::generate(&mut OsRng);
    Argon2::default()
        .hash_password(password.as_bytes(), &salt)
        .map(|phc| phc.to_string())
        .map_err(|e| PasswordError::HashFailed(e.to_string()))
}
/// Check `password` against a stored PHC hash string.
///
/// Returns `InvalidHash` if the stored string does not parse, and
/// `Incorrect` if the password does not match.
pub fn verify_password(password: &str, hash: &str) -> Result<(), PasswordError> {
    let parsed = match PasswordHash::new(hash) {
        Ok(p) => p,
        Err(_) => return Err(PasswordError::InvalidHash),
    };
    Argon2::default()
        .verify_password(password.as_bytes(), &parsed)
        .map_err(|_| PasswordError::Incorrect)
}

View File

@@ -0,0 +1,28 @@
use rand::RngCore;
/// Generate a cryptographically random 256-bit session token, base64url-encoded.
///
/// Fills 32 bytes from `rand::thread_rng()` (documented by `rand` as a
/// CSPRNG), then encodes them as unpadded base64url (43 characters).
pub fn generate_session_token() -> String {
    let mut bytes = [0u8; 32];
    rand::thread_rng().fill_bytes(&mut bytes);
    base64_url_encode(&bytes)
}
/// Encode `data` as unpadded base64url (RFC 4648 §5) using only std.
fn base64_url_encode(data: &[u8]) -> String {
    const ALPHABET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
    // Unpadded output length is ceil(len * 4 / 3).
    let mut encoded = String::with_capacity((data.len() * 4 + 2) / 3);
    for group in data.chunks(3) {
        // Pack up to three bytes into a 24-bit word, high byte first;
        // missing bytes are zero (their symbols are simply not emitted).
        let word = ((group[0] as u32) << 16)
            | ((group.get(1).copied().unwrap_or(0) as u32) << 8)
            | (group.get(2).copied().unwrap_or(0) as u32);
        // n input bytes produce n + 1 six-bit symbols (2, 3 or 4).
        for i in 0..=group.len() {
            let shift = 18 - 6 * i;
            encoded.push(ALPHABET[((word >> shift) & 0x3f) as usize] as char);
        }
    }
    encoded
}

22
crates/rbv-cli/Cargo.toml Normal file
View File

@@ -0,0 +1,22 @@
[package]
name = "rbv-cli"
version.workspace = true
edition.workspace = true
license.workspace = true
[[bin]]
name = "rbv"
path = "src/main.rs"
[dependencies]
rbv-entity = { workspace = true }
rbv-hash = { workspace = true }
rbv-data = { workspace = true }
rbv-ml = { workspace = true }
rbv-ingest = { workspace = true }
rbv-cluster = { workspace = true }
clap = { workspace = true }
tokio = { workspace = true }
anyhow = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }

View File

@@ -0,0 +1,75 @@
use std::path::PathBuf;
use clap::{Parser, Subcommand};
use anyhow::Result;
/// Top-level CLI definition: `rbv <subcommand>`.
#[derive(Parser)]
#[command(name = "rbv", about = "Image gallery indexer and recognition tool")]
pub struct Cli {
    #[command(subcommand)]
    pub command: Command,
}
impl Cli {
    /// Dispatch the parsed subcommand to its async handler.
    pub async fn run(self) -> Result<()> {
        match self.command {
            Command::Migrate(args) => crate::commands::migrate::run(args).await,
            Command::Index(args) => crate::commands::index::run(args).await,
            Command::Cluster(args) => crate::commands::cluster::run(args).await,
        }
    }
}
/// The `rbv` subcommands.
#[derive(Subcommand)]
pub enum Command {
    /// Run database migrations
    Migrate(MigrateArgs),
    /// Index image galleries into the database
    Index(IndexArgs),
    /// Cluster detected faces into person identities
    Cluster(ClusterArgs),
}
/// Arguments for `rbv migrate`.
#[derive(Parser)]
pub struct MigrateArgs {
    /// PostgreSQL connection string (mTLS, no password)
    #[arg(long)]
    pub database: String,
}
/// Arguments for `rbv index`.
#[derive(Parser)]
pub struct IndexArgs {
    /// Path(s) to index. May be a root, chunk, or gallery directory.
    #[arg(long, required = true)]
    pub target: Vec<PathBuf>,
    /// Filename/extension inclusion filters (rsync-style globs)
    #[arg(long)]
    pub include: Option<Vec<String>>,
    /// Filename/extension exclusion filters (rsync-style globs)
    #[arg(long)]
    pub exclude: Option<Vec<String>>,
    /// PostgreSQL connection string
    #[arg(long)]
    pub database: String,
    /// Base URL of the machine learning API
    #[arg(long)]
    pub ml_uri: String,
    /// Number of images to submit to the ML API concurrently
    #[arg(long, default_value = "4")]
    pub concurrency: usize,
}
/// Arguments for `rbv cluster`.
#[derive(Parser)]
pub struct ClusterArgs {
    /// PostgreSQL connection string
    #[arg(long)]
    pub database: String,
    /// Cosine similarity threshold for grouping faces (0.0-1.0)
    #[arg(long, default_value = "0.65")]
    pub threshold: f32,
}

View File

@@ -0,0 +1,44 @@
use anyhow::Result;
use tracing::info;
use rbv_cluster::{cluster_faces, ClusterConfig};
use crate::args::ClusterArgs;
/// Cluster every face with no assigned person, create one person row per
/// resulting cluster, and link each face to it.
pub async fn run(args: ClusterArgs) -> Result<()> {
    let pool = rbv_data::connect(&args.database).await?;
    info!("Loading unassigned face embeddings...");
    let faces = rbv_data::face::unassigned_face_embeddings(&pool).await?;
    info!("Found {} unassigned faces.", faces.len());
    if faces.is_empty() {
        info!("Nothing to cluster.");
        return Ok(());
    }
    let cluster_config = ClusterConfig {
        similarity_threshold: args.threshold,
        min_cluster_size: 1,
    };
    info!("Clustering with threshold {}...", args.threshold);
    let clusters = cluster_faces(&faces, &cluster_config);
    info!("Formed {} clusters.", clusters.len());
    let mut persons_created = 0u64;
    let mut faces_assigned = 0u64;
    // One person per non-empty cluster; each member face is pointed at it.
    for members in clusters.iter().filter(|c| !c.is_empty()) {
        let person_id = rbv_data::person::create_person(&pool).await?;
        persons_created += 1;
        for face_id in members {
            rbv_data::face::assign_face_to_person(&pool, face_id, &person_id).await?;
            faces_assigned += 1;
        }
    }
    info!("Created {} persons, assigned {} faces.", persons_created, faces_assigned);
    Ok(())
}

View File

@@ -0,0 +1,37 @@
use anyhow::Result;
use tracing::info;
use rbv_ingest::{IngestConfig, ingest_galleries, discover_galleries, FilterConfig};
use rbv_ml::MlClient;
use crate::args::IndexArgs;
/// Discover galleries under the given targets, ingest them through the ML
/// API, and log a summary report (errors are logged per path).
pub async fn run(args: IndexArgs) -> Result<()> {
    let pool = rbv_data::connect(&args.database).await?;
    let ml_client = MlClient::new(&args.ml_uri);
    let config = IngestConfig {
        concurrency: args.concurrency,
        filter: FilterConfig::new(args.include, args.exclude),
    };
    info!("Discovering galleries in {} target(s)...", args.target.len());
    let galleries = discover_galleries(&args.target)?;
    info!("Found {} galleries.", galleries.len());
    let report = ingest_galleries(&pool, &ml_client, galleries, &config).await?;
    info!(
        "Indexing complete. Galleries: {}, Images processed: {}, skipped: {}, Faces detected: {}, Errors: {}",
        report.galleries_processed,
        report.images_processed,
        report.images_skipped,
        report.faces_detected,
        report.errors.len(),
    );
    for (path, err) in &report.errors {
        tracing::error!("Error processing {}: {err}", path.display());
    }
    Ok(())
}

View File

@@ -0,0 +1,12 @@
use anyhow::Result;
use tracing::info;
use crate::args::MigrateArgs;
/// Connect to the database and apply all pending SQL migrations.
pub async fn run(args: MigrateArgs) -> Result<()> {
    info!("Connecting to database...");
    let pool = rbv_data::connect(&args.database).await?;
    info!("Running migrations...");
    rbv_data::run_migrations(&pool).await?;
    info!("Migrations complete.");
    Ok(())
}

View File

@@ -0,0 +1,3 @@
//! CLI subcommand handlers, one module per `rbv` subcommand.
pub mod migrate;
pub mod index;
pub mod cluster;

View File

@@ -0,0 +1,18 @@
mod args;
mod commands;
use anyhow::Result;
use clap::Parser;
use args::Cli;
/// Entry point: initialise tracing (RUST_LOG honoured, "info" default),
/// parse the CLI, and run the chosen subcommand.
#[tokio::main]
async fn main() -> Result<()> {
    let filter = tracing_subscriber::EnvFilter::try_from_default_env()
        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"));
    tracing_subscriber::fmt().with_env_filter(filter).init();
    Cli::parse().run().await
}

View File

@@ -0,0 +1,10 @@
[package]
name = "rbv-cluster"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
rbv-entity = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }

View File

@@ -0,0 +1,118 @@
use rbv_entity::FaceId;
/// Tunable parameters for face clustering.
pub struct ClusterConfig {
    /// Cosine similarity threshold: faces above this are considered the same person.
    pub similarity_threshold: f32,
    /// Minimum number of faces to form a cluster (1 = all faces get a person ID).
    pub min_cluster_size: usize,
}
impl Default for ClusterConfig {
    /// Defaults: 0.65 similarity threshold, clusters of any size
    /// (singletons included).
    fn default() -> Self {
        Self {
            similarity_threshold: 0.65,
            min_cluster_size: 1,
        }
    }
}
/// Cluster faces by cosine similarity using union-find connected components.
///
/// Input: list of (FaceId, embedding) pairs (embeddings need not be normalised).
/// Output: list of clusters, each cluster is a non-empty Vec<FaceId>.
///
/// Components smaller than `min_cluster_size` are dropped; with
/// `min_cluster_size == 1` every face ends up in some cluster.
///
/// Pairwise comparison makes this O(n²) in the number of faces.
pub fn cluster_faces(faces: &[(FaceId, Vec<f32>)], config: &ClusterConfig) -> Vec<Vec<FaceId>> {
    if faces.is_empty() {
        return Vec::new();
    }
    let count = faces.len();
    // Unit-normalise so cosine similarity reduces to a plain dot product.
    let unit: Vec<Vec<f32>> = faces.iter().map(|(_, emb)| normalise(emb)).collect();
    // Union every pair whose similarity clears the threshold.
    let mut parent: Vec<usize> = (0..count).collect();
    for i in 0..count {
        for j in (i + 1)..count {
            if dot(&unit[i], &unit[j]) >= config.similarity_threshold {
                union(&mut parent, i, j);
            }
        }
    }
    // Bucket face IDs by their component root.
    let mut components: std::collections::HashMap<usize, Vec<FaceId>> =
        std::collections::HashMap::new();
    for (idx, (face_id, _)) in faces.iter().enumerate() {
        let root = find(&mut parent, idx);
        components.entry(root).or_default().push(face_id.clone());
    }
    components
        .into_values()
        .filter(|members| members.len() >= config.min_cluster_size)
        .collect()
}
/// Scale `v` to unit length; a zero vector is returned unchanged.
fn normalise(v: &[f32]) -> Vec<f32> {
    let magnitude = v.iter().map(|x| x * x).sum::<f32>().sqrt();
    if magnitude == 0.0 {
        return v.to_vec();
    }
    v.iter().map(|x| x / magnitude).collect()
}
/// Dot product over the common prefix of `a` and `b` (same left-to-right
/// summation order as a plain accumulating loop).
fn dot(a: &[f32], b: &[f32]) -> f32 {
    let mut acc = 0.0f32;
    for (x, y) in a.iter().zip(b) {
        acc += x * y;
    }
    acc
}
/// Find the root of `x`'s component, compressing the path as it unwinds.
///
/// Takes `&mut [usize]` rather than `&mut Vec<usize>` (clippy::ptr_arg);
/// existing `&mut Vec` call sites coerce unchanged.
fn find(parent: &mut [usize], x: usize) -> usize {
    if parent[x] != x {
        parent[x] = find(parent, parent[x]); // path compression
    }
    parent[x]
}
/// Merge the components containing `a` and `b`; no-op if already joined.
fn union(parent: &mut Vec<usize>, a: usize, b: usize) {
    let root_a = find(parent, a);
    let root_b = find(parent, b);
    if root_a == root_b {
        return;
    }
    parent[root_a] = root_b;
}
#[cfg(test)]
mod tests {
    use super::*;
    use rbv_entity::FaceId;
    /// Build a (FaceId, embedding) pair whose id differs only in its first byte.
    fn fake_face(id: u8, embedding: Vec<f32>) -> (FaceId, Vec<f32>) {
        let mut bytes = [0u8; 32];
        bytes[0] = id;
        (FaceId(bytes), embedding)
    }
    #[test]
    fn identical_embeddings_cluster_together() {
        // Same vector twice: cosine similarity 1.0 >= 0.65, so one 2-face cluster.
        let e = vec![1.0f32, 0.0, 0.0];
        let faces = vec![fake_face(1, e.clone()), fake_face(2, e.clone())];
        let config = ClusterConfig { similarity_threshold: 0.65, min_cluster_size: 1 };
        let clusters = cluster_faces(&faces, &config);
        assert_eq!(clusters.len(), 1);
        assert_eq!(clusters[0].len(), 2);
    }
    #[test]
    fn orthogonal_embeddings_form_separate_clusters() {
        // Orthogonal vectors: similarity 0.0 < 0.65, so two singleton clusters.
        let faces = vec![
            fake_face(1, vec![1.0, 0.0, 0.0]),
            fake_face(2, vec![0.0, 1.0, 0.0]),
        ];
        let config = ClusterConfig { similarity_threshold: 0.65, min_cluster_size: 1 };
        let clusters = cluster_faces(&faces, &config);
        assert_eq!(clusters.len(), 2);
    }
}

View File

@@ -0,0 +1,3 @@
//! Face clustering: groups face embeddings into person identities.
pub mod cluster;
pub use cluster::{cluster_faces, ClusterConfig};

View File

@@ -0,0 +1,18 @@
[package]
name = "rbv-data"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
rbv-entity = { workspace = true }
rbv-hash = { workspace = true }
sqlx = { workspace = true }
pgvector = { workspace = true }
uuid = { workspace = true }
chrono = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
thiserror = { workspace = true }
anyhow = { workspace = true }
tracing = { workspace = true }

View File

@@ -0,0 +1,44 @@
use anyhow::Result;
use sqlx::{PgPool, Row};
use pgvector::Vector;
use rbv_entity::{ClipEmbedding, ImageId};
/// Insert the CLIP embedding for an image, replacing any existing one
/// (one embedding per image_id).
pub async fn upsert_clip_embedding(pool: &PgPool, emb: &ClipEmbedding) -> Result<()> {
    let vec = Vector::from(emb.embedding.clone());
    sqlx::query(
        r#"
        INSERT INTO clip_embeddings (image_id, embedding)
        VALUES ($1, $2)
        ON CONFLICT (image_id) DO UPDATE SET embedding = EXCLUDED.embedding
        "#,
    )
    .bind(emb.image_id.as_bytes())
    .bind(vec)
    .execute(pool)
    .await?;
    Ok(())
}
/// Return up to `limit` image ids nearest to `query` by pgvector cosine
/// distance (`<=>`), paired with a similarity score = 1 - distance
/// (higher is more similar).
///
/// # Panics
/// Panics if a stored `image_id` is not exactly 32 bytes.
pub async fn search_clip_nearest(pool: &PgPool, query: &[f32], limit: i64) -> Result<Vec<(ImageId, f64)>> {
    let vec = Vector::from(query.to_vec());
    let rows = sqlx::query(
        r#"
        SELECT image_id, (embedding <=> $1)::float8 AS distance
        FROM clip_embeddings
        ORDER BY embedding <=> $1
        LIMIT $2
        "#,
    )
    .bind(vec)
    .bind(limit)
    .fetch_all(pool)
    .await?;
    Ok(rows.iter().map(|r| {
        let id_bytes: Vec<u8> = r.get("image_id");
        let id = ImageId(id_bytes.try_into().expect("32-byte id"));
        let distance: f64 = r.get("distance");
        // Convert cosine distance to a similarity-style score.
        let score = 1.0 - distance;
        (id, score)
    }).collect())
}

111
crates/rbv-data/src/face.rs Normal file
View File

@@ -0,0 +1,111 @@
use anyhow::Result;
use sqlx::{PgPool, Row};
use pgvector::Vector;
use rbv_entity::{BoundingBox, FaceDetection, FaceId, ImageId, PersonId};
/// Insert a face detection row, or refresh its score and embedding if the
/// id already exists. Bind order must match the column list ($1..$8).
pub async fn upsert_face(pool: &PgPool, face: &FaceDetection) -> Result<()> {
    let vec = Vector::from(face.embedding.clone());
    sqlx::query(
        r#"
        INSERT INTO face_detections (id, image_id, x1, y1, x2, y2, score, embedding)
        VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
        ON CONFLICT (id) DO UPDATE SET score = EXCLUDED.score, embedding = EXCLUDED.embedding
        "#,
    )
    .bind(face.id.as_bytes())
    .bind(face.image_id.as_bytes())
    .bind(face.bounding_box.x1)
    .bind(face.bounding_box.y1)
    .bind(face.bounding_box.x2)
    .bind(face.bounding_box.y2)
    .bind(face.score)
    .bind(vec)
    .execute(pool)
    .await?;
    Ok(())
}
/// Point an existing face detection at the given person.
pub async fn assign_face_to_person(pool: &PgPool, face_id: &FaceId, person_id: &PersonId) -> Result<()> {
    let update = sqlx::query("UPDATE face_detections SET person_id = $1 WHERE id = $2")
        .bind(person_id.as_uuid())
        .bind(face_id.as_bytes());
    update.execute(pool).await?;
    Ok(())
}
/// Load (id, embedding) for every stored face detection.
///
/// # Panics
/// Panics if a stored `id` is not exactly 32 bytes.
pub async fn all_face_embeddings(pool: &PgPool) -> Result<Vec<(FaceId, Vec<f32>)>> {
    let rows = sqlx::query("SELECT id, embedding FROM face_detections")
        .fetch_all(pool)
        .await?;
    let mut embeddings = Vec::with_capacity(rows.len());
    for row in &rows {
        let id_bytes: Vec<u8> = row.get("id");
        let embedding: Vector = row.get("embedding");
        embeddings.push((FaceId(id_bytes.try_into().expect("32-byte id")), embedding.to_vec()));
    }
    Ok(embeddings)
}
/// Load (id, embedding) for every face detection with no person assigned.
///
/// # Panics
/// Panics if a stored `id` is not exactly 32 bytes.
pub async fn unassigned_face_embeddings(pool: &PgPool) -> Result<Vec<(FaceId, Vec<f32>)>> {
    let rows = sqlx::query("SELECT id, embedding FROM face_detections WHERE person_id IS NULL")
        .fetch_all(pool)
        .await?;
    Ok(rows.iter().map(|r| {
        let id_bytes: Vec<u8> = r.get("id");
        let vec: Vector = r.get("embedding");
        (FaceId(id_bytes.try_into().expect("32-byte id")), vec.to_vec())
    }).collect())
}
/// Fetch all face detections belonging to one image.
pub async fn get_faces_for_image(pool: &PgPool, image_id: &ImageId) -> Result<Vec<FaceDetection>> {
    let rows = sqlx::query(
        "SELECT id, image_id, x1, y1, x2, y2, score, embedding, person_id FROM face_detections WHERE image_id = $1",
    )
    .bind(image_id.as_bytes())
    .fetch_all(pool)
    .await?;
    Ok(rows.iter().map(row_to_face).collect())
}
/// Fetch a page of face detections assigned to one person.
///
/// `page` is 1-based; a page <= 0 yields a negative OFFSET, which Postgres
/// rejects. NOTE(review): no ORDER BY, so pagination order is unspecified —
/// confirm callers don't rely on stable pages.
pub async fn get_faces_for_person(pool: &PgPool, person_id: &PersonId, page: i64, per_page: i64) -> Result<Vec<FaceDetection>> {
    let rows = sqlx::query(
        "SELECT id, image_id, x1, y1, x2, y2, score, embedding, person_id FROM face_detections WHERE person_id = $1 LIMIT $2 OFFSET $3",
    )
    .bind(person_id.as_uuid())
    .bind(per_page)
    .bind((page - 1) * per_page)
    .fetch_all(pool)
    .await?;
    Ok(rows.iter().map(row_to_face).collect())
}
/// Merge `remove` into `keep` atomically: repoint that person's faces and
/// names at `keep`, then delete the now-empty person row. All three
/// statements run in one transaction, so a failure leaves both intact.
pub async fn merge_persons(pool: &PgPool, keep: &PersonId, remove: &PersonId) -> Result<()> {
    let mut tx = pool.begin().await?;
    sqlx::query("UPDATE face_detections SET person_id = $1 WHERE person_id = $2")
        .bind(keep.as_uuid()).bind(remove.as_uuid()).execute(&mut *tx).await?;
    sqlx::query("UPDATE person_names SET person_id = $1 WHERE person_id = $2")
        .bind(keep.as_uuid()).bind(remove.as_uuid()).execute(&mut *tx).await?;
    sqlx::query("DELETE FROM persons WHERE id = $1")
        .bind(remove.as_uuid()).execute(&mut *tx).await?;
    tx.commit().await?;
    Ok(())
}
/// Convert a full face_detections row into a `FaceDetection`.
///
/// # Panics
/// Panics if `id` or `image_id` is not exactly 32 bytes.
fn row_to_face(r: &sqlx::postgres::PgRow) -> FaceDetection {
    let id_bytes: Vec<u8> = r.get("id");
    let iid_bytes: Vec<u8> = r.get("image_id");
    let vec: Vector = r.get("embedding");
    let person_id: Option<uuid::Uuid> = r.get("person_id");
    FaceDetection {
        id: FaceId(id_bytes.try_into().expect("32-byte id")),
        image_id: ImageId(iid_bytes.try_into().expect("32-byte id")),
        bounding_box: BoundingBox {
            x1: r.get("x1"),
            y1: r.get("y1"),
            x2: r.get("x2"),
            y2: r.get("y2"),
        },
        score: r.get("score"),
        embedding: vec.to_vec(),
        person_id: person_id.map(PersonId),
    }
}

View File

@@ -0,0 +1,77 @@
use anyhow::Result;
use sqlx::{PgPool, Row};
use rbv_entity::{Gallery, GalleryId};
/// Insert a gallery row, or refresh its metadata when the natural key
/// (collection, source_name, source_url) already exists.
///
/// # Errors
/// Fails on database errors, or if `source_id` exceeds `i64::MAX` — the
/// column is a signed BIGINT, and the previous `as i64` cast silently
/// reinterpreted out-of-range values.
pub async fn upsert_gallery(pool: &PgPool, gallery: &Gallery) -> Result<()> {
    // Reject u64 ids that the BIGINT column cannot represent instead of
    // wrapping the bits.
    let source_id = i64::try_from(gallery.source_id)?;
    sqlx::query(
        r#"
        INSERT INTO galleries (id, source_id, collection, source_name, source_url, subjects, tags, path)
        VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
        ON CONFLICT (collection, source_name, source_url) DO UPDATE SET
            source_id = EXCLUDED.source_id,
            subjects = EXCLUDED.subjects,
            tags = EXCLUDED.tags,
            path = EXCLUDED.path,
            indexed_at = now()
        "#,
    )
    .bind(gallery.id.as_bytes())
    .bind(source_id)
    .bind(&gallery.collection)
    .bind(&gallery.source_name)
    .bind(&gallery.source_url)
    .bind(&gallery.subjects)
    .bind(&gallery.tags)
    .bind(gallery.path.to_string_lossy().as_ref())
    .execute(pool)
    .await?;
    Ok(())
}
pub async fn get_gallery(pool: &PgPool, id: &GalleryId) -> Result<Option<Gallery>> {
let row = sqlx::query(
"SELECT id, source_id, collection, source_name, source_url, subjects, tags, path FROM galleries WHERE id = $1",
)
.bind(id.as_bytes())
.fetch_optional(pool)
.await?;
Ok(row.map(|r| row_to_gallery(&r)))
}
/// Fetch a page of galleries ordered by (collection, source_name).
/// `page` is 1-based; a page <= 0 yields a negative OFFSET, which Postgres rejects.
pub async fn list_galleries(pool: &PgPool, page: i64, per_page: i64) -> Result<Vec<Gallery>> {
    let rows = sqlx::query(
        "SELECT id, source_id, collection, source_name, source_url, subjects, tags, path FROM galleries ORDER BY collection, source_name LIMIT $1 OFFSET $2",
    )
    .bind(per_page)
    .bind((page - 1) * per_page)
    .fetch_all(pool)
    .await?;
    Ok(rows.iter().map(row_to_gallery).collect())
}
pub async fn random_galleries(pool: &PgPool, count: i64) -> Result<Vec<Gallery>> {
let rows = sqlx::query(
"SELECT id, source_id, collection, source_name, source_url, subjects, tags, path FROM galleries ORDER BY random() LIMIT $1",
)
.bind(count)
.fetch_all(pool)
.await?;
Ok(rows.iter().map(row_to_gallery).collect())
}
/// Convert a full galleries row into a `Gallery`.
///
/// # Panics
/// Panics if the stored `id` is not exactly 32 bytes.
fn row_to_gallery(r: &sqlx::postgres::PgRow) -> Gallery {
    let id_bytes: Vec<u8> = r.get("id");
    Gallery {
        id: GalleryId(id_bytes.try_into().expect("32-byte id")),
        // BIGINT column read back into the u64 domain field.
        source_id: r.get::<i64, _>("source_id") as u64,
        collection: r.get("collection"),
        source_name: r.get("source_name"),
        source_url: r.get("source_url"),
        subjects: r.get("subjects"),
        tags: r.get("tags"),
        path: r.get::<String, _>("path").into(),
    }
}

View File

@@ -0,0 +1,115 @@
use anyhow::Result;
use sqlx::{PgPool, Row};
use rbv_entity::{GalleryId, GalleryImage, Image, ImageId};
/// True if an image row with this id exists.
pub async fn image_exists(pool: &PgPool, id: &ImageId) -> Result<bool> {
    let row = sqlx::query("SELECT EXISTS(SELECT 1 FROM images WHERE id = $1)")
        .bind(id.as_bytes())
        .fetch_one(pool)
        .await?;
    let exists: Option<bool> = row.get(0);
    Ok(exists.unwrap_or(false))
}
/// Insert an image row, or update its dimensions. COALESCE keeps the
/// existing width/height when the incoming values are NULL.
pub async fn upsert_image(pool: &PgPool, image: &Image) -> Result<()> {
    sqlx::query(
        r#"
        INSERT INTO images (id, width, height)
        VALUES ($1, $2, $3)
        ON CONFLICT (id) DO UPDATE SET
            width = COALESCE(EXCLUDED.width, images.width),
            height = COALESCE(EXCLUDED.height, images.height)
        "#,
    )
    .bind(image.id.as_bytes())
    .bind(image.width)
    .bind(image.height)
    .execute(pool)
    .await?;
    Ok(())
}
/// Link an image into a gallery (or refresh its filename/ordering if the
/// link already exists).
pub async fn upsert_gallery_image(pool: &PgPool, gi: &GalleryImage) -> Result<()> {
    sqlx::query(
        r#"
        INSERT INTO gallery_images (gallery_id, image_id, filename, ordering)
        VALUES ($1, $2, $3, $4)
        ON CONFLICT (gallery_id, image_id) DO UPDATE SET
            filename = EXCLUDED.filename,
            ordering = EXCLUDED.ordering
        "#,
    )
    .bind(gi.gallery_id.as_bytes())
    .bind(gi.image_id.as_bytes())
    .bind(&gi.filename)
    .bind(gi.ordering)
    .execute(pool)
    .await?;
    Ok(())
}
/// Fetch all (link, image) pairs for a gallery, ordered by the gallery's
/// `ordering` column.
///
/// # Panics
/// Panics if a stored id is not exactly 32 bytes.
pub async fn list_gallery_images(pool: &PgPool, gallery_id: &GalleryId) -> Result<Vec<(GalleryImage, Image)>> {
    let rows = sqlx::query(
        r#"
        SELECT gi.gallery_id, gi.image_id, gi.filename, gi.ordering,
               i.width, i.height
        FROM gallery_images gi
        JOIN images i ON i.id = gi.image_id
        WHERE gi.gallery_id = $1
        ORDER BY gi.ordering
        "#,
    )
    .bind(gallery_id.as_bytes())
    .fetch_all(pool)
    .await?;
    Ok(rows.iter().map(|r| {
        let gid_bytes: Vec<u8> = r.get("gallery_id");
        let iid_bytes: Vec<u8> = r.get("image_id");
        let image_id = ImageId(iid_bytes.try_into().expect("32-byte id"));
        let gi = GalleryImage {
            gallery_id: GalleryId(gid_bytes.try_into().expect("32-byte id")),
            image_id: image_id.clone(),
            filename: r.get("filename"),
            ordering: r.get("ordering"),
        };
        let image = Image {
            id: image_id,
            width: r.get("width"),
            height: r.get("height"),
        };
        (gi, image)
    }).collect())
}
/// Fetch one image row by id, if present.
///
/// # Panics
/// Panics if the stored id is not exactly 32 bytes.
pub async fn get_image(pool: &PgPool, id: &ImageId) -> Result<Option<Image>> {
    let maybe_row = sqlx::query("SELECT id, width, height FROM images WHERE id = $1")
        .bind(id.as_bytes())
        .fetch_optional(pool)
        .await?;
    Ok(maybe_row.map(|row| {
        let raw_id: Vec<u8> = row.get("id");
        Image {
            id: ImageId(raw_id.try_into().expect("32-byte id")),
            width: row.get("width"),
            height: row.get("height"),
        }
    }))
}
/// Resolve an image to (gallery directory path, filename) so it can be read
/// from disk. If the image is in several galleries, an arbitrary one is
/// chosen (LIMIT 1, no ORDER BY).
pub async fn find_image_gallery_path(pool: &PgPool, image_id: &ImageId) -> Result<Option<(String, String)>> {
    let row = sqlx::query(
        r#"
        SELECT g.path, gi.filename
        FROM gallery_images gi
        JOIN galleries g ON g.id = gi.gallery_id
        WHERE gi.image_id = $1
        LIMIT 1
        "#,
    )
    .bind(image_id.as_bytes())
    .fetch_optional(pool)
    .await?;
    Ok(row.map(|r| (r.get("path"), r.get("filename"))))
}

View File

@@ -0,0 +1,10 @@
//! PostgreSQL data-access layer: one module per entity family, plus pooling
//! and migrations.
pub mod pool;
pub mod gallery;
pub mod image;
pub mod face;
pub mod person;
pub mod clip;
pub mod user;
pub use pool::{connect, run_migrations};
pub use sqlx::PgPool;

View File

@@ -0,0 +1,130 @@
use anyhow::Result;
use sqlx::{PgPool, Row};
use rbv_entity::{GalleryId, Person, PersonId, PersonName};
/// Insert a new person row with a freshly generated UUID and return its id.
pub async fn create_person(pool: &PgPool) -> Result<PersonId> {
    let person_id = PersonId::new();
    sqlx::query("INSERT INTO persons (id) VALUES ($1)")
        .bind(person_id.as_uuid())
        .execute(pool)
        .await?;
    Ok(person_id)
}
/// Fetch one person by id, if present. Timestamps are stored naive and
/// interpreted as UTC on read.
pub async fn get_person(pool: &PgPool, id: &PersonId) -> Result<Option<Person>> {
    let row = sqlx::query("SELECT id, created_at FROM persons WHERE id = $1")
        .bind(id.as_uuid())
        .fetch_optional(pool)
        .await?;
    Ok(row.map(|r| Person {
        id: PersonId(r.get("id")),
        created_at: r.get::<chrono::NaiveDateTime, _>("created_at").and_utc(),
    }))
}
/// Fetch a page of persons ordered by creation time.
/// `page` is 1-based; a page <= 0 yields a negative OFFSET, which Postgres rejects.
pub async fn get_all_persons_paged(pool: &PgPool, page: i64, per_page: i64) -> Result<Vec<Person>> {
    let rows = sqlx::query(
        "SELECT id, created_at FROM persons ORDER BY created_at LIMIT $1 OFFSET $2",
    )
    .bind(per_page)
    .bind((page - 1) * per_page)
    .fetch_all(pool)
    .await?;
    Ok(rows.iter().map(|r| Person {
        id: PersonId(r.get("id")),
        created_at: r.get::<chrono::NaiveDateTime, _>("created_at").and_utc(),
    }).collect())
}
/// Attach `name` to a person; if `is_primary`, the person's existing primary
/// name is demoted first. Both statements run in one transaction.
///
/// NOTE(review): `ON CONFLICT (name)` implies names are globally unique, and
/// an existing name held by another person is silently reassigned to this
/// one — confirm the steal is intended.
pub async fn set_person_name(pool: &PgPool, person_id: &PersonId, name: &str, is_primary: bool) -> Result<()> {
    let mut tx = pool.begin().await?;
    if is_primary {
        // Demote the current primary so the uniqueness of "primary" holds.
        sqlx::query(
            "UPDATE person_names SET is_primary = false WHERE person_id = $1 AND is_primary = true",
        )
        .bind(person_id.as_uuid())
        .execute(&mut *tx)
        .await?;
    }
    sqlx::query(
        r#"
        INSERT INTO person_names (person_id, name, is_primary)
        VALUES ($1, $2, $3)
        ON CONFLICT (name) DO UPDATE SET person_id = EXCLUDED.person_id, is_primary = EXCLUDED.is_primary
        "#,
    )
    .bind(person_id.as_uuid())
    .bind(name)
    .bind(is_primary)
    .execute(&mut *tx)
    .await?;
    tx.commit().await?;
    Ok(())
}
/// Fetch all names for a person, primary name first, then alphabetical.
pub async fn get_person_names(pool: &PgPool, person_id: &PersonId) -> Result<Vec<PersonName>> {
    let rows = sqlx::query(
        "SELECT person_id, name, is_primary FROM person_names WHERE person_id = $1 ORDER BY is_primary DESC, name",
    )
    .bind(person_id.as_uuid())
    .fetch_all(pool)
    .await?;
    Ok(rows.iter().map(|r| PersonName {
        person_id: PersonId(r.get("person_id")),
        name: r.get("name"),
        is_primary: r.get("is_primary"),
    }).collect())
}
/// Exact-match lookup of persons by name, returning each matching person
/// together with the matched name record.
pub async fn find_persons_by_name(pool: &PgPool, name: &str) -> Result<Vec<(Person, PersonName)>> {
    let rows = sqlx::query(
        r#"
        SELECT p.id, p.created_at, pn.name, pn.is_primary
        FROM person_names pn
        JOIN persons p ON p.id = pn.person_id
        WHERE pn.name = $1
        "#,
    )
    .bind(name)
    .fetch_all(pool)
    .await?;
    Ok(rows.iter().map(|r| {
        let id: uuid::Uuid = r.get("id");
        let person = Person {
            id: PersonId(id),
            created_at: r.get::<chrono::NaiveDateTime, _>("created_at").and_utc(),
        };
        let pname = PersonName {
            person_id: PersonId(id),
            name: r.get("name"),
            is_primary: r.get("is_primary"),
        };
        (person, pname)
    }).collect())
}
/// Total number of person rows.
pub async fn count_persons(pool: &PgPool) -> Result<i64> {
    let row = sqlx::query("SELECT COUNT(*) AS count FROM persons")
        .fetch_one(pool)
        .await?;
    let total: i64 = row.get("count");
    Ok(total)
}
/// Fetch the distinct persons whose faces appear in any image of the given
/// gallery (via face_detections joined through gallery_images).
pub async fn get_persons_for_gallery(pool: &PgPool, gallery_id: &GalleryId) -> Result<Vec<Person>> {
    let rows = sqlx::query(
        r#"
        SELECT DISTINCT p.id, p.created_at
        FROM persons p
        JOIN face_detections fd ON fd.person_id = p.id
        JOIN gallery_images gi ON gi.image_id = fd.image_id
        WHERE gi.gallery_id = $1
        "#,
    )
    .bind(gallery_id.as_bytes())
    .fetch_all(pool)
    .await?;
    Ok(rows.iter().map(|r| Person {
        id: PersonId(r.get("id")),
        created_at: r.get::<chrono::NaiveDateTime, _>("created_at").and_utc(),
    }).collect())
}

View File

@@ -0,0 +1,16 @@
use anyhow::Result;
use sqlx::PgPool;
use sqlx::postgres::PgPoolOptions;
/// Open a PostgreSQL connection pool for the given connection string.
/// The pool is capped at 10 connections (hardcoded).
pub async fn connect(connstring: &str) -> Result<PgPool> {
    let pool = PgPoolOptions::new()
        .max_connections(10)
        .connect(connstring)
        .await?;
    Ok(pool)
}
/// Apply all pending migrations. The migration files are embedded at
/// compile time from `../../migrations` by the `sqlx::migrate!` macro.
pub async fn run_migrations(pool: &PgPool) -> Result<()> {
    sqlx::migrate!("../../migrations").run(pool).await?;
    Ok(())
}

View File

@@ -0,0 +1,86 @@
use anyhow::Result;
use sqlx::{PgPool, Row};
use rbv_entity::{Session, User};
/// Fetch one user by primary key, if present. Timestamps are stored naive
/// and interpreted as UTC on read.
pub async fn get_user_by_id(pool: &PgPool, id: uuid::Uuid) -> Result<Option<User>> {
    let row = sqlx::query(
        "SELECT id, username, password_hash, created_at FROM users WHERE id = $1",
    )
    .bind(id)
    .fetch_optional(pool)
    .await?;
    Ok(row.map(|r| User {
        id: r.get("id"),
        username: r.get("username"),
        password_hash: r.get("password_hash"),
        created_at: r.get::<chrono::NaiveDateTime, _>("created_at").and_utc(),
    }))
}
/// Insert a new user row (id, username, password hash); `created_at` is
/// left to the database default.
pub async fn create_user(pool: &PgPool, user: &User) -> Result<()> {
    let insert = sqlx::query("INSERT INTO users (id, username, password_hash) VALUES ($1, $2, $3)")
        .bind(user.id)
        .bind(&user.username)
        .bind(&user.password_hash);
    insert.execute(pool).await?;
    Ok(())
}
/// Fetch one user by (exact) username, if present.
pub async fn get_user_by_username(pool: &PgPool, username: &str) -> Result<Option<User>> {
    let row = sqlx::query(
        "SELECT id, username, password_hash, created_at FROM users WHERE username = $1",
    )
    .bind(username)
    .fetch_optional(pool)
    .await?;
    Ok(row.map(|r| User {
        id: r.get("id"),
        username: r.get("username"),
        password_hash: r.get("password_hash"),
        created_at: r.get::<chrono::NaiveDateTime, _>("created_at").and_utc(),
    }))
}
/// Persist a new session row; timestamps are stored as naive UTC.
pub async fn create_session(pool: &PgPool, session: &Session) -> Result<()> {
    sqlx::query(
        "INSERT INTO sessions (token, user_id, created_at, expires_at) VALUES ($1, $2, $3, $4)",
    )
    .bind(&session.token)
    .bind(session.user_id)
    .bind(session.created_at.naive_utc())
    .bind(session.expires_at.naive_utc())
    .execute(pool)
    .await?;
    Ok(())
}
/// Return the session for `token` only if it exists and has not expired;
/// expiry is enforced in SQL against the database clock (`now()`).
pub async fn validate_session(pool: &PgPool, token: &str) -> Result<Option<Session>> {
    let row = sqlx::query(
        "SELECT token, user_id, created_at, expires_at FROM sessions WHERE token = $1 AND expires_at > now()",
    )
    .bind(token)
    .fetch_optional(pool)
    .await?;
    Ok(row.map(|r| Session {
        token: r.get("token"),
        user_id: r.get("user_id"),
        created_at: r.get::<chrono::NaiveDateTime, _>("created_at").and_utc(),
        expires_at: r.get::<chrono::NaiveDateTime, _>("expires_at").and_utc(),
    }))
}
/// Remove a session row (logout); succeeds even if the token is unknown.
pub async fn delete_session(pool: &PgPool, token: &str) -> Result<()> {
    let delete = sqlx::query("DELETE FROM sessions WHERE token = $1").bind(token);
    delete.execute(pool).await?;
    Ok(())
}
/// Delete every expired session (per the database clock) and report how
/// many rows were removed.
pub async fn purge_expired_sessions(pool: &PgPool) -> Result<u64> {
    let outcome = sqlx::query("DELETE FROM sessions WHERE expires_at <= now()")
        .execute(pool)
        .await?;
    Ok(outcome.rows_affected())
}

View File

@@ -0,0 +1,10 @@
[package]
name = "rbv-entity"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
serde = { workspace = true }
uuid = { workspace = true }
chrono = { workspace = true }

View File

@@ -0,0 +1,9 @@
use serde::{Deserialize, Serialize};
use crate::image::ImageId;
/// A CLIP visual embedding attached to one image.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClipEmbedding {
    pub image_id: ImageId,
    /// 512-dimensional CLIP visual embedding
    pub embedding: Vec<f32>,
}

View File

@@ -0,0 +1,40 @@
use serde::{Deserialize, Serialize};
use crate::image::ImageId;
use crate::person::PersonId;
/// Opaque 32-byte face identifier.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct FaceId(pub [u8; 32]);
impl FaceId {
    /// Borrow the raw 32 bytes.
    pub fn as_bytes(&self) -> &[u8] {
        self.0.as_slice()
    }
    /// Lowercase hex rendering (64 characters).
    pub fn to_hex(&self) -> String {
        use std::fmt::Write;
        let mut hex = String::with_capacity(64);
        for byte in self.0 {
            let _ = write!(hex, "{byte:02x}");
        }
        hex
    }
}
impl std::fmt::Display for FaceId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.to_hex())
    }
}
/// Rectangular face region within an image.
/// NOTE(review): assumed to be pixel coordinates with (x1, y1) the
/// top-left and (x2, y2) the bottom-right corner — confirm against the ML
/// API response.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct BoundingBox {
    pub x1: i32,
    pub y1: i32,
    pub x2: i32,
    pub y2: i32,
}
/// One detected face: its location in an image, detector confidence,
/// embedding, and (once clustered) the person it belongs to.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FaceDetection {
    pub id: FaceId,
    pub image_id: ImageId,
    pub bounding_box: BoundingBox,
    // Detector confidence score.
    pub score: f32,
    // Face recognition embedding used for clustering.
    pub embedding: Vec<f32>,
    // None until clustering assigns the face to a person.
    pub person_id: Option<PersonId>,
}

View File

@@ -0,0 +1,33 @@
use std::path::PathBuf;
use serde::{Deserialize, Serialize};
/// Opaque 32-byte gallery identifier.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct GalleryId(pub [u8; 32]);
impl GalleryId {
    /// Borrow the raw 32 bytes.
    pub fn as_bytes(&self) -> &[u8] {
        self.0.as_slice()
    }
    /// Lowercase hex rendering (64 characters).
    pub fn to_hex(&self) -> String {
        use std::fmt::Write;
        let mut hex = String::with_capacity(64);
        for byte in self.0 {
            let _ = write!(hex, "{byte:02x}");
        }
        hex
    }
}
impl std::fmt::Display for GalleryId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.to_hex())
    }
}
/// One indexed image gallery and its source metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Gallery {
    pub id: GalleryId,
    // Numeric id from the originating source.
    pub source_id: u64,
    pub collection: String,
    pub source_name: String,
    pub source_url: String,
    pub subjects: Vec<String>,
    pub tags: Vec<String>,
    // Directory on disk holding the gallery's images.
    pub path: PathBuf,
}

View File

@@ -0,0 +1,36 @@
use serde::{Deserialize, Serialize};
use crate::gallery::GalleryId;
/// Opaque 32-byte image identifier.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ImageId(pub [u8; 32]);
impl ImageId {
    /// Borrow the raw 32 bytes.
    pub fn as_bytes(&self) -> &[u8] {
        self.0.as_slice()
    }
    /// Lowercase hex rendering (64 characters).
    pub fn to_hex(&self) -> String {
        use std::fmt::Write;
        let mut hex = String::with_capacity(64);
        for byte in self.0 {
            let _ = write!(hex, "{byte:02x}");
        }
        hex
    }
}
impl std::fmt::Display for ImageId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.to_hex())
    }
}
/// Content-addressed image record; the ID is a BLAKE3 hash of the file bytes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Image {
    pub id: ImageId,
    // Pixel dimensions as reported by the ML API; None when unknown.
    pub width: Option<i32>,
    pub height: Option<i32>,
}
/// Membership of an image in a gallery, with its display position.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GalleryImage {
    pub gallery_id: GalleryId,
    pub image_id: ImageId,
    /// Original filename within the gallery directory.
    pub filename: String,
    /// Zero-based position within the gallery (assigned in sorted-filename order at ingest).
    pub ordering: i32,
}

View File

@@ -0,0 +1,13 @@
pub mod gallery;
pub mod image;
pub mod face;
pub mod person;
pub mod clip;
pub mod user;
pub use gallery::{Gallery, GalleryId};
pub use image::{Image, GalleryImage, ImageId};
pub use face::{FaceDetection, FaceId, BoundingBox};
pub use person::{Person, PersonId, PersonName};
pub use clip::ClipEmbedding;
pub use user::{User, Session};

View File

@@ -0,0 +1,42 @@
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use chrono::{DateTime, Utc};
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct PersonId(pub Uuid);
impl PersonId {
    /// Create a fresh random (v4) person identifier.
    pub fn new() -> Self {
        PersonId(Uuid::new_v4())
    }
    /// Borrow the underlying UUID.
    pub fn as_uuid(&self) -> &Uuid {
        &self.0
    }
}
impl Default for PersonId {
    fn default() -> Self {
        PersonId::new()
    }
}
impl std::fmt::Display for PersonId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Delegate to the UUID's own Display formatting.
        std::fmt::Display::fmt(&self.0, f)
    }
}
/// A clustered identity; human-readable labels live in [`PersonName`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Person {
    pub id: PersonId,
    pub created_at: DateTime<Utc>,
}
/// A name attached to a person; a person may carry several names.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PersonName {
    pub person_id: PersonId,
    /// Lowercase alpha + hyphens only, e.g. "first-last-disambiguator"
    pub name: String,
    // Marks the display name; presumably at most one primary per person —
    // confirm the invariant is enforced in rbv-data.
    pub is_primary: bool,
}

View File

@@ -0,0 +1,21 @@
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use chrono::{DateTime, Utc};
/// An account that can authenticate against the API.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct User {
    pub id: Uuid,
    pub username: String,
    /// Argon2 PHC string
    pub password_hash: String,
    pub created_at: DateTime<Utc>,
}
/// A bearer session issued after login.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Session {
    /// Base64url-encoded 256-bit random token
    pub token: String,
    pub user_id: Uuid,
    pub created_at: DateTime<Utc>,
    // Sessions past this instant should be rejected — presumably enforced by
    // rbv-auth; confirm there.
    pub expires_at: DateTime<Utc>,
}

View File

@@ -0,0 +1,10 @@
[package]
name = "rbv-hash"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
blake3 = { workspace = true }
rbv-entity = { workspace = true }
thiserror = { workspace = true }

106
crates/rbv-hash/src/lib.rs Normal file
View File

@@ -0,0 +1,106 @@
use rbv_entity::{BoundingBox, FaceId, GalleryId, ImageId};
use thiserror::Error;
/// Errors produced by hex decoding in this crate.
#[derive(Debug, Error)]
pub enum HashError {
    /// Input was not a 64-character string of hex digits.
    #[error("invalid hex string: {0}")]
    InvalidHex(String),
}
/// Compute a reproducible GalleryId from the gallery's canonical identity fields.
///
/// Hashes `collection \0 source_name \0 source_url` so the ID is stable
/// regardless of where the gallery directory lives on disk.
pub fn gallery_id(collection: &str, source_name: &str, source_url: &str) -> GalleryId {
    let mut hasher = blake3::Hasher::new();
    for (i, field) in [collection, source_name, source_url].iter().enumerate() {
        if i > 0 {
            // NUL separator keeps adjacent fields unambiguous.
            hasher.update(b"\0");
        }
        hasher.update(field.as_bytes());
    }
    GalleryId(*hasher.finalize().as_bytes())
}
/// Compute a content-addressed ImageId from the raw file bytes.
///
/// Identical files produce the same ID regardless of filename or gallery.
pub fn image_id(content: &[u8]) -> ImageId {
    let digest = blake3::hash(content);
    ImageId(*digest.as_bytes())
}
/// Compute a FaceId from the image it was detected in and its bounding box.
pub fn face_id(image_id: &ImageId, bbox: &BoundingBox) -> FaceId {
    let mut hasher = blake3::Hasher::new();
    hasher.update(&image_id.0);
    // Coordinates are hashed in x1, y1, x2, y2 order as little-endian bytes.
    for coord in [bbox.x1, bbox.y1, bbox.x2, bbox.y2] {
        hasher.update(&coord.to_le_bytes());
    }
    FaceId(*hasher.finalize().as_bytes())
}
/// Encode a 32-byte hash as a lowercase hex string (64 chars).
pub fn to_hex(bytes: &[u8; 32]) -> String {
    // Preallocate the exact output length and push two nibble characters per
    // byte; the previous version allocated a temporary String per byte via
    // `format!`.
    const HEX: &[u8; 16] = b"0123456789abcdef";
    let mut out = String::with_capacity(64);
    for &b in bytes {
        out.push(HEX[(b >> 4) as usize] as char);
        out.push(HEX[(b & 0x0f) as usize] as char);
    }
    out
}
/// Decode a 64-character lowercase hex string back to 32 bytes.
pub fn from_hex(s: &str) -> Result<[u8; 32], HashError> {
    let raw = s.as_bytes();
    if raw.len() != 64 {
        return Err(HashError::InvalidHex(s.to_string()));
    }
    let mut decoded = [0u8; 32];
    for (slot, pair) in decoded.iter_mut().zip(raw.chunks(2)) {
        match (hex_nibble(pair[0]), hex_nibble(pair[1])) {
            (Some(hi), Some(lo)) => *slot = (hi << 4) | lo,
            _ => return Err(HashError::InvalidHex(s.to_string())),
        }
    }
    Ok(decoded)
}
/// Map one ASCII hex digit (either case) to its value 0-15; `None` otherwise.
fn hex_nibble(b: u8) -> Option<u8> {
    // `char::to_digit(16)` accepts exactly 0-9, a-f, A-F; bytes >= 0x80 map
    // to Latin-1 chars, which are never hex digits, so behaviour matches the
    // explicit range match it replaces.
    (b as char).to_digit(16).map(|d| d as u8)
}
#[cfg(test)]
mod tests {
    use super::*;
    // Same identity fields must always hash to the same gallery ID.
    #[test]
    fn gallery_id_is_stable() {
        let a = gallery_id("col", "name", "https://example.com");
        let b = gallery_id("col", "name", "https://example.com");
        assert_eq!(a, b);
    }
    // Changing any identity field must change the ID.
    #[test]
    fn gallery_id_differs_on_different_inputs() {
        let a = gallery_id("col", "name", "https://example.com");
        let b = gallery_id("col", "other", "https://example.com");
        assert_ne!(a, b);
    }
    // Image IDs depend only on file content, never on name or location.
    #[test]
    fn image_id_is_content_addressed() {
        let data = b"hello world";
        let a = image_id(data);
        let b = image_id(data);
        assert_eq!(a, b);
        let c = image_id(b"different");
        assert_ne!(a, c);
    }
    // to_hex and from_hex must be mutual inverses on 32-byte values.
    #[test]
    fn hex_roundtrip() {
        let bytes = [0xab; 32];
        let hex = to_hex(&bytes);
        assert_eq!(hex.len(), 64);
        let decoded = from_hex(&hex).unwrap();
        assert_eq!(decoded, bytes);
    }
}

View File

@@ -0,0 +1,20 @@
[package]
name = "rbv-ingest"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
rbv-entity = { workspace = true }
rbv-hash = { workspace = true }
rbv-data = { workspace = true }
rbv-ml = { workspace = true }
sqlx = { workspace = true }
tokio = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
thiserror = { workspace = true }
anyhow = { workspace = true }
tracing = { workspace = true }
glob = { workspace = true }
image = { workspace = true }

View File

@@ -0,0 +1,43 @@
use std::path::Path;
use glob::Pattern;
/// Include/exclude filename filters compiled from glob pattern strings.
pub struct FilterConfig {
    include: Option<Vec<Pattern>>,
    exclude: Option<Vec<Pattern>>,
}
impl FilterConfig {
    /// Compile optional include/exclude glob pattern lists.
    ///
    /// NOTE(review): patterns that fail to compile are silently dropped —
    /// confirm this is intended rather than surfacing an error to the caller.
    pub fn new(include: Option<Vec<String>>, exclude: Option<Vec<String>>) -> Self {
        fn compile(patterns: Option<Vec<String>>) -> Option<Vec<Pattern>> {
            patterns.map(|list| list.iter().filter_map(|s| Pattern::new(s).ok()).collect())
        }
        Self {
            include: compile(include),
            exclude: compile(exclude),
        }
    }
    /// Returns true if the given filename should be included.
    ///
    /// Mirrors rsync behaviour:
    /// - If include patterns are set, filename must match at least one.
    /// - If exclude patterns are set, filename must not match any.
    /// - If neither is set, all files are included.
    pub fn matches(&self, path: &Path) -> bool {
        // Paths without a UTF-8 filename can never match, so they are excluded.
        let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
            return false;
        };
        let included = self
            .include
            .as_ref()
            .map_or(true, |pats| pats.iter().any(|p| p.matches(name)));
        let excluded = self
            .exclude
            .as_ref()
            .map_or(false, |pats| pats.iter().any(|p| p.matches(name)));
        included && !excluded
    }
}

View File

@@ -0,0 +1,70 @@
use std::path::{Path, PathBuf};
use serde::Deserialize;
use anyhow::{Context, Result};
/// Deserialised `index.json` from a gallery directory.
#[derive(Debug, Deserialize)]
pub struct IndexJson {
    /// Numeric gallery id assigned by the upstream source.
    pub id: u64,
    pub collection: String,
    pub source: Source,
    // Both lists default to empty when the key is absent from the JSON.
    #[serde(default)]
    pub subjects: Vec<String>,
    #[serde(default)]
    pub tags: Vec<String>,
}
/// Origin of a gallery: where it was scraped/downloaded from.
#[derive(Debug, Deserialize)]
pub struct Source {
    pub name: String,
    pub url: String,
}
/// Read and parse `index.json` from the given gallery directory.
pub fn read_index_json(gallery_path: &Path) -> Result<IndexJson> {
    let index_path = gallery_path.join("index.json");
    let raw = std::fs::read_to_string(&index_path)
        .with_context(|| format!("reading {}", index_path.display()))?;
    serde_json::from_str(&raw).with_context(|| format!("parsing {}", index_path.display()))
}
/// List all image files in the gallery directory, excluding the `tn/` subdirectory.
pub fn list_images(gallery_path: &Path) -> Result<Vec<PathBuf>> {
    let thumbs = gallery_path.join("tn");
    let entries = std::fs::read_dir(gallery_path)
        .with_context(|| format!("reading dir {}", gallery_path.display()))?;
    let mut images = Vec::new();
    for entry in entries {
        let path = entry?.path();
        // Directories (including tn/) are skipped; the tn/ prefix test is a
        // defensive guard for anything nested under the thumbnail dir, and
        // non-image files are filtered by extension.
        if path.is_dir() || path.starts_with(&thumbs) || !is_image_file(&path) {
            continue;
        }
        images.push(path);
    }
    // Deterministic ordering regardless of filesystem enumeration order.
    images.sort();
    Ok(images)
}
/// True when the path's extension (case-insensitive) is a known image format.
fn is_image_file(path: &Path) -> bool {
    path.extension()
        .and_then(|e| e.to_str())
        .map(str::to_lowercase)
        .is_some_and(|ext| {
            matches!(
                ext.as_str(),
                "jpg" | "jpeg" | "png" | "gif" | "webp" | "tiff" | "tif" | "bmp"
            )
        })
}

View File

@@ -0,0 +1,8 @@
pub mod traversal;
pub mod filter;
pub mod gallery;
pub mod pipeline;
pub use pipeline::{ingest_galleries, IngestConfig, IngestReport};
pub use traversal::{discover_galleries, DirKind};
pub use filter::FilterConfig;

View File

@@ -0,0 +1,257 @@
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use anyhow::Result;
use tokio::sync::Semaphore;
use tracing::{info, warn};
use rbv_entity::{ClipEmbedding, FaceDetection, Gallery, GalleryImage, Image};
use rbv_hash::{face_id, gallery_id, image_id};
use rbv_ml::{AnalysisResult, MlClient, MlError};
use sqlx::PgPool;
use crate::filter::FilterConfig;
use crate::gallery::{list_images, read_index_json};
/// Tunables for one ingest run.
pub struct IngestConfig {
    /// Maximum number of images processed concurrently (semaphore size).
    pub concurrency: usize,
    /// Filename include/exclude filters applied before any image work.
    pub filter: FilterConfig,
}
/// Aggregate counters and per-path errors from one ingest run.
pub struct IngestReport {
    pub galleries_processed: usize,
    /// Images analysed by the ML API this run.
    pub images_processed: usize,
    /// Images whose content was already ingested, or abandoned after ML retries.
    pub images_skipped: usize,
    pub faces_detected: usize,
    /// Non-fatal failures, keyed by the gallery path they occurred in.
    pub errors: Vec<(PathBuf, anyhow::Error)>,
}
/// Ingest every gallery in `galleries`: read metadata, upsert the gallery
/// row, then process each image concurrently (bounded by
/// `config.concurrency`).
///
/// Per-gallery and per-image failures are recorded in the returned
/// [`IngestReport`] rather than aborting the whole run.
pub async fn ingest_galleries(
    pool: &PgPool,
    ml: &MlClient,
    galleries: Vec<PathBuf>,
    config: &IngestConfig,
) -> Result<IngestReport> {
    let mut report = IngestReport {
        galleries_processed: 0,
        images_processed: 0,
        images_skipped: 0,
        faces_detected: 0,
        errors: Vec::new(),
    };
    // One global semaphore bounds in-flight image tasks across the run.
    let semaphore = Arc::new(Semaphore::new(config.concurrency));
    for gallery_path in &galleries {
        info!("Processing gallery: {}", gallery_path.display());
        let index = match read_index_json(gallery_path) {
            Ok(i) => i,
            Err(e) => {
                warn!("Skipping {}: {e}", gallery_path.display());
                report.errors.push((gallery_path.clone(), e));
                continue;
            }
        };
        // The gallery ID hashes identity fields, not the path, so a moved
        // gallery updates its existing row on re-ingest.
        let gid = gallery_id(&index.collection, &index.source.name, &index.source.url);
        let gallery = Gallery {
            id: gid.clone(),
            source_id: index.id,
            collection: index.collection,
            source_name: index.source.name,
            source_url: index.source.url,
            subjects: index.subjects,
            tags: index.tags,
            path: gallery_path.clone(),
        };
        if let Err(e) = rbv_data::gallery::upsert_gallery(pool, &gallery).await {
            report.errors.push((gallery_path.clone(), e));
            continue;
        }
        let image_paths = match list_images(gallery_path) {
            Ok(p) => p,
            Err(e) => {
                report.errors.push((gallery_path.clone(), e));
                continue;
            }
        };
        // Apply include/exclude filename filters before spawning any work.
        let filtered: Vec<PathBuf> = image_paths
            .into_iter()
            .filter(|p| config.filter.matches(p))
            .collect();
        let mut tasks = Vec::new();
        for (ordering, image_path) in filtered.iter().enumerate() {
            let pool = pool.clone();
            let ml = ml.clone();
            let gid = gid.clone();
            let image_path = image_path.clone();
            let sem = semaphore.clone();
            tasks.push(tokio::spawn(async move {
                // The semaphore lives for the whole run and is never closed,
                // so acquisition can only fail on a programming error.
                let _permit = sem
                    .acquire_owned()
                    .await
                    .expect("ingest semaphore closed unexpectedly");
                process_image(&pool, &ml, &gid, &image_path, ordering as i32).await
            }));
        }
        // Collect results; a task panic is converted into a reported error
        // instead of propagating.
        for task in tasks {
            match task.await {
                Ok(Ok((skipped, faces))) => {
                    if skipped {
                        report.images_skipped += 1;
                    } else {
                        report.images_processed += 1;
                        report.faces_detected += faces;
                    }
                }
                Ok(Err(e)) => {
                    warn!("Image error: {e:#}");
                    report.errors.push((gallery_path.clone(), e));
                }
                Err(e) => {
                    report
                        .errors
                        .push((gallery_path.clone(), anyhow::anyhow!("task panicked: {e}")));
                }
            }
        }
        report.galleries_processed += 1;
    }
    Ok(report)
}
/// Process a single image: hash, check if exists, submit to ML, upsert to DB.
/// Returns (was_skipped, faces_detected).
///
/// "Skipped" covers both images whose content was already analysed and
/// images abandoned after ML retries were exhausted.
async fn process_image(
    pool: &PgPool,
    ml: &MlClient,
    gallery_id: &rbv_entity::GalleryId,
    image_path: &PathBuf,
    ordering: i32,
) -> Result<(bool, usize)> {
    let bytes = tokio::fs::read(image_path).await?;
    // Content-addressed: identical bytes map to the same image row.
    let iid = image_id(&bytes);
    let filename = image_path
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or("")
        .to_string();
    // Always upsert the gallery_images row (ordering may change)
    let gi = GalleryImage {
        gallery_id: gallery_id.clone(),
        image_id: iid.clone(),
        filename,
        ordering,
    };
    // Check if ML work has already been done for this image content
    if rbv_data::image::image_exists(pool, &iid).await? {
        rbv_data::image::upsert_gallery_image(pool, &gi).await?;
        return Ok((true, 0));
    }
    // Resize for ML API (max 1MB part limit). Hash is computed on originals above.
    let ml_bytes = prepare_for_ml(&bytes)?;
    // Submit to ML API (with retry/backoff for transient connection errors)
    let result = match analyze_with_backoff(ml, &ml_bytes, image_path).await {
        Ok(r) => r,
        Err(e) => {
            // Counted as a skip so one unreachable image doesn't fail the
            // gallery. Since the image row is never written here, a later run
            // will pick this image up again.
            warn!("Skipping {} after ML retries exhausted: {e:#}", image_path.display());
            return Ok((true, 0));
        }
    };
    // Upsert image record
    let image = Image {
        id: iid.clone(),
        width: Some(result.image_width),
        height: Some(result.image_height),
    };
    rbv_data::image::upsert_image(pool, &image).await?;
    rbv_data::image::upsert_gallery_image(pool, &gi).await?;
    // Upsert CLIP embedding
    let clip = ClipEmbedding {
        image_id: iid.clone(),
        embedding: result.clip_embedding,
    };
    rbv_data::clip::upsert_clip_embedding(pool, &clip).await?;
    // Upsert face detections
    let face_count = result.faces.len();
    for detected in result.faces {
        // FaceId is deterministic over (image, bbox), so re-runs upsert in place.
        let fid = face_id(&iid, &detected.bounding_box);
        let face = FaceDetection {
            id: fid,
            image_id: iid.clone(),
            bounding_box: detected.bounding_box,
            score: detected.detection_score,
            embedding: detected.embedding,
            person_id: None,
        };
        rbv_data::face::upsert_face(pool, &face).await?;
    }
    Ok((false, face_count))
}
/// Call `ml.analyze_image` with exponential backoff for transient failures.
/// Retries up to 3 times (delays: 5s, 10s, 20s) on connection/timeout/5xx errors.
async fn analyze_with_backoff(
    ml: &MlClient,
    bytes: &[u8],
    image_path: &Path,
) -> Result<AnalysisResult, MlError> {
    const MAX_RETRIES: u32 = 3;
    let mut delay = Duration::from_secs(5);
    let mut attempt = 0u32;
    loop {
        match ml.analyze_image(bytes).await {
            Ok(result) => return Ok(result),
            // Retry only transient errors, and only while retries remain.
            Err(err) if attempt < MAX_RETRIES && err.is_retryable() => {
                attempt += 1;
                warn!(
                    "ML API transient error for {} ({err}); retrying in {}s ({attempt}/{MAX_RETRIES})",
                    image_path.display(),
                    delay.as_secs(),
                );
                tokio::time::sleep(delay).await;
                delay *= 2;
            }
            // Non-retryable error, or retries exhausted: surface it.
            Err(err) => return Err(err),
        }
    }
}
/// Decode, resize if needed, and re-encode as JPEG for the ML API.
/// Always produces JPEG output regardless of the source format, ensuring
/// Pillow on the server side can always identify the image.
fn prepare_for_ml(bytes: &[u8]) -> anyhow::Result<Vec<u8>> {
    const MAX_DIM: u32 = 1280;
    let mut img = image::load_from_memory(bytes)?;
    if img.width() > MAX_DIM || img.height() > MAX_DIM {
        // `resize` preserves aspect ratio within the MAX_DIM bounding box.
        img = img.resize(MAX_DIM, MAX_DIM, image::imageops::FilterType::Lanczos3);
    }
    let mut encoded = Vec::new();
    let mut cursor = std::io::Cursor::new(&mut encoded);
    img.write_to(&mut cursor, image::ImageFormat::Jpeg)?;
    Ok(encoded)
}

View File

@@ -0,0 +1,89 @@
use std::path::{Path, PathBuf};
use anyhow::Result;
/// Classification of a directory encountered during traversal.
///
/// Fieldless enum, so `Copy` and `Eq` are derived in addition to the
/// original `Clone`/`PartialEq` — callers can compare and copy freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DirKind {
    /// Contains `index.json` and a `tn/` subdirectory.
    Gallery,
    /// Contains only gallery subdirectories.
    Chunk,
    /// Contains only chunk subdirectories.
    Root,
    /// Anything else.
    Unknown,
}
/// A gallery directory contains `index.json` and a `tn/` subdirectory.
pub fn is_gallery(path: &Path) -> bool {
    let has_index = path.join("index.json").is_file();
    let has_thumbs = path.join("tn").is_dir();
    path.is_dir() && has_index && has_thumbs
}
/// A chunk directory contains only gallery subdirectories.
pub fn is_chunk(path: &Path) -> bool {
    if !path.is_dir() {
        return false;
    }
    let Ok(entries) = std::fs::read_dir(path) else {
        return false;
    };
    let subdirs: Vec<PathBuf> = entries
        .flatten()
        .map(|entry| entry.path())
        .filter(|p| p.is_dir())
        .collect();
    // An empty directory is not a chunk; non-directory children are ignored.
    !subdirs.is_empty() && subdirs.iter().all(|p| is_gallery(p))
}
/// A root directory contains only chunk subdirectories.
pub fn is_root(path: &Path) -> bool {
    if !path.is_dir() {
        return false;
    }
    let Ok(entries) = std::fs::read_dir(path) else {
        return false;
    };
    let subdirs: Vec<PathBuf> = entries
        .flatten()
        .map(|entry| entry.path())
        .filter(|p| p.is_dir())
        .collect();
    // Must have at least one child directory, and every one must be a chunk.
    !subdirs.is_empty() && subdirs.iter().all(|p| is_chunk(p))
}
/// Classify a directory, checking the most specific shape (gallery) first.
pub fn classify_directory(path: &Path) -> DirKind {
    if is_gallery(path) {
        return DirKind::Gallery;
    }
    if is_chunk(path) {
        return DirKind::Chunk;
    }
    if is_root(path) {
        return DirKind::Root;
    }
    DirKind::Unknown
}
/// Discover all gallery directories reachable from the given target paths.
pub fn discover_galleries(targets: &[PathBuf]) -> Result<Vec<PathBuf>> {
    let mut found = Vec::new();
    targets
        .iter()
        .try_for_each(|target| collect_galleries(target, &mut found))?;
    Ok(found)
}
/// Depth-first walk: record gallery directories, recurse into everything else.
fn collect_galleries(path: &Path, out: &mut Vec<PathBuf>) -> Result<()> {
    if is_gallery(path) {
        // A gallery is a leaf: record it and do not descend further.
        out.push(path.to_path_buf());
        return Ok(());
    }
    if !path.is_dir() {
        return Ok(());
    }
    let mut subdirs: Vec<PathBuf> = std::fs::read_dir(path)?
        .flatten()
        .map(|entry| entry.path())
        .filter(|p| p.is_dir())
        .collect();
    // Sorted for deterministic discovery order.
    subdirs.sort();
    for dir in subdirs {
        collect_galleries(&dir, out)?;
    }
    Ok(())
}

13
crates/rbv-ml/Cargo.toml Normal file
View File

@@ -0,0 +1,13 @@
[package]
name = "rbv-ml"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
rbv-entity = { workspace = true }
reqwest = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }

131
crates/rbv-ml/src/client.rs Normal file
View File

@@ -0,0 +1,131 @@
use thiserror::Error;
use tracing::debug;
use crate::request::{image_entries, text_entries};
use crate::response::{AnalysisResult, ParseError, RawPredictResponse, RawTextResponse};
const DEFAULT_CLIP_MODEL: &str = "ViT-B-32__openai";
const DEFAULT_FACE_MODEL: &str = "buffalo_l";
/// Errors returned by [`MlClient`] operations.
#[derive(Debug, Error)]
pub enum MlError {
    /// Transport-level failure from reqwest (connect, timeout, body read, ...).
    #[error("HTTP error: {0}")]
    Http(#[from] reqwest::Error),
    /// The server responded with a non-success status; `body` is the raw payload.
    #[error("HTTP {status}: {body}")]
    HttpStatus { status: u16, body: String },
    /// The response arrived but could not be decoded.
    #[error("Parse error: {0}")]
    Parse(#[from] ParseError),
    /// The response was missing the expected embedding field.
    #[error("Missing embedding in response")]
    MissingEmbedding,
}
impl MlError {
/// Returns true if the error is transient and worth retrying.
pub fn is_retryable(&self) -> bool {
match self {
MlError::Http(e) => e.is_connect() || e.is_timeout() || e.is_request(),
MlError::HttpStatus { status, .. } => *status >= 500,
_ => false,
}
}
}
/// Client for an immich-ml compatible `/predict` endpoint.
///
/// Cheap to clone: the inner `reqwest::Client` shares its connection pool.
#[derive(Clone)]
pub struct MlClient {
    // Shared HTTP client (connection pooling).
    http: reqwest::Client,
    // Full URL of the predict endpoint, e.g. "http://host:3003/predict".
    predict_url: String,
    // Model name sent for CLIP requests.
    clip_model: String,
    // Model name sent for face detection/recognition requests.
    face_model: String,
}
impl MlClient {
    /// Create a client for an immich-ml compatible server at `base_url`,
    /// using the default CLIP and face models.
    pub fn new(base_url: &str) -> Self {
        // Normalise trailing slashes so the predict URL is always well-formed.
        let base = base_url.trim_end_matches('/');
        Self {
            http: reqwest::Client::new(),
            predict_url: format!("{base}/predict"),
            clip_model: DEFAULT_CLIP_MODEL.to_string(),
            face_model: DEFAULT_FACE_MODEL.to_string(),
        }
    }
    /// Override the CLIP and face model names used in requests.
    pub fn with_models(mut self, clip_model: &str, face_model: &str) -> Self {
        self.clip_model = clip_model.to_string();
        self.face_model = face_model.to_string();
        self
    }
    /// Submit an image for CLIP visual embedding + face detection/recognition.
    pub async fn analyze_image(&self, image_bytes: &[u8]) -> Result<AnalysisResult, MlError> {
        let entries = image_entries(&self.clip_model, &self.face_model);
        let image_part = reqwest::multipart::Part::bytes(image_bytes.to_vec())
            .file_name("image.jpg")
            // "image/jpeg" is a statically valid MIME string; this cannot fail.
            .mime_str("image/jpeg")
            .unwrap();
        let form = reqwest::multipart::Form::new()
            .part("image", image_part)
            .text("entries", entries);
        let is_jpeg = image_bytes.starts_with(&[0xFF, 0xD8, 0xFF]);
        debug!(
            url = %self.predict_url,
            bytes = image_bytes.len(),
            is_jpeg,
            "Submitting image to ML API",
        );
        if !is_jpeg {
            // Diagnostic only: callers are expected to send JPEG (see ingest).
            tracing::warn!(
                "Image does not have JPEG magic bytes; first 4: {:02X?}",
                &image_bytes[..4.min(image_bytes.len())]
            );
        }
        let resp = self.post_predict(form).await?;
        let response = resp.json::<RawPredictResponse>().await?;
        Ok(response.into_analysis_result()?)
    }
    /// Submit a text query for CLIP text embedding.
    pub async fn embed_text(&self, text: &str) -> Result<Vec<f32>, MlError> {
        let entries = text_entries(&self.clip_model);
        let form = reqwest::multipart::Form::new()
            .text("text", text.to_string())
            .text("entries", entries);
        debug!(url = %self.predict_url, text, "Submitting text to ML API");
        let resp = self.post_predict(form).await?;
        let response = resp.json::<RawTextResponse>().await?;
        // The embedding arrives as a JSON array encoded in a string field.
        let clip_str = response.clip.ok_or(MlError::MissingEmbedding)?;
        let embedding: Vec<f32> = serde_json::from_str(&clip_str)
            .map_err(|e| MlError::Parse(ParseError::InvalidEmbedding(e)))?;
        Ok(embedding)
    }
    /// POST a multipart form to the predict endpoint, mapping non-success
    /// statuses to `MlError::HttpStatus`. Shared by both request kinds
    /// (previously duplicated in `analyze_image` and `embed_text`).
    async fn post_predict(
        &self,
        form: reqwest::multipart::Form,
    ) -> Result<reqwest::Response, MlError> {
        let resp = self.http
            .post(&self.predict_url)
            .multipart(form)
            .send()
            .await?;
        if resp.status().is_success() {
            Ok(resp)
        } else {
            let status = resp.status().as_u16();
            // Best-effort body capture for diagnostics; empty on read failure.
            let body = resp.text().await.unwrap_or_default();
            Err(MlError::HttpStatus { status, body })
        }
    }
}

6
crates/rbv-ml/src/lib.rs Normal file
View File

@@ -0,0 +1,6 @@
pub mod client;
pub mod request;
pub mod response;
pub use client::{MlClient, MlError};
pub use response::{AnalysisResult, DetectedFace};

View File

@@ -0,0 +1,37 @@
use serde_json::{json, Value};
/// Build the `entries` JSON string for a combined CLIP visual + facial recognition request.
pub(crate) fn image_entries(clip_model: &str, face_model: &str) -> String {
    // Detection and recognition share the same model-name payload shape.
    let face_entry: Value = json!({ "modelName": face_model, "options": {} });
    let payload: Value = json!({
        "clip": {
            "visual": { "modelName": clip_model, "options": {} }
        },
        "facial-recognition": {
            "detection": face_entry.clone(),
            "recognition": face_entry
        }
    });
    payload.to_string()
}
/// Build the `entries` JSON string for a text CLIP embedding request.
pub(crate) fn text_entries(clip_model: &str) -> String {
    let payload: Value = json!({
        "clip": {
            "textual": { "modelName": clip_model, "options": {} }
        }
    });
    payload.to_string()
}

View File

@@ -0,0 +1,95 @@
use serde::Deserialize;
use rbv_entity::BoundingBox;
use thiserror::Error;
/// Errors while converting a raw wire response into [`AnalysisResult`].
#[derive(Debug, Error)]
pub enum ParseError {
    /// A required field was absent from the response.
    #[error("missing field: {0}")]
    MissingField(&'static str),
    /// An embedding string was not a valid JSON array of numbers.
    #[error("invalid embedding: {0}")]
    InvalidEmbedding(#[from] serde_json::Error),
}
/// Public result type returned to callers of MlClient.
#[derive(Debug, Clone)]
pub struct AnalysisResult {
    /// CLIP visual embedding for the whole image.
    pub clip_embedding: Vec<f32>,
    /// All faces detected in the image (empty when none were reported).
    pub faces: Vec<DetectedFace>,
    // Dimensions reported by the server; 0 when the field was absent.
    pub image_width: i32,
    pub image_height: i32,
}
/// One face found by the detector, with its recognition embedding.
#[derive(Debug, Clone)]
pub struct DetectedFace {
    /// Face location; truncated from the server's float coordinates.
    pub bounding_box: BoundingBox,
    /// Face recognition embedding vector.
    pub embedding: Vec<f32>,
    /// Detector confidence score.
    pub detection_score: f32,
}
// ---------- wire format (private) ----------
/// Raw `/predict` response for an image request, before validation.
#[derive(Deserialize)]
pub(crate) struct RawPredictResponse {
    /// CLIP embedding encoded as a JSON array string, e.g. "[0.1, -0.2, ...]"
    pub clip: Option<String>,
    /// Detected faces; absent when the server reports none.
    #[serde(rename = "facial-recognition")]
    pub facial_recognition: Option<Vec<RawFaceDetection>>,
    #[serde(rename = "imageHeight")]
    pub image_height: Option<i32>,
    #[serde(rename = "imageWidth")]
    pub image_width: Option<i32>,
}
/// One face entry from the raw `/predict` response.
#[derive(Deserialize)]
pub(crate) struct RawFaceDetection {
    #[serde(rename = "boundingBox")]
    pub bounding_box: RawBoundingBox,
    /// Face embedding encoded as a JSON array string
    pub embedding: String,
    /// Detector confidence as reported by the server.
    pub score: f32,
}
/// Float bounding-box coordinates as sent on the wire; truncated to i32
/// when converted into the public [`BoundingBox`].
#[derive(Deserialize)]
pub(crate) struct RawBoundingBox {
    pub x1: f32,
    pub y1: f32,
    pub x2: f32,
    pub y2: f32,
}
impl RawPredictResponse {
    /// Validate and convert the wire response into the public result type.
    ///
    /// Fails when the CLIP field is missing or any embedding string is not a
    /// valid JSON array; missing dimensions default to 0.
    pub(crate) fn into_analysis_result(self) -> Result<AnalysisResult, ParseError> {
        let clip_json = self.clip.ok_or(ParseError::MissingField("clip"))?;
        let clip_embedding: Vec<f32> = serde_json::from_str(&clip_json)?;
        let raw_faces = self.facial_recognition.unwrap_or_default();
        let mut faces = Vec::with_capacity(raw_faces.len());
        for raw in raw_faces {
            let embedding: Vec<f32> = serde_json::from_str(&raw.embedding)?;
            faces.push(DetectedFace {
                bounding_box: BoundingBox {
                    // Float pixel coordinates are truncated toward zero.
                    x1: raw.bounding_box.x1 as i32,
                    y1: raw.bounding_box.y1 as i32,
                    x2: raw.bounding_box.x2 as i32,
                    y2: raw.bounding_box.y2 as i32,
                },
                embedding,
                detection_score: raw.score,
            });
        }
        Ok(AnalysisResult {
            clip_embedding,
            faces,
            image_width: self.image_width.unwrap_or(0),
            image_height: self.image_height.unwrap_or(0),
        })
    }
}
/// Wire response for a text-only CLIP embedding request.
#[derive(Deserialize)]
pub(crate) struct RawTextResponse {
    /// CLIP text embedding encoded as a JSON array string; absent on failure.
    pub clip: Option<String>,
}

View File

@@ -0,0 +1,14 @@
[package]
name = "rbv-search"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
rbv-entity = { workspace = true }
rbv-data = { workspace = true }
rbv-ml = { workspace = true }
sqlx = { workspace = true }
anyhow = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }

View File

@@ -0,0 +1,76 @@
use anyhow::Result;
use rbv_entity::{Gallery, GalleryId, ImageId, PersonId};
use rbv_ml::MlClient;
use sqlx::{PgPool, Row};
use crate::text::search_by_text;
/// One ranked search hit: an image, its owning gallery, and a combined score.
pub struct SearchResult {
    pub image_id: ImageId,
    pub gallery: Gallery,
    /// Accumulated relevance: CLIP text score plus 1.0 per matched person.
    pub score: f64,
}
/// Combined search over an optional text query and/or person filters.
///
/// Scores are accumulated per image: CLIP similarity from the text query plus
/// a flat +1.0 for each requested person appearing in the image. The top
/// `limit` images are then resolved to their galleries; images whose gallery
/// cannot be resolved are dropped from the results.
pub async fn search_combined(
    pool: &PgPool,
    ml: &MlClient,
    text: Option<&str>,
    person_ids: Option<&[PersonId]>,
    limit: i64,
) -> Result<Vec<SearchResult>> {
    let mut scored: std::collections::HashMap<[u8; 32], f64> = std::collections::HashMap::new();
    if let Some(query) = text {
        // Over-fetch (limit * 2) so person boosts can still reorder the tail.
        let hits = search_by_text(pool, ml, query, limit * 2).await?;
        for (id, score) in hits {
            *scored.entry(id.0).or_insert(0.0) += score;
        }
    }
    if let Some(persons) = person_ids {
        for person_id in persons {
            let images = crate::face::get_images_for_person(pool, person_id, limit * 2).await?;
            for image_id in images {
                *scored.entry(image_id.0).or_insert(0.0) += 1.0;
            }
        }
    }
    // Rank descending by score; NaN comparisons fall back to Equal.
    let mut ranked: Vec<([u8; 32], f64)> = scored.into_iter().collect();
    ranked.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
    ranked.truncate(limit as usize);
    // Resolve each surviving image to its gallery (one query pair per hit).
    let mut results = Vec::new();
    for (id_bytes, score) in ranked {
        let image_id = ImageId(id_bytes);
        if let Some((gallery_path, _filename)) = rbv_data::image::find_image_gallery_path(pool, &image_id).await? {
            if let Some(gallery) = lookup_gallery_by_path(pool, &gallery_path).await? {
                results.push(SearchResult { image_id, gallery, score });
            }
        }
    }
    Ok(results)
}
async fn lookup_gallery_by_path(pool: &PgPool, path: &str) -> Result<Option<Gallery>> {
let row = sqlx::query(
"SELECT id, source_id, collection, source_name, source_url, subjects, tags, path FROM galleries WHERE path = $1",
)
.bind(path)
.fetch_optional(pool)
.await?;
Ok(row.map(|r| {
let id_bytes: Vec<u8> = r.get("id");
Gallery {
id: GalleryId(id_bytes.try_into().expect("32-byte id")),
source_id: r.get::<i64, _>("source_id") as u64,
collection: r.get("collection"),
source_name: r.get("source_name"),
source_url: r.get("source_url"),
subjects: r.get("subjects"),
tags: r.get("tags"),
path: r.get::<String, _>("path").into(),
}
}))
}

View File

@@ -0,0 +1,61 @@
use anyhow::Result;
use rbv_entity::{Gallery, GalleryId, ImageId, PersonId};
use sqlx::{PgPool, Row};
/// Page through the galleries that contain at least one face assigned to
/// `person_id`. `page` is 1-based.
pub async fn get_galleries_for_person(
    pool: &PgPool,
    person_id: &PersonId,
    page: i64,
    per_page: i64,
) -> Result<Vec<Gallery>> {
    let rows = sqlx::query(
        r#"
        SELECT DISTINCT g.id, g.source_id, g.collection, g.source_name, g.source_url,
                        g.subjects, g.tags, g.path
        FROM galleries g
        JOIN gallery_images gi ON gi.gallery_id = g.id
        JOIN face_detections fd ON fd.image_id = gi.image_id
        WHERE fd.person_id = $1
        ORDER BY g.collection, g.source_name
        LIMIT $2 OFFSET $3
        "#,
    )
    .bind(person_id.as_uuid())
    .bind(per_page)
    .bind((page - 1) * per_page)
    .fetch_all(pool)
    .await?;
    Ok(rows.iter().map(|r| {
        let id_bytes: Vec<u8> = r.get("id");
        Gallery {
            // NOTE(review): panics if a stored id is not 32 bytes — consider
            // surfacing this as an error instead.
            id: GalleryId(id_bytes.try_into().expect("32-byte id")),
            source_id: r.get::<i64, _>("source_id") as u64,
            collection: r.get("collection"),
            source_name: r.get("source_name"),
            source_url: r.get("source_url"),
            subjects: r.get("subjects"),
            tags: r.get("tags"),
            path: r.get::<String, _>("path").into(),
        }
    }).collect())
}
/// Distinct images containing at least one face assigned to `person_id`,
/// capped at `limit` rows (no explicit ordering).
pub async fn get_images_for_person(
    pool: &PgPool,
    person_id: &PersonId,
    limit: i64,
) -> Result<Vec<ImageId>> {
    let rows = sqlx::query(
        "SELECT DISTINCT image_id FROM face_detections WHERE person_id = $1 LIMIT $2",
    )
    .bind(person_id.as_uuid())
    .bind(limit)
    .fetch_all(pool)
    .await?;
    Ok(rows.iter().map(|r| {
        let id_bytes: Vec<u8> = r.get("image_id");
        // NOTE(review): panics if a stored id is not 32 bytes.
        ImageId(id_bytes.try_into().expect("32-byte id"))
    }).collect())
}

View File

@@ -0,0 +1,5 @@
pub mod text;
pub mod face;
pub mod combined;
pub use combined::{search_combined, SearchResult};

View File

@@ -0,0 +1,10 @@
use anyhow::Result;
use rbv_entity::ImageId;
use rbv_ml::MlClient;
use sqlx::PgPool;
pub async fn search_by_text(pool: &PgPool, ml: &MlClient, query: &str, limit: i64) -> Result<Vec<(ImageId, f64)>> {
let embedding = ml.embed_text(query).await
.map_err(|e| anyhow::anyhow!("ML error: {e}"))?;
rbv_data::clip::search_clip_nearest(pool, &embedding, limit).await
}

370
doc/plan/workspace.md Normal file
View File

@@ -0,0 +1,370 @@
# RBV Workspace Plan
## Context
Build a Rust workspace for indexing image galleries, extracting facial recognition and CLIP embeddings via an external ML API (initially immich-ml compatible), clustering faces into person identities, and serving results through an mTLS API and React web UI. The image collection is ~60GB of personal photos organised into galleries with metadata, hosted on a DL380 Gen10 with 780GB RAM and 40 Xeon cores.
## Workspace Structure
```
/home/grenade/immich-ml/
├── Cargo.toml # workspace root
├── migrations/ # sqlx postgres migrations
│ ├── 0001_extensions.sql
│ ├── 0002_galleries.sql
│ ├── 0003_images.sql
│ ├── 0004_clip.sql
│ ├── 0005_persons.sql
│ ├── 0006_faces.sql
│ ├── 0007_users.sql
│ └── 0008_vector_indexes.sql
├── crates/
│ ├── rbv-entity/ # types only, no logic
│ ├── rbv-hash/ # BLAKE3 ID generation
│ ├── rbv-data/ # postgres DAL (sqlx + pgvector)
│ ├── rbv-ml/ # ML API client (immich-ml compatible wire format)
│ ├── rbv-cluster/ # face embedding clustering (DBSCAN + connected components)
│ ├── rbv-ingest/ # directory traversal, gallery detection, pipeline
│ ├── rbv-auth/ # mTLS validation, argon2 passwords, sessions
│ ├── rbv-search/ # combined face + CLIP search queries
│ ├── rbv-cli/ # binary: `rbv` (index, cluster, migrate subcommands)
│ └── rbv-api/ # binary: axum HTTPS server over mTLS
└── ui/ # Vite + React + TypeScript
├── package.json
├── vite.config.ts
└── src/
├── i18n/en-GB.json
├── theme/{light,dark}.ts
├── api/client.ts
├── components/
└── pages/{Home,Login,People,Person,MergePerson,Gallery,Search}.tsx
```
## Crate Dependency Graph
```
rbv-entity (no deps)
rbv-hash → rbv-entity
rbv-data → rbv-entity, rbv-hash
rbv-ml → rbv-entity
rbv-cluster → rbv-entity
rbv-ingest → rbv-entity, rbv-hash, rbv-data, rbv-ml
rbv-auth → rbv-entity
rbv-search → rbv-entity, rbv-data, rbv-ml
rbv-cli → rbv-entity, rbv-hash, rbv-data, rbv-ml, rbv-ingest, rbv-cluster
rbv-api → rbv-entity, rbv-data, rbv-ml, rbv-auth, rbv-search
```
## Key Workspace Dependencies
| Dependency | Purpose |
|---|---|
| `clap` (derive) | CLI argument parsing |
| `axum` + `tower-http` | HTTP server + static files + CORS |
| `rustls` + `tokio-rustls` + `x509-parser` | mTLS |
| `sqlx` (postgres, migrate) | Database access |
| `pgvector` (sqlx feature) | vector(512) type support |
| `reqwest` (multipart, rustls-tls) | ML API HTTP client |
| `blake3` | Reproducible hash IDs |
| `argon2` | Password hashing |
| `serde` + `serde_json` | Serialization |
| `tokio` (full) | Async runtime |
| `tracing` | Logging |
| `image` | Face thumbnail cropping |
## Entity Types (rbv-entity)
```rust
pub struct GalleryId(pub [u8; 32]); // BLAKE3(collection || source_name || source_url)
pub struct ImageId(pub [u8; 32]); // BLAKE3(file content)
pub struct FaceId(pub [u8; 32]); // BLAKE3(image_id || bbox coords)
pub struct PersonId(pub uuid::Uuid); // UUIDv4, generated
pub struct Gallery { id, source_id: u64, collection, source_name, source_url, subjects: Vec<String>, tags: Vec<String>, path }
pub struct Image { id, width: Option<i32>, height: Option<i32> }
pub struct GalleryImage { gallery_id, image_id, filename, ordering: i32 }
pub struct ClipEmbedding { image_id, embedding: Vec<f32> } // 512-dim
pub struct FaceDetection { id, image_id, bounding_box: BoundingBox, score: f32, embedding: Vec<f32>, person_id: Option<PersonId> }
pub struct BoundingBox { x1: i32, y1: i32, x2: i32, y2: i32 }
pub struct Person { id, created_at }
pub struct PersonName { person_id, name: String, is_primary: bool } // lowercase alpha + hyphens
pub struct User { id: Uuid, username, password_hash }
pub struct Session { token, user_id, expires_at }
```
## BLAKE3 Hashing Strategy (rbv-hash)
| Entity | Input | Rationale |
|---|---|---|
| Gallery | `collection \0 source_name \0 source_url` | Stable identity regardless of filesystem location |
| Image | raw file bytes | Content-addressed; same file = same ID across galleries |
| Face | `image_id \|\| x1 \|\| y1 \|\| x2 \|\| y2` | Unique per detected face region |
Images use a many-to-many join (`gallery_images`) so ML work happens once per unique file.
## Database Schema (PostgreSQL + pgvector)
```sql
CREATE EXTENSION IF NOT EXISTS vector;
-- Galleries
CREATE TABLE galleries (
id BYTEA PRIMARY KEY, -- 32-byte BLAKE3
source_id BIGINT NOT NULL,
collection TEXT NOT NULL,
source_name TEXT NOT NULL,
source_url TEXT NOT NULL,
subjects TEXT[] NOT NULL DEFAULT '{}',
tags TEXT[] NOT NULL DEFAULT '{}',
path TEXT NOT NULL,
indexed_at TIMESTAMPTZ NOT NULL DEFAULT now(),
UNIQUE (collection, source_name, source_url)
);
-- Images (content-addressed)
CREATE TABLE images (
id BYTEA PRIMARY KEY, -- BLAKE3 of file content
width INT,
height INT,
indexed_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- Many-to-many: gallery <-> image
CREATE TABLE gallery_images (
gallery_id BYTEA NOT NULL REFERENCES galleries(id) ON DELETE CASCADE,
image_id BYTEA NOT NULL REFERENCES images(id) ON DELETE CASCADE,
filename TEXT NOT NULL,
ordering INT NOT NULL DEFAULT 0,
PRIMARY KEY (gallery_id, image_id),
UNIQUE (gallery_id, filename)
);
-- CLIP embeddings (one per unique image)
CREATE TABLE clip_embeddings (
image_id BYTEA PRIMARY KEY REFERENCES images(id) ON DELETE CASCADE,
embedding vector(512) NOT NULL
);
-- Persons + names
CREATE TABLE persons (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
CREATE TABLE person_names (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
person_id UUID NOT NULL REFERENCES persons(id) ON DELETE CASCADE,
name TEXT NOT NULL,
is_primary BOOLEAN NOT NULL DEFAULT false,
CONSTRAINT name_format CHECK (name ~ '^[a-z][a-z\-]*[a-z]$'),
UNIQUE (name) -- globally unique across all persons
);
-- Face detections
CREATE TABLE face_detections (
id BYTEA PRIMARY KEY, -- BLAKE3(image_id || bbox)
image_id BYTEA NOT NULL REFERENCES images(id) ON DELETE CASCADE,
x1 INT NOT NULL, y1 INT NOT NULL, x2 INT NOT NULL, y2 INT NOT NULL,
score REAL NOT NULL,
embedding vector(512) NOT NULL,
person_id UUID REFERENCES persons(id) ON DELETE SET NULL
);
-- Users + sessions
CREATE TABLE users (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
username TEXT NOT NULL UNIQUE,
password_hash TEXT NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
CREATE TABLE sessions (
token TEXT PRIMARY KEY,
user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
expires_at TIMESTAMPTZ NOT NULL
);
-- Vector indexes (created after initial bulk ingest for best IVFFlat quality)
CREATE INDEX idx_clip_ivfflat ON clip_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
CREATE INDEX idx_face_ivfflat ON face_detections USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
```
## ML Client Design (rbv-ml)
Wire-compatible with immich-ml but no immich terminology in the public API.
**Terminology mapping (internal only):**
| Wire format (immich-ml) | rbv public API |
|---|---|
| `clip` / `textual` | `TextEmbeddingTask` |
| `clip` / `visual` | `VisualEmbeddingTask` |
| `facial-recognition` / `detection` | `FaceDetectionTask` |
| `facial-recognition` / `recognition` | `FaceRecognitionTask` |
| `entries` JSON string | built internally by `MlClient` |
| `modelName` | configurable via `MlClient` builder, defaults: `ViT-B-32__openai` (CLIP), `buffalo_l` (face) |
**Public API:**
```rust
pub struct MlClient { /* http client, base_url, model names */ }
impl MlClient {
pub fn new(base_url: &str) -> Self;
pub async fn analyze_image(&self, image_bytes: &[u8]) -> Result<AnalysisResult>;
pub async fn embed_text(&self, text: &str) -> Result<Vec<f32>>;
}
pub struct AnalysisResult {
pub clip_embedding: Vec<f32>, // 512-dim
pub faces: Vec<DetectedFace>,
pub image_width: i32,
pub image_height: i32,
}
pub struct DetectedFace {
pub bounding_box: BoundingBox,
pub embedding: Vec<f32>, // 512-dim
pub detection_score: f32,
}
```
Internally, `analyze_image` sends a single `/predict` request with both CLIP visual and facial-recognition entries. Response parsing uses private serde structs with `#[serde(rename)]` for the wire format.
## Directory Traversal (rbv-ingest)
Detection heuristics:
- **Gallery**: directory contains `index.json` AND `tn/` subdirectory
- **Chunk**: directory whose immediate children are all galleries
- **Root**: directory whose immediate children are all chunks
`--target` paths are classified and recursed accordingly. `--include`/`--exclude` use rsync-style glob patterns applied to image filenames.
**Pipeline flow per gallery:**
1. Read `index.json` → compute `GalleryId` → upsert gallery metadata
2. List image files (excluding `tn/`) → apply include/exclude filters
3. For each image (concurrency-limited via `tokio::sync::Semaphore`, default 4):
- Read file bytes → compute `ImageId` (BLAKE3 of content)
- Skip if `image_exists()` in DB (incremental re-indexing)
- Submit to ML API → get CLIP embedding + face detections
- Compute `FaceId` for each detection
4. Batch upsert: image, gallery_image, clip_embedding, face_detections
## CLI Subcommands (rbv-cli → binary name `rbv`)
```
rbv index --target <PATH>... --database <CONNSTR> --ml-uri <URL>
[--include <PATTERN>...] [--exclude <PATTERN>...] [--concurrency <N>]
rbv cluster --database <CONNSTR> [--threshold <FLOAT>]
rbv migrate --database <CONNSTR>
```
- `index`: runs the ingest pipeline
- `cluster`: loads all face embeddings, runs DBSCAN, assigns person IDs
- `migrate`: runs sqlx migrations
## Clustering (rbv-cluster)
Pure Rust, no FAISS dependency. Brute-force cosine distance + DBSCAN for initial implementation (feasible at this scale with 780GB RAM).
1. Load all `(FaceId, Vec<f32>)` from DB
2. Normalise embeddings to unit length
3. Build pairwise cosine similarity where `sim >= threshold` (default 0.65)
4. Run connected components (union-find) on the resulting adjacency
5. Each component = one person cluster
6. Create `persons` rows, update `face_detections.person_id`
Idempotent: existing assignments are preserved, only unassigned faces are clustered. Merging is handled separately via the API.
## API Routes (rbv-api)
```
GET /api/galleries?page=&per_page=
GET /api/galleries/random?count=
GET /api/galleries/:id
GET /api/galleries/:id/images
GET /api/images/:id
GET /api/images/:id/file
GET /api/images/:id/thumbnail
GET /api/persons?page=&per_page=
GET /api/persons/:id
GET /api/persons/:id/galleries?page=&per_page=
GET /api/persons/:id/faces?page=&per_page=
PUT /api/persons/:id/name
POST /api/persons/:id/alias
POST /api/persons/merge { target: PersonId, source: PersonId }
GET /api/faces/:id/crop serves cropped face from source image
POST /api/search { text?, person_ids?, limit }
POST /api/auth/login { username, password }
POST /api/auth/register { username, password }
POST /api/auth/logout
GET /api/auth/me
```
**mTLS handling**: configure rustls to accept any client cert at TLS layer, validate CN in axum middleware so HTTP JSON errors can be returned:
- No cert → 401 `{"error": "no_client_certificate"}`
- Expired → 403 `{"error": "certificate_expired"}`
- Bad CA → 403 `{"error": "untrusted_ca"}`
- CN not in allowlist → 403 `{"error": "cn_not_allowed", "cn": "..."}`
## UI Structure (Vite + React + TypeScript)
- **i18n**: react-i18next, initially en-GB only
- **Routing**: react-router-dom v6 (matching the `^6.23.1` pin in `ui/package.json`)
- **Theme**: CSS custom properties, dark/light toggle, stored in localStorage
- **Layout**: full viewport width/height, header with nav + search + theme toggle + profile
**Routes:**
| Route | Behaviour |
|---|---|
| `/` | Random thumbnails from random galleries (1 per gallery), refreshable |
| `/login` | Login/register forms |
| `/people` | Paged grid of round face thumbnails, links to `/people/:person` and `/merge/:person` |
| `/people/:person` | Paged galleries containing `:person`. Name edit form. Alias management. Disambiguation for ambiguous names |
| `/merge/:person` | Header shows `:person`, body shows paged faces. Click face → confirm → irreversible merge |
| `/gallery/:gallery_id` | Full image viewer with prev/next (click + arrow keys, wrapping). Thumbnail strip. Related galleries strip. Round face images in header |
| `/search` | Advanced form: combine people + text contexts |
## Implementation Phases
### Phase 1: Foundation
- Workspace Cargo.toml with all members
- `rbv-entity`: all type definitions
- `rbv-hash`: BLAKE3 utilities
- `rbv-data`: pool, migrations runner, gallery/image CRUD
- SQL migrations
- `rbv-cli` with `migrate` subcommand
- **Verify**: `rbv migrate` creates schema successfully
### Phase 2: ML Client + Ingestion
- `rbv-ml`: request building, response parsing, `analyze_image`, `embed_text`
- `rbv-ingest`: traversal, index.json parsing, filtering, pipeline
- `rbv-cli` `index` subcommand
- Face/clip data access in `rbv-data`
- **Verify**: `rbv index --target /path --database ... --ml-uri http://10.3.10.108:3003` indexes galleries end-to-end
### Phase 3: Clustering
- `rbv-cluster`: DBSCAN, connected components
- Person CRUD in `rbv-data`
- `rbv-cli` `cluster` subcommand
- **Verify**: `rbv cluster` groups faces into persons
### Phase 4: Auth + API Server
- `rbv-auth`: mTLS validation, argon2, sessions
- `rbv-api`: TLS setup, all API routes, static file serving
- `rbv-search`: text + face + combined search
- **Verify**: API serves JSON over mTLS, search returns results
### Phase 5: Web UI
- Vite + React + TypeScript scaffold
- i18n, router, theme system
- All pages: Home, Login, People, Person, MergePerson, Gallery, Search
- API client with session management
- **Verify**: full UI functional against running API
### Phase 6: Polish
- IVFFlat vector index creation after bulk load
- Face thumbnail caching on disk
- Session expiry cleanup
- Error handling, logging, CI

View File

@@ -0,0 +1 @@
-- Enable pgvector: provides the vector(N) column type and ANN index methods.
CREATE EXTENSION IF NOT EXISTS vector;

View File

@@ -0,0 +1,12 @@
-- Galleries, identified by a 32-byte BLAKE3 hash so identity is stable
-- regardless of where the gallery lives on disk. The UNIQUE constraint
-- mirrors the hash inputs (collection, source_name, source_url).
CREATE TABLE galleries (
    id BYTEA PRIMARY KEY,              -- 32-byte BLAKE3
    source_id BIGINT NOT NULL,
    collection TEXT NOT NULL,
    source_name TEXT NOT NULL,
    source_url TEXT NOT NULL,
    subjects TEXT[] NOT NULL DEFAULT '{}',
    tags TEXT[] NOT NULL DEFAULT '{}',
    path TEXT NOT NULL,                -- current filesystem location
    indexed_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    UNIQUE (collection, source_name, source_url)
);

View File

@@ -0,0 +1,18 @@
-- Content-addressed images: id is the BLAKE3 hash of the file bytes, so the
-- same file appearing in several galleries is stored (and ML-processed) once.
CREATE TABLE images (
    id BYTEA PRIMARY KEY,
    width INT,
    height INT,
    indexed_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- Many-to-many join between galleries and images; filename/ordering are
-- per-gallery properties of an image, not properties of the image itself.
CREATE TABLE gallery_images (
    gallery_id BYTEA NOT NULL REFERENCES galleries(id) ON DELETE CASCADE,
    image_id BYTEA NOT NULL REFERENCES images(id) ON DELETE CASCADE,
    filename TEXT NOT NULL,
    ordering INT NOT NULL DEFAULT 0,
    PRIMARY KEY (gallery_id, image_id),
    UNIQUE (gallery_id, filename)
);
CREATE INDEX idx_gi_gallery ON gallery_images(gallery_id);
CREATE INDEX idx_gi_image ON gallery_images(image_id);

4
migrations/0004_clip.sql Normal file
View File

@@ -0,0 +1,4 @@
-- One CLIP embedding per unique image (512-dim, matching the ML model).
CREATE TABLE clip_embeddings (
    image_id BYTEA PRIMARY KEY REFERENCES images(id) ON DELETE CASCADE,
    embedding vector(512) NOT NULL
);

View File

@@ -0,0 +1,16 @@
-- Clustered identities. Rows are created by the clustering step or by a
-- manual merge; names are attached separately in person_names.
CREATE TABLE persons (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- Names/aliases for persons. `name` is globally unique across ALL persons,
-- so any name resolves to exactly one identity.
CREATE TABLE person_names (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    person_id UUID NOT NULL REFERENCES persons(id) ON DELETE CASCADE,
    name TEXT NOT NULL,
    is_primary BOOLEAN NOT NULL DEFAULT false,
    -- Lowercase letters and hyphens, starting AND ending with a letter;
    -- note this also enforces a minimum length of two characters.
    CONSTRAINT name_format CHECK (name ~ '^[a-z][a-z\-]*[a-z]$'),
    UNIQUE (name)
);
CREATE INDEX idx_pn_person ON person_names(person_id);
CREATE INDEX idx_pn_name ON person_names(name);

15
migrations/0006_faces.sql Normal file
View File

@@ -0,0 +1,15 @@
-- One row per detected face region. id is BLAKE3(image_id || bbox), so a
-- re-run over the same image produces the same face ids. person_id is
-- nullable: faces start unassigned and are linked by the clustering step.
CREATE TABLE face_detections (
    id BYTEA PRIMARY KEY,
    image_id BYTEA NOT NULL REFERENCES images(id) ON DELETE CASCADE,
    x1 INT NOT NULL,
    y1 INT NOT NULL,
    x2 INT NOT NULL,
    y2 INT NOT NULL,
    score REAL NOT NULL,               -- detector confidence
    embedding vector(512) NOT NULL,
    person_id UUID REFERENCES persons(id) ON DELETE SET NULL,
    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
CREATE INDEX idx_fd_image ON face_detections(image_id);
CREATE INDEX idx_fd_person ON face_detections(person_id);

16
migrations/0007_users.sql Normal file
View File

@@ -0,0 +1,16 @@
-- Local accounts; password_hash is an argon2 string produced by rbv-auth.
CREATE TABLE users (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    username TEXT NOT NULL UNIQUE,
    password_hash TEXT NOT NULL,
    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- Opaque session tokens; idx_sess_expires supports periodic expiry cleanup.
CREATE TABLE sessions (
    token TEXT PRIMARY KEY,
    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    expires_at TIMESTAMPTZ NOT NULL
);
CREATE INDEX idx_sess_user ON sessions(user_id);
CREATE INDEX idx_sess_expires ON sessions(expires_at);

View File

@@ -0,0 +1,10 @@
-- IVFFlat indexes for approximate nearest-neighbour vector search.
-- Created as a separate migration to be run after the initial bulk ingest,
-- when there are enough vectors for the index to be meaningful.
-- Re-running this migration is safe (IF NOT EXISTS).
-- NOTE(review): lists = 100 is a starting point; tune toward ~sqrt(row
-- count) once the final corpus size is known.
CREATE INDEX IF NOT EXISTS idx_clip_ivfflat ON clip_embeddings
USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
CREATE INDEX IF NOT EXISTS idx_face_ivfflat ON face_detections
USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);

19
script/datasets.sh Normal file
View File

@@ -0,0 +1,19 @@
#!/usr/bin/env bash
# One-off provisioning of ZFS datasets for rbv (image data, container state,
# postgres) plus SELinux contexts so containers may access them.
# Fail fast: any failed create/label step should stop the script.
set -euo pipefail

# Image data: large sequential files, so big records compress/stream well.
# NOTE(review): no -p here — assumes the tank/data parent already exists.
sudo zfs create \
  -o recordsize=1M \
  -o compression=zstd \
  tank/data/rbv
# General container state (-p creates tank/containers if missing).
sudo zfs create -p \
  -o recordsize=64K \
  -o compression=zstd \
  tank/containers/rbv
# Postgres: 8K records match its page size; cache metadata only and favour
# throughput over latency for the intent log.
sudo zfs create \
  -o recordsize=8K \
  -o compression=zstd \
  -o primarycache=metadata \
  -o logbias=throughput \
  tank/containers/rbv/db
zfs list -o name,mountpoint,used,avail,compression,recordsize -r
# SELinux: label both trees so container runtimes can read/write them.
sudo semanage fcontext -a -t container_file_t '/tank/containers/rbv(/.*)?'
sudo semanage fcontext -a -t container_file_t '/tank/data/rbv(/.*)?'
sudo restorecon -Rv /tank/containers/rbv
sudo restorecon -Rv /tank/data/rbv

61
script/deploy.sh Executable file
View File

@@ -0,0 +1,61 @@
#!/usr/bin/env bash
# Deploy rbv: push the postgres quadlet + env file to the database host, then
# build and push the rbv/rbv-api binaries and systemd unit to the API host.
# Intentionally NOT `set -e`: the `systemctl is-active` guard below is
# expected to fail when the service is not yet installed.
set -u

postgres_host=gramathea.kosherinata.internal
api_host=gramathea.kosherinata.internal

# --- database host: quadlet definition + env file --------------------------
if rsync \
  --archive \
  --compress \
  --rsync-path 'sudo rsync' \
  --chown root:root \
  asset/quadlet/postgres.container \
  ${postgres_host}:/etc/containers/systemd/rbv-postgres.container \
  && rsync \
  --archive \
  --compress \
  --rsync-path 'sudo rsync' \
  --chown root:root \
  --chmod 644 \
  asset/quadlet/.env \
  ${postgres_host}:/tank/containers/rbv/.env \
  && ssh ${postgres_host} sudo systemctl daemon-reload \
  && ssh ${postgres_host} sudo systemctl restart rbv-postgres; then
  echo "postgres quadlet deployed successfully"
else
  echo "Failed to deploy postgres quadlet"
  exit 1
fi

# Abort before touching the API host if the build fails; otherwise stale
# binaries from a previous build would be deployed silently.
if ! cargo build --release; then
  echo "Failed to build rbv"
  exit 1
fi

# `-f 2-` keeps everything after the first '=', so passwords containing '='
# are not truncated.
postgres_password=$(grep POSTGRES_PASSWORD asset/quadlet/.env | cut -d '=' -f 2-)

# Stop the running service (if any) before replacing its binaries.
ssh ${api_host} 'systemctl is-active rbv.service && sudo systemctl stop rbv.service'

# --- API host: binaries + unit file -----------------------------------------
if rsync \
  --archive \
  --compress \
  --rsync-path 'sudo rsync' \
  --chown root:root \
  --chmod '+x' \
  target/release/rbv \
  ${api_host}:/usr/local/bin/rbv \
  && rsync \
  --archive \
  --compress \
  --rsync-path 'sudo rsync' \
  --chown root:root \
  --chmod '+x' \
  target/release/rbv-api \
  ${api_host}:/usr/local/bin/rbv-api \
  && rsync \
  --archive \
  --compress \
  --rsync-path 'sudo rsync' \
  --chown root:root \
  asset/systemd/rbv.service \
  ${api_host}:/etc/systemd/system/rbv.service \
  && ssh ${api_host} sudo sed -i -e "s|password|${postgres_password}|" /etc/systemd/system/rbv.service \
  && ssh ${api_host} sudo systemctl daemon-reload \
  && ssh ${api_host} sudo systemctl start rbv.service; then
  echo "rbv deployed successfully"
else
  echo "Failed to deploy rbv"
  exit 1
fi
# NOTE(review): the sed above replaces the first literal "password" in the
# unit file and uses '|' as delimiter, so it still breaks if the password
# itself contains '|'. Consider an EnvironmentFile instead of substitution.

18
ui/.gitignore vendored Normal file
View File

@@ -0,0 +1,18 @@
# Dependencies
node_modules/
# Build output
dist/
*.tsbuildinfo
# Environment overrides
.env.local
.env.development.local
.env.test.local
.env.production.local
# Test coverage
coverage/
# Vite cache
.vite/

12
ui/index.html Normal file
View File

@@ -0,0 +1,12 @@
<!doctype html>
<html lang="en-GB">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>rbv</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>

1884
ui/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

25
ui/package.json Normal file
View File

@@ -0,0 +1,25 @@
{
"name": "rbv-ui",
"private": true,
"version": "0.1.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc -b && vite build",
"preview": "vite preview"
},
"dependencies": {
"i18next": "^23.11.5",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-i18next": "^14.1.2",
"react-router-dom": "^6.23.1"
},
"devDependencies": {
"@types/react": "^18.3.3",
"@types/react-dom": "^18.3.0",
"@vitejs/plugin-react": "^4.3.1",
"typescript": "^5.4.5",
"vite": "^5.3.1"
}
}

44
ui/src/App.tsx Normal file
View File

@@ -0,0 +1,44 @@
import { useState, useEffect } from 'react'
import { BrowserRouter, Routes, Route, Navigate } from 'react-router-dom'
import { useTheme } from './theme/useTheme'
import { Header } from './components/Header'
import { Login } from './pages/Login'
import { Home } from './pages/Home'
import { People } from './pages/People'
import { PersonPage } from './pages/Person'
import { MergePerson } from './pages/MergePerson'
import { Gallery } from './pages/Gallery'
import { Search } from './pages/Search'
import { me } from './api/client'
import type { User } from './api/client'

// Root component: performs a one-shot session check on mount, then gates
// every route behind authentication. Three user states are distinguished:
// undefined (check in flight), null (signed out), User (signed in).
export function App() {
  const { theme, toggle } = useTheme()
  const [user, setUser] = useState<User | null | undefined>(undefined) // undefined = checking
  useEffect(() => {
    // Any failure from /auth/me (401, network) is treated as signed-out.
    me().then(setUser).catch(() => setUser(null))
  }, [])
  // Render nothing until the session check resolves, to avoid a redirect
  // flicker between /login and the requested page.
  if (user === undefined) return null // waiting for session check
  const authed = user !== null
  return (
    <BrowserRouter>
      <Header user={user} theme={theme} onToggleTheme={toggle} onLogout={() => setUser(null)} />
      <Routes>
        <Route path="/login" element={
          authed ? <Navigate to="/" replace /> : <Login onLogin={setUser} />
        } />
        <Route path="/" element={authed ? <Home /> : <Navigate to="/login" replace />} />
        <Route path="/people" element={authed ? <People /> : <Navigate to="/login" replace />} />
        <Route path="/people/:id" element={authed ? <PersonPage /> : <Navigate to="/login" replace />} />
        <Route path="/merge/:id" element={authed ? <MergePerson /> : <Navigate to="/login" replace />} />
        <Route path="/gallery/:id" element={authed ? <Gallery /> : <Navigate to="/login" replace />} />
        <Route path="/search" element={authed ? <Search /> : <Navigate to="/login" replace />} />
        <Route path="*" element={<Navigate to="/" replace />} />
      </Routes>
    </BrowserRouter>
  )
}

129
ui/src/api/client.ts Normal file
View File

@@ -0,0 +1,129 @@
const BASE = '/api'
// Error thrown for any non-2xx API response, carrying the HTTP status and
// the server's machine-readable error code alongside the human message.
export class ApiError extends Error {
  constructor(public status: number, public code: string, message: string) {
    super(message)
  }
}
// Thin wrapper around fetch: prefixes the API base path, always sends the
// session cookie, sets a JSON content type, and normalises non-2xx
// responses into ApiError. Returns undefined (as T) for empty/204 bodies.
async function request<T>(path: string, init?: RequestInit): Promise<T> {
  const res = await fetch(`${BASE}${path}`, {
    // Spread init FIRST so the merged headers below are not clobbered: with
    // `...init` last, a caller-supplied init.headers replaced the merged
    // object entirely and silently dropped Content-Type.
    ...init,
    credentials: 'include',
    headers: { 'Content-Type': 'application/json', ...init?.headers },
  })
  if (!res.ok) {
    // Error bodies are expected to be JSON {error, message}; fall back to
    // the status text when the body is empty or not JSON.
    const body = await res.json().catch(() => ({}))
    throw new ApiError(res.status, body.error ?? 'unknown', body.message ?? res.statusText)
  }
  if (res.status === 204 || res.headers.get('content-length') === '0') {
    return undefined as T
  }
  return res.json() as Promise<T>
}
// ── Auth ─────────────────────────────────────────────────────────────────────
export interface User { id: string; username: string }
export const login = (username: string, password: string) =>
request<User>('/auth/login', { method: 'POST', body: JSON.stringify({ username, password }) })
export const register = (username: string, password: string) =>
request<User>('/auth/register', { method: 'POST', body: JSON.stringify({ username, password }) })
export const logout = () => request<void>('/auth/logout', { method: 'POST' })
export const me = () => request<User>('/auth/me')
// ── Galleries ─────────────────────────────────────────────────────────────────
export interface Gallery {
id: string
collection: string
source_name: string
source_url: string
subjects: string[]
tags: string[]
path: string
}
export interface GalleryImage {
image_id: string
filename: string
ordering: number
}
export const listGalleries = (page = 1, perPage = 24) =>
request<Gallery[]>(`/galleries?page=${page}&per_page=${perPage}`)
export const randomGalleries = (count = 12) =>
request<Gallery[]>(`/galleries/random?count=${count}`)
export const getGallery = (id: string) => request<Gallery>(`/galleries/${id}`)
export const getGalleryImages = (id: string) => request<GalleryImage[]>(`/galleries/${id}/images`)
// ── Images ────────────────────────────────────────────────────────────────────
export interface ImageMeta {
id: string
width: number | null
height: number | null
}
export const getImageMeta = (id: string) => request<ImageMeta>(`/images/${id}`)
export const imageFileUrl = (id: string) => `${BASE}/images/${id}/file`
export const thumbnailUrl = (id: string) => `${BASE}/images/${id}/thumbnail`
export const faceCropUrl = (id: string) => `${BASE}/faces/${id}/crop`
// ── Persons ───────────────────────────────────────────────────────────────────
export interface Person {
id: string
primary_name: string | null
names: string[]
created_at: string
}
export interface FaceRef {
id: string
image_id: string
x1: number
y1: number
x2: number
y2: number
score: number
}
export const listPersons = (page = 1, perPage = 48) =>
request<Person[]>(`/persons?page=${page}&per_page=${perPage}`)
export const getPerson = (id: string) => request<Person>(`/persons/${id}`)
export const setPersonName = (id: string, name: string) =>
request<void>(`/persons/${id}/name`, { method: 'PUT', body: JSON.stringify({ name }) })
export const addPersonAlias = (id: string, name: string) =>
request<void>(`/persons/${id}/alias`, { method: 'POST', body: JSON.stringify({ name }) })
export const getPersonFaces = (id: string, page = 1, perPage = 48) =>
request<FaceRef[]>(`/persons/${id}/faces?page=${page}&per_page=${perPage}`)
export const getPersonGalleries = (id: string, page = 1, perPage = 24) =>
request<Gallery[]>(`/persons/${id}/galleries?page=${page}&per_page=${perPage}`)
export const mergePersons = (target: string, source: string) =>
request<void>('/persons/merge', { method: 'POST', body: JSON.stringify({ target, source }) })
// ── Search ────────────────────────────────────────────────────────────────────
export interface SearchResult {
gallery_id: string
gallery: Gallery
score: number
}
export const search = (params: { text?: string; person_ids?: string[]; limit?: number }) =>
request<SearchResult[]>('/search', { method: 'POST', body: JSON.stringify(params) })

View File

@@ -0,0 +1,58 @@
.header {
position: sticky;
top: 0;
z-index: 100;
height: var(--header-height);
display: flex;
align-items: center;
justify-content: space-between;
padding: 0 1.25rem;
background: var(--colour-bg);
border-bottom: 1px solid var(--colour-border);
box-shadow: var(--shadow);
}
.header-nav {
display: flex;
align-items: center;
gap: 1.25rem;
}
.header-brand {
font-weight: 700;
font-size: 1.2rem;
color: var(--colour-accent);
text-decoration: none;
letter-spacing: 0.05em;
}
.nav-link {
color: var(--colour-text-muted);
text-decoration: none;
font-size: 0.9rem;
}
.nav-link:hover {
color: var(--colour-text);
}
.header-actions {
display: flex;
align-items: center;
gap: 0.5rem;
}
.icon-btn {
background: none;
border: none;
cursor: pointer;
font-size: 1.1rem;
color: var(--colour-text-muted);
padding: 0.25rem 0.5rem;
border-radius: var(--radius);
}
.icon-btn:hover {
color: var(--colour-text);
background: var(--colour-surface);
}

View File

@@ -0,0 +1,44 @@
import { Link, useNavigate } from 'react-router-dom'
import { useTranslation } from 'react-i18next'
import { logout } from '../api/client'
import type { User } from '../api/client'
import './Header.css'

// Props for the sticky top navigation bar.
interface Props {
  user: User | null
  theme: 'light' | 'dark'
  onToggleTheme: () => void
  onLogout: () => void
}

// Application header: brand link, primary navigation, theme toggle and
// (when signed in) a sign-out button.
export function Header({ user, theme, onToggleTheme, onLogout }: Props) {
  const { t } = useTranslation()
  const navigate = useNavigate()
  // Best-effort server-side logout: clear local state and return to the
  // login page even if the API call fails.
  const handleLogout = async () => {
    await logout().catch(() => null)
    onLogout()
    navigate('/login')
  }
  return (
    <header className="header">
      <nav className="header-nav">
        <Link to="/" className="header-brand">rbv</Link>
        <Link to="/" className="nav-link">{t('nav.home')}</Link>
        <Link to="/people" className="nav-link">{t('nav.people')}</Link>
        <Link to="/search" className="nav-link">{t('nav.search')}</Link>
      </nav>
      <div className="header-actions">
        <button className="icon-btn" onClick={onToggleTheme} title={t('theme.toggle')}>
          {theme === 'dark' ? '☀' : '☾'}
        </button>
        {user && (
          // NOTE(review): this button renders with no visible content — an
          // icon glyph may have been lost; only the title tooltip identifies
          // it. Confirm against the design.
          <button className="icon-btn" onClick={handleLogout} title={t('nav.logout')}>
          </button>
        )}
      </div>
    </header>
  )
}

View File

@@ -0,0 +1,12 @@
.pager {
display: flex;
align-items: center;
justify-content: center;
gap: 1rem;
padding: 2rem 0;
}
.pager-page {
color: var(--colour-text-muted);
font-size: 0.875rem;
}

View File

@@ -0,0 +1,18 @@
import './Pager.css'

// Props for the prev/next pagination control; `page` is 1-based.
interface Props {
  page: number
  hasMore: boolean
  onPrev: () => void
  onNext: () => void
}

// Simple pager: Previous is disabled on page 1, Next when hasMore is false.
// NOTE(review): the stray space before "Previous" and after "Next" looks
// like a lost arrow glyph (e.g. ‹ / ›) — confirm against the design.
export function Pager({ page, hasMore, onPrev, onNext }: Props) {
  return (
    <div className="pager">
      <button className="btn-secondary" onClick={onPrev} disabled={page <= 1}> Previous</button>
      <span className="pager-page">Page {page}</span>
      <button className="btn-secondary" onClick={onNext} disabled={!hasMore}>Next </button>
    </div>
  )
}

61
ui/src/i18n/en-GB.json Normal file
View File

@@ -0,0 +1,61 @@
{
"nav": {
"home": "Home",
"people": "People",
"search": "Search",
"profile": "Profile",
"logout": "Sign out"
},
"theme": {
"toggle": "Toggle theme"
},
"home": {
"refresh": "Refresh"
},
"login": {
"title": "Sign in",
"username": "Username",
"password": "Password",
"submit": "Sign in",
"register": "Create account",
"registerTitle": "Create account",
"registerSubmit": "Create account",
"switchToLogin": "Already have an account? Sign in"
},
"people": {
"title": "People",
"merge": "Merge"
},
"person": {
"editName": "Edit name",
"addAlias": "Add alias",
"aliases": "Also known as",
"galleries": "Galleries",
"faces": "Faces",
"save": "Save",
"cancel": "Cancel"
},
"merge": {
"title": "Merge into {{name}}",
"instruction": "Click a face to merge that person into {{name}}. This cannot be undone.",
"confirm": "Merge this person into {{name}}?",
"confirmYes": "Merge",
"confirmNo": "Cancel"
},
"gallery": {
"related": "Related galleries"
},
"search": {
"title": "Search",
"textLabel": "Describe an image",
"textPlaceholder": "yellow sofa, birthday cake...",
"peopleLabel": "Include people",
"submit": "Search",
"noResults": "No results found."
},
"error": {
"notFound": "Not found",
"loading": "Loading...",
"retry": "Retry"
}
}

14
ui/src/i18n/index.ts Normal file
View File

@@ -0,0 +1,14 @@
import i18n from 'i18next'
import { initReactI18next } from 'react-i18next'
import enGB from './en-GB.json'

// Initialise i18next with the single en-GB bundle. escapeValue is disabled
// because React already escapes interpolated values.
i18n
  .use(initReactI18next)
  .init({
    resources: { 'en-GB': { translation: enGB } },
    lng: 'en-GB',
    fallbackLng: 'en-GB',
    interpolation: { escapeValue: false },
  })

export default i18n

186
ui/src/index.css Normal file
View File

@@ -0,0 +1,186 @@
/* Global stylesheet: reset, shared controls (buttons, inputs), layout
   primitives (page, grid, cards) and modal styling. Colour and radius
   tokens come from theme/variables.css. */
@import './theme/variables.css';
/* Minimal box-model reset. */
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
html, body, #root {
  height: 100%;
  font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
  font-size: 16px;
  background: var(--colour-bg);
  color: var(--colour-text);
}
a { color: inherit; }
img { display: block; max-width: 100%; }
button { font-family: inherit; }
/* Primary (accent-filled) action button. */
.btn {
  display: inline-flex;
  align-items: center;
  gap: 0.4rem;
  padding: 0.5rem 1.1rem;
  border: none;
  border-radius: var(--radius);
  font-size: 0.9rem;
  cursor: pointer;
  background: var(--colour-accent);
  color: #fff;
  transition: background 0.15s;
}
.btn:hover { background: var(--colour-accent-hover); }
.btn:disabled { opacity: 0.5; cursor: not-allowed; }
/* Secondary (outlined, surface-coloured) button. */
.btn-secondary {
  display: inline-flex;
  align-items: center;
  gap: 0.4rem;
  padding: 0.4rem 0.9rem;
  border: 1px solid var(--colour-border);
  border-radius: var(--radius);
  font-size: 0.875rem;
  cursor: pointer;
  background: var(--colour-surface);
  color: var(--colour-text);
  transition: background 0.15s;
}
.btn-secondary:hover { background: var(--colour-border); }
.btn-secondary:disabled { opacity: 0.5; cursor: not-allowed; }
/* Destructive action button (e.g. merge confirmation). */
.btn-danger {
  background: var(--colour-danger);
  color: #fff;
  border: none;
  padding: 0.5rem 1.1rem;
  border-radius: var(--radius);
  font-size: 0.9rem;
  cursor: pointer;
  transition: background 0.15s;
}
.btn-danger:hover { background: var(--colour-danger-hover); }
/* Text inputs share one look across login and search forms. */
input[type='text'], input[type='password'] {
  width: 100%;
  padding: 0.5rem 0.75rem;
  border: 1px solid var(--colour-border);
  border-radius: var(--radius);
  background: var(--colour-bg-alt);
  color: var(--colour-text);
  font-size: 0.9rem;
}
/* Accent focus ring replaces the border (negative offset keeps size stable). */
input:focus {
  outline: 2px solid var(--colour-accent);
  outline-offset: -1px;
  border-color: transparent;
}
/* Standard centred page container. */
.page {
  padding: 1.5rem 1.25rem;
  max-width: 1400px;
  margin: 0 auto;
}
.page-title {
  font-size: 1.4rem;
  font-weight: 700;
  margin-bottom: 1.25rem;
}
/* Loading / error states. */
.loading {
  padding: 2rem;
  text-align: center;
  color: var(--colour-text-muted);
}
.error-msg {
  padding: 1rem;
  border-radius: var(--radius);
  background: rgba(239,68,68,0.1);
  color: var(--colour-danger);
  margin-bottom: 1rem;
}
/* Circular face crop used in people lists and gallery headers. */
.face-avatar {
  width: 60px;
  height: 60px;
  border-radius: var(--radius-round);
  object-fit: cover;
  background: var(--colour-surface);
}
/* Responsive auto-fill grid for gallery/person cards. */
.grid-4 {
  display: grid;
  grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
  gap: 1rem;
}
.card {
  border: 1px solid var(--colour-border);
  border-radius: var(--radius);
  overflow: hidden;
  background: var(--colour-bg-alt);
  transition: box-shadow 0.15s;
}
.card:hover { box-shadow: 0 4px 12px rgba(0,0,0,0.15); }
.card-thumb {
  width: 100%;
  aspect-ratio: 4/3;
  object-fit: cover;
  background: var(--colour-surface);
}
/* Single-line label under a card thumbnail; long names are ellipsised. */
.card-label {
  padding: 0.5rem 0.75rem;
  font-size: 0.8rem;
  color: var(--colour-text-muted);
  white-space: nowrap;
  overflow: hidden;
  text-overflow: ellipsis;
}
/* Pill-shaped subject/tag chip. */
.tag {
  display: inline-block;
  padding: 0.2rem 0.5rem;
  border-radius: var(--radius-round);
  background: var(--colour-surface);
  font-size: 0.75rem;
  color: var(--colour-text-muted);
}
/* Modal dialog: full-screen dimmed backdrop with a centred card. */
.modal-backdrop {
  position: fixed;
  inset: 0;
  background: rgba(0,0,0,0.5);
  display: flex;
  align-items: center;
  justify-content: center;
  z-index: 200;
}
.modal {
  background: var(--colour-bg);
  border-radius: var(--radius);
  padding: 1.5rem;
  max-width: 380px;
  width: 90%;
  box-shadow: 0 8px 32px rgba(0,0,0,0.3);
}
.modal-title {
  font-size: 1rem;
  font-weight: 600;
  margin-bottom: 1rem;
}
.modal-actions {
  display: flex;
  gap: 0.75rem;
  margin-top: 1.25rem;
  justify-content: flex-end;
}

11
ui/src/main.tsx Normal file
View File

@@ -0,0 +1,11 @@
// Application entry point: initialise i18n (side-effect import), apply global
// styles, and mount the React tree under StrictMode.
import { StrictMode } from 'react'
import { createRoot } from 'react-dom/client'
import './i18n'
import './index.css'
import { App } from './App'
// #root is declared in index.html; the non-null assertion is safe as long as
// that template is unchanged.
createRoot(document.getElementById('root')!).render(
  <StrictMode>
    <App />
  </StrictMode>
)

105
ui/src/pages/Gallery.css Normal file
View File

@@ -0,0 +1,105 @@
/* Gallery viewer layout: fixed-height column of header / main image viewer /
   thumbnail strip / tag bar, with only the thumbnail strip scrolling. */
.gallery-page {
  display: flex;
  flex-direction: column;
  height: calc(100vh - var(--header-height));
  overflow: hidden;
}
.gallery-header {
  display: flex;
  align-items: center;
  gap: 1rem;
  padding: 0.75rem 1.25rem;
  border-bottom: 1px solid var(--colour-border);
  background: var(--colour-bg-alt);
}
/* Title takes all free space, pushing face avatars to the right edge. */
.gallery-title {
  font-weight: 600;
  font-size: 0.95rem;
  flex: 1;
}
.gallery-faces {
  display: flex;
  gap: 0.5rem;
}
/* Black letterboxed area holding the current image; min-height: 0 lets the
   flex item shrink below its content so the strip below stays visible. */
.gallery-viewer {
  flex: 1;
  display: flex;
  align-items: center;
  justify-content: center;
  background: #000;
  position: relative;
  overflow: hidden;
  min-height: 0;
}
.gallery-main-img {
  max-width: 100%;
  max-height: 100%;
  object-fit: contain;
}
/* Overlay previous/next buttons, vertically centred on the viewer. */
.nav-btn {
  position: absolute;
  top: 50%;
  transform: translateY(-50%);
  background: rgba(0,0,0,0.4);
  border: none;
  color: #fff;
  font-size: 2.5rem;
  line-height: 1;
  padding: 0.5rem 0.75rem;
  cursor: pointer;
  border-radius: var(--radius);
  z-index: 10;
  transition: background 0.15s;
}
.nav-btn:hover { background: rgba(0,0,0,0.7); }
.nav-btn:disabled { opacity: 0.2; cursor: not-allowed; }
.nav-prev { left: 0.5rem; }
.nav-next { right: 0.5rem; }
/* Horizontally scrolling row of fixed-size thumbnails. */
.thumb-strip {
  display: flex;
  gap: 4px;
  overflow-x: auto;
  padding: 6px 8px;
  background: var(--colour-bg-alt);
  border-top: 1px solid var(--colour-border);
  flex-shrink: 0;
}
/* Transparent border reserves space so the active highlight causes no shift. */
.thumb-btn {
  flex-shrink: 0;
  border: 2px solid transparent;
  border-radius: 4px;
  background: none;
  cursor: pointer;
  padding: 0;
  overflow: hidden;
}
.thumb-btn.active {
  border-color: var(--colour-accent);
}
.thumb-btn img {
  width: 64px;
  height: 48px;
  object-fit: cover;
  display: block;
}
/* Subject/tag chips pinned under the strip. */
.gallery-tags {
  display: flex;
  flex-wrap: wrap;
  gap: 0.4rem;
  padding: 0.5rem 1rem;
  background: var(--colour-bg);
  border-top: 1px solid var(--colour-border);
  flex-shrink: 0;
}

86
ui/src/pages/Gallery.tsx Normal file
View File

@@ -0,0 +1,86 @@
import { useEffect, useState, useCallback } from 'react'
import { useParams } from 'react-router-dom'
import { useTranslation } from 'react-i18next'
import {
getGallery, getGalleryImages, faceCropUrl,
imageFileUrl, thumbnailUrl,
} from '../api/client'
import type { Gallery as GalleryType, GalleryImage, FaceRef } from '../api/client'
import './Gallery.css'
/**
 * Single-gallery viewer: main image with wrap-around keyboard/button
 * navigation, a clickable thumbnail strip, and subject/tag chips.
 */
export function Gallery() {
  const { id } = useParams<{ id: string }>()
  const { t } = useTranslation()
  const [gallery, setGallery] = useState<GalleryType | null>(null)
  const [images, setImages] = useState<GalleryImage[]>([])
  const [current, setCurrent] = useState(0)
  const [faces, setFaces] = useState<FaceRef[]>([])
  // Load gallery metadata and its image list whenever the route id changes;
  // reset the viewer to the first image of the new gallery.
  useEffect(() => {
    if (!id) return
    getGallery(id).then(setGallery)
    getGalleryImages(id).then(imgs => { setImages(imgs); setCurrent(0) })
  }, [id])
  // Load faces appearing in this gallery's images (first 10 distinct persons)
  useEffect(() => {
    // Placeholder — face-per-gallery query not in API; skip for now
    setFaces([])
  }, [id])
  // Wrap-around navigation. Guard against an empty image list: the global
  // keydown handler below can fire before images load, and `x % 0` is NaN,
  // which would corrupt `current`.
  const prev = useCallback(
    () => setCurrent(c => (images.length ? (c - 1 + images.length) % images.length : c)),
    [images.length]
  )
  const next = useCallback(
    () => setCurrent(c => (images.length ? (c + 1) % images.length : c)),
    [images.length]
  )
  // Arrow-key navigation; listener is replaced whenever prev/next change.
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      if (e.key === 'ArrowLeft') prev()
      else if (e.key === 'ArrowRight') next()
    }
    window.addEventListener('keydown', handler)
    return () => window.removeEventListener('keydown', handler)
  }, [prev, next])
  if (!gallery) return <div className="loading">{t('error.loading')}</div>
  const currentImage = images[current]
  return (
    <div className="gallery-page">
      <div className="gallery-header">
        <div className="gallery-title">{gallery.source_name || gallery.collection}</div>
        <div className="gallery-faces">
          {faces.map(f => (
            <img key={f.id} className="face-avatar" src={faceCropUrl(f.id)} alt="" loading="lazy" />
          ))}
        </div>
      </div>
      <div className="gallery-viewer">
        <button className="nav-btn nav-prev" onClick={prev} disabled={images.length < 2}></button>
        {currentImage && (
          <img
            key={currentImage.image_id}
            className="gallery-main-img"
            src={imageFileUrl(currentImage.image_id)}
            alt={currentImage.filename}
          />
        )}
        <button className="nav-btn nav-next" onClick={next} disabled={images.length < 2}></button>
      </div>
      <div className="thumb-strip">
        {images.map((img, i) => (
          <button key={img.image_id} className={`thumb-btn${i === current ? ' active' : ''}`} onClick={() => setCurrent(i)}>
            <img src={thumbnailUrl(img.image_id)} alt={img.filename} loading="lazy" />
          </button>
        ))}
      </div>
      {/* Bug fix: the bar was gated on subjects alone, hiding tags whenever
          subjects was empty. Show it if either list has entries. */}
      {(gallery.subjects.length > 0 || gallery.tags.length > 0) && (
        <div className="gallery-tags">
          {gallery.subjects.map(s => <span key={s} className="tag">{s}</span>)}
          {gallery.tags.map(tag => <span key={tag} className="tag">{tag}</span>)}
        </div>
      )}
    </div>
  )
}

60
ui/src/pages/Home.tsx Normal file
View File

@@ -0,0 +1,60 @@
import { useEffect, useState, useCallback } from 'react'
import { Link } from 'react-router-dom'
import { useTranslation } from 'react-i18next'
import { randomGalleries, getGalleryImages, thumbnailUrl } from '../api/client'
import type { Gallery, GalleryImage } from '../api/client'
interface GalleryThumb {
gallery: Gallery
image: GalleryImage | null
}
/**
 * Home page: a refreshable grid of random galleries, each rendered as a card
 * linking to the gallery viewer, using the gallery's first image as thumbnail.
 */
export function Home() {
  const { t } = useTranslation()
  const [items, setItems] = useState<GalleryThumb[]>([])
  const [loading, setLoading] = useState(true)
  const load = useCallback(async () => {
    setLoading(true)
    try {
      const galleries = await randomGalleries(12)
      // Fetch each gallery's first image in parallel; a failed image fetch
      // degrades to a thumbnail-less card rather than failing the whole page.
      const thumbs = await Promise.all(
        galleries.map(async (gallery): Promise<GalleryThumb> => {
          const images = await getGalleryImages(gallery.id).catch(() => [] as GalleryImage[])
          return { gallery, image: images[0] ?? null }
        })
      )
      setItems(thumbs)
    } catch (err) {
      // Bug fix: a failed randomGalleries call previously escaped as an
      // unhandled promise rejection from useEffect. Log and keep whatever
      // items were shown before so Refresh can be retried.
      console.error('Failed to load galleries', err)
    } finally {
      setLoading(false)
    }
  }, [])
  useEffect(() => { load() }, [load])
  return (
    <div className="page">
      <div className="home-toolbar">
        <button className="btn-secondary" onClick={load} disabled={loading}>
          {t('home.refresh')}
        </button>
      </div>
      {loading
        ? <div className="loading">{t('error.loading')}</div>
        : (
          <div className="grid-4" style={{ marginTop: '1rem' }}>
            {items.map(({ gallery, image }) => (
              <Link key={gallery.id} to={`/gallery/${gallery.id}`} className="card" style={{ textDecoration: 'none' }}>
                {image
                  ? <img className="card-thumb" src={thumbnailUrl(image.image_id)} alt={image.filename} loading="lazy" />
                  : <div className="card-thumb" />
                }
                <div className="card-label">{gallery.source_name || gallery.collection}</div>
              </Link>
            ))}
          </div>
        )
      }
    </div>
  )
}

36
ui/src/pages/Login.css Normal file
View File

@@ -0,0 +1,36 @@
/* Login/registration page: a single card centred in the viewport below the
   app header. */
.login-wrap {
  min-height: calc(100vh - var(--header-height));
  display: flex;
  align-items: center;
  justify-content: center;
  padding: 2rem;
}
.login-card {
  background: var(--colour-bg-alt);
  border: 1px solid var(--colour-border);
  border-radius: var(--radius);
  padding: 2rem;
  width: 100%;
  max-width: 360px;
  display: flex;
  flex-direction: column;
  gap: 0.75rem;
}
.login-title {
  font-size: 1.4rem;
  font-weight: 700;
  margin-bottom: 0.25rem;
}
/* Muted label above each input. */
.field-label {
  font-size: 0.85rem;
  color: var(--colour-text-muted);
  margin-top: 0.25rem;
}
/* Secondary button that switches between login and register modes. */
.login-switch {
  text-align: center;
  justify-content: center;
}

74
ui/src/pages/Login.tsx Normal file
View File

@@ -0,0 +1,74 @@
import { useState } from 'react'
import { useNavigate } from 'react-router-dom'
import { useTranslation } from 'react-i18next'
import { login, register } from '../api/client'
import type { User } from '../api/client'
import './Login.css'
interface Props {
onLogin: (user: User) => void
}
export function Login({ onLogin }: Props) {
const { t } = useTranslation()
const navigate = useNavigate()
const [mode, setMode] = useState<'login' | 'register'>('login')
const [username, setUsername] = useState('')
const [password, setPassword] = useState('')
const [error, setError] = useState('')
const [loading, setLoading] = useState(false)
const submit = async (e: React.FormEvent) => {
e.preventDefault()
setError('')
setLoading(true)
try {
const user = mode === 'login'
? await login(username, password)
: await register(username, password)
onLogin(user)
navigate('/')
} catch (err: unknown) {
setError(err instanceof Error ? err.message : 'Unknown error')
} finally {
setLoading(false)
}
}
return (
<div className="login-wrap">
<form className="login-card" onSubmit={submit}>
<h1 className="login-title">
{mode === 'login' ? t('login.title') : t('login.registerTitle')}
</h1>
{error && <div className="error-msg">{error}</div>}
<label className="field-label">{t('login.username')}</label>
<input
type="text"
value={username}
onChange={e => setUsername(e.target.value)}
autoComplete="username"
required
/>
<label className="field-label">{t('login.password')}</label>
<input
type="password"
value={password}
onChange={e => setPassword(e.target.value)}
autoComplete={mode === 'login' ? 'current-password' : 'new-password'}
required
/>
<button className="btn" type="submit" disabled={loading}>
{mode === 'login' ? t('login.submit') : t('login.registerSubmit')}
</button>
<button
type="button"
className="btn-secondary login-switch"
onClick={() => setMode(m => m === 'login' ? 'register' : 'login')}
>
{mode === 'login' ? t('login.register') : t('login.switchToLogin')}
</button>
</form>
</div>
)
}

Some files were not shown because too many files have changed in this diff Show More