Initial nix-ota implementation
Self-hostable OTA update system for NixOS fleets: a control server, device agent, publisher CLI, and NixOS modules that ship prebuilt system closures from a binary cache to devices that don't have the flake.

- crates/common: signed manifest types (ed25519), store-path validator
- crates/server: axum + sqlite + HTMX dashboard, channel/device API
- crates/agent: poll, verify signature + revision, nix copy, switch, health check, magic-rollback on failure
- crates/publisher: keygen + sign + publish CLI for operators/CI
- nix/modules: NixOS modules for server and agent
- nix/tests/ota.nix: end-to-end VM test exercising publish A -> B -> broken C -> rollback to B (passes)

The control server never holds the signing key; manifests are signed offline and verified against a pinned public key on each device.
This commit is contained in:
commit
42b2ce4d1d
19 changed files with 4745 additions and 0 deletions
316
crates/server/src/main.rs
Normal file
316
crates/server/src/main.rs
Normal file
|
|
@ -0,0 +1,316 @@
|
|||
//! `nix-ota-server` — control plane.
|
||||
//!
|
||||
//! Single static binary that:
|
||||
//! * serves the REST API consumed by the agent and publisher,
|
||||
//! * persists channel/device state in SQLite,
|
||||
//! * renders an HTMX-based dashboard from embedded templates.
|
||||
//!
|
||||
//! The server never holds the manifest signing key. Operators sign
|
||||
//! manifests on a workstation (or in CI with a sealed secret) and POST
|
||||
//! the already-signed manifest to `/channels/:name/publish`. The server's
|
||||
//! job is purely to fan signed manifests out to devices and to record
|
||||
//! check-ins, so a server compromise cannot push arbitrary closures.
|
||||
|
||||
use anyhow::Result;
|
||||
use axum::{
|
||||
extract::{Path, State},
|
||||
http::{header, HeaderMap, StatusCode},
|
||||
response::IntoResponse,
|
||||
routing::{get, post},
|
||||
Json, Router,
|
||||
};
|
||||
use clap::Parser;
|
||||
use nix_ota_common as common;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::{sqlite::SqlitePoolOptions, SqlitePool};
|
||||
use std::{net::SocketAddr, sync::Arc};
|
||||
use tower_http::trace::TraceLayer;
|
||||
|
||||
mod ui;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(version, about = "nix-ota control server")]
|
||||
struct Args {
|
||||
/// Listen address.
|
||||
#[arg(long, env = "NIX_OTA_LISTEN", default_value = "0.0.0.0:8080")]
|
||||
listen: SocketAddr,
|
||||
/// Path to SQLite database.
|
||||
#[arg(long, env = "NIX_OTA_DB", default_value = "nix-ota.db")]
|
||||
db: String,
|
||||
/// Bearer token required for /publish endpoints. If unset, a random
|
||||
/// token is generated and printed at startup.
|
||||
#[arg(long, env = "NIX_OTA_PUBLISH_TOKEN")]
|
||||
publish_token: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AppState {
|
||||
pub db: SqlitePool,
|
||||
pub publish_token: Arc<String>,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(
|
||||
tracing_subscriber::EnvFilter::try_from_default_env()
|
||||
.unwrap_or_else(|_| "info,sqlx=warn".into()),
|
||||
)
|
||||
.init();
|
||||
|
||||
let args = Args::parse();
|
||||
let publish_token = args.publish_token.clone().unwrap_or_else(|| {
|
||||
let t = ulid::Ulid::new().to_string();
|
||||
tracing::warn!("no --publish-token set; generated ephemeral token: {t}");
|
||||
t
|
||||
});
|
||||
|
||||
let db_url = format!("sqlite://{}?mode=rwc", args.db);
|
||||
let db = SqlitePoolOptions::new()
|
||||
.max_connections(8)
|
||||
.connect(&db_url)
|
||||
.await?;
|
||||
migrate(&db).await?;
|
||||
|
||||
let state = AppState {
|
||||
db,
|
||||
publish_token: Arc::new(publish_token),
|
||||
};
|
||||
|
||||
let app = Router::new()
|
||||
.route("/healthz", get(|| async { "ok" }))
|
||||
.route("/channels/:name/current", get(get_current))
|
||||
.route("/channels/:name/publish", post(publish))
|
||||
.route("/devices/:id/checkin", post(checkin))
|
||||
.route("/", get(ui::index))
|
||||
.route("/ui/channels/:name", get(ui::channel_detail))
|
||||
.route("/ui/devices/:id", get(ui::device_detail))
|
||||
.with_state(state)
|
||||
.layer(TraceLayer::new_for_http());
|
||||
|
||||
tracing::info!("listening on {}", args.listen);
|
||||
let listener = tokio::net::TcpListener::bind(args.listen).await?;
|
||||
axum::serve(listener, app).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn migrate(db: &SqlitePool) -> Result<()> {
|
||||
sqlx::query(
|
||||
r#"
|
||||
CREATE TABLE IF NOT EXISTS channels (
|
||||
name TEXT PRIMARY KEY,
|
||||
current_manifest TEXT,
|
||||
revision INTEGER NOT NULL DEFAULT 0,
|
||||
updated_at INTEGER NOT NULL DEFAULT 0
|
||||
);
|
||||
"#,
|
||||
)
|
||||
.execute(db)
|
||||
.await?;
|
||||
sqlx::query(
|
||||
r#"
|
||||
CREATE TABLE IF NOT EXISTS channel_history (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
channel TEXT NOT NULL,
|
||||
manifest TEXT NOT NULL,
|
||||
revision INTEGER NOT NULL,
|
||||
published_at INTEGER NOT NULL
|
||||
);
|
||||
"#,
|
||||
)
|
||||
.execute(db)
|
||||
.await?;
|
||||
sqlx::query(
|
||||
r#"
|
||||
CREATE TABLE IF NOT EXISTS devices (
|
||||
id TEXT PRIMARY KEY,
|
||||
channel TEXT NOT NULL,
|
||||
current_store_path TEXT,
|
||||
target_store_path TEXT,
|
||||
health TEXT NOT NULL DEFAULT 'ok',
|
||||
last_message TEXT,
|
||||
agent_version TEXT,
|
||||
last_seen INTEGER NOT NULL DEFAULT 0
|
||||
);
|
||||
"#,
|
||||
)
|
||||
.execute(db)
|
||||
.await?;
|
||||
sqlx::query(
|
||||
r#"
|
||||
CREATE TABLE IF NOT EXISTS device_history (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
device_id TEXT NOT NULL,
|
||||
store_path TEXT,
|
||||
health TEXT NOT NULL,
|
||||
message TEXT,
|
||||
at INTEGER NOT NULL
|
||||
);
|
||||
"#,
|
||||
)
|
||||
.execute(db)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---------- API handlers ----------
|
||||
|
||||
async fn get_current(
|
||||
State(s): State<AppState>,
|
||||
Path(name): Path<String>,
|
||||
) -> Result<Json<common::Manifest>, ApiError> {
|
||||
let row: Option<(Option<String>,)> =
|
||||
sqlx::query_as("SELECT current_manifest FROM channels WHERE name = ?")
|
||||
.bind(&name)
|
||||
.fetch_optional(&s.db)
|
||||
.await?;
|
||||
let manifest_json = row.and_then(|r| r.0).ok_or(ApiError::NotFound)?;
|
||||
let manifest: common::Manifest = serde_json::from_str(&manifest_json)?;
|
||||
Ok(Json(manifest))
|
||||
}
|
||||
|
||||
async fn publish(
|
||||
State(s): State<AppState>,
|
||||
Path(name): Path<String>,
|
||||
headers: HeaderMap,
|
||||
Json(manifest): Json<common::Manifest>,
|
||||
) -> Result<Json<serde_json::Value>, ApiError> {
|
||||
require_token(&headers, &s.publish_token)?;
|
||||
if manifest.body.channel != name {
|
||||
return Err(ApiError::BadRequest("channel mismatch".into()));
|
||||
}
|
||||
common::validate_store_path(&manifest.body.store_path)
|
||||
.map_err(|e| ApiError::BadRequest(e.to_string()))?;
|
||||
// We do NOT verify the signature against any key here — the server is
|
||||
// intentionally key-agnostic. Devices verify against their pinned key.
|
||||
|
||||
let now = common::now();
|
||||
let json = serde_json::to_string(&manifest)?;
|
||||
let mut tx = s.db.begin().await?;
|
||||
// Bump revision atomically.
|
||||
let cur: Option<(i64,)> = sqlx::query_as("SELECT revision FROM channels WHERE name = ?")
|
||||
.bind(&name)
|
||||
.fetch_optional(&mut *tx)
|
||||
.await?;
|
||||
let next_rev = cur.map(|r| r.0).unwrap_or(0) + 1;
|
||||
if (manifest.body.revision as i64) != next_rev {
|
||||
return Err(ApiError::BadRequest(format!(
|
||||
"manifest revision must be {next_rev}, got {}",
|
||||
manifest.body.revision
|
||||
)));
|
||||
}
|
||||
sqlx::query(
|
||||
"INSERT INTO channels(name, current_manifest, revision, updated_at) VALUES(?,?,?,?)
|
||||
ON CONFLICT(name) DO UPDATE SET current_manifest=excluded.current_manifest,
|
||||
revision=excluded.revision, updated_at=excluded.updated_at",
|
||||
)
|
||||
.bind(&name)
|
||||
.bind(&json)
|
||||
.bind(next_rev)
|
||||
.bind(now)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
sqlx::query(
|
||||
"INSERT INTO channel_history(channel, manifest, revision, published_at) VALUES(?,?,?,?)",
|
||||
)
|
||||
.bind(&name)
|
||||
.bind(&json)
|
||||
.bind(next_rev)
|
||||
.bind(now)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
tx.commit().await?;
|
||||
Ok(Json(serde_json::json!({"ok": true, "revision": next_rev})))
|
||||
}
|
||||
|
||||
async fn checkin(
|
||||
State(s): State<AppState>,
|
||||
Path(id): Path<String>,
|
||||
Json(ci): Json<common::CheckIn>,
|
||||
) -> Result<Json<common::CheckInAck>, ApiError> {
|
||||
if ci.device_id != id {
|
||||
return Err(ApiError::BadRequest("device id mismatch".into()));
|
||||
}
|
||||
let now = common::now();
|
||||
let health = serde_json::to_string(&ci.health)?.trim_matches('"').to_string();
|
||||
sqlx::query(
|
||||
"INSERT INTO devices(id, channel, current_store_path, target_store_path, health,
|
||||
last_message, agent_version, last_seen)
|
||||
VALUES(?,?,?,?,?,?,?,?)
|
||||
ON CONFLICT(id) DO UPDATE SET
|
||||
channel=excluded.channel,
|
||||
current_store_path=excluded.current_store_path,
|
||||
target_store_path=excluded.target_store_path,
|
||||
health=excluded.health,
|
||||
last_message=excluded.last_message,
|
||||
agent_version=excluded.agent_version,
|
||||
last_seen=excluded.last_seen",
|
||||
)
|
||||
.bind(&ci.device_id)
|
||||
.bind(&ci.channel)
|
||||
.bind(&ci.current_store_path)
|
||||
.bind(&ci.target_store_path)
|
||||
.bind(&health)
|
||||
.bind(&ci.message)
|
||||
.bind(&ci.agent_version)
|
||||
.bind(now)
|
||||
.execute(&s.db)
|
||||
.await?;
|
||||
sqlx::query(
|
||||
"INSERT INTO device_history(device_id, store_path, health, message, at) VALUES(?,?,?,?,?)",
|
||||
)
|
||||
.bind(&ci.device_id)
|
||||
.bind(&ci.current_store_path)
|
||||
.bind(&health)
|
||||
.bind(&ci.message)
|
||||
.bind(now)
|
||||
.execute(&s.db)
|
||||
.await?;
|
||||
Ok(Json(common::CheckInAck { server_time: now }))
|
||||
}
|
||||
|
||||
// ---------- helpers ----------
|
||||
|
||||
fn require_token(headers: &HeaderMap, expected: &str) -> Result<(), ApiError> {
|
||||
let v = headers
|
||||
.get(header::AUTHORIZATION)
|
||||
.and_then(|h| h.to_str().ok())
|
||||
.unwrap_or("");
|
||||
let token = v.strip_prefix("Bearer ").unwrap_or("");
|
||||
if token != expected || token.is_empty() {
|
||||
return Err(ApiError::Unauthorized);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct ApiErrorBody {
|
||||
pub error: String,
|
||||
}
|
||||
|
||||
/// Error type returned by the API and dashboard handlers.
///
/// Each variant corresponds to one HTTP status code; see the `IntoResponse`
/// implementation for the mapping.
#[derive(Debug)]
pub enum ApiError {
    /// The requested resource does not exist (404).
    NotFound,
    /// The request was rejected; the payload says why (400).
    BadRequest(String),
    /// The request lacked a valid bearer token (401).
    Unauthorized,
    /// The server failed; the payload carries the underlying detail (500).
    Internal(String),
}
|
||||
|
||||
impl From<sqlx::Error> for ApiError {
|
||||
fn from(e: sqlx::Error) -> Self { Self::Internal(e.to_string()) }
|
||||
}
|
||||
impl From<serde_json::Error> for ApiError {
|
||||
fn from(e: serde_json::Error) -> Self { Self::BadRequest(e.to_string()) }
|
||||
}
|
||||
|
||||
impl IntoResponse for ApiError {
|
||||
fn into_response(self) -> axum::response::Response {
|
||||
let (code, msg) = match self {
|
||||
ApiError::NotFound => (StatusCode::NOT_FOUND, "not found".to_string()),
|
||||
ApiError::BadRequest(m) => (StatusCode::BAD_REQUEST, m),
|
||||
ApiError::Unauthorized => (StatusCode::UNAUTHORIZED, "unauthorized".to_string()),
|
||||
ApiError::Internal(m) => (StatusCode::INTERNAL_SERVER_ERROR, m),
|
||||
};
|
||||
(code, Json(ApiErrorBody { error: msg })).into_response()
|
||||
}
|
||||
}
|
||||
145
crates/server/src/ui.rs
Normal file
145
crates/server/src/ui.rs
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
//! Minimal HTMX-flavored dashboard rendered with plain string formatting.
|
||||
//!
|
||||
//! Kept dependency-free on purpose; the UI is intentionally tiny so the
|
||||
//! whole server stays a single static binary with no asset pipeline.
|
||||
|
||||
use crate::{ApiError, AppState};
|
||||
use axum::{
|
||||
extract::{Path, State},
|
||||
response::Html,
|
||||
};
|
||||
|
||||
/// Opening HTML shared by every dashboard page: doctype, `<head>` with the
/// inline stylesheet, and the opening `<body>` tag. Each handler appends its
/// content and closes `</body></html>` itself.
///
/// The `.ok` / `.failed` / `.updating` / `.rolled_back` rules colour the
/// health value, which handlers emit as the element's `class`.
const HEAD: &str = r#"<!doctype html>
<html><head><meta charset="utf-8"><title>nix-ota</title>
<style>
body{font-family:ui-monospace,Menlo,monospace;max-width:1100px;margin:2em auto;padding:0 1em;color:#222}
table{border-collapse:collapse;width:100%}th,td{border-bottom:1px solid #ddd;padding:.4em .6em;text-align:left;font-size:13px}
th{background:#f5f5f5}
.ok{color:#197a19}.failed{color:#b00}.updating{color:#b58900}.rolled_back{color:#b58900}
h1,h2{font-weight:600}
a{color:#06c;text-decoration:none}a:hover{text-decoration:underline}
code{background:#f0f0f0;padding:1px 4px;border-radius:3px}
</style></head><body>"#;
|
||||
|
||||
/// Abbreviate an optional store path for table display.
///
/// * `None` renders as an em dash.
/// * `/nix/store/<hash>-name` renders as the first 8 bytes following the
///   `/nix/store/` prefix.
/// * Anything else (no such prefix, fewer than 8 bytes after it, or byte 8
///   not on a character boundary) is returned unchanged.
fn short(p: &Option<String>) -> String {
    let full = match p.as_deref() {
        Some(path) => path,
        None => return String::from("—"),
    };
    let abbreviated = full
        .strip_prefix("/nix/store/")
        .and_then(|rest| rest.get(..8));
    match abbreviated {
        Some(prefix) => prefix.to_owned(),
        None => full.to_owned(),
    }
}
|
||||
|
||||
fn ago(ts: i64) -> String {
|
||||
if ts == 0 { return "never".into(); }
|
||||
let d = (nix_ota_common::now() - ts).max(0);
|
||||
if d < 60 { format!("{d}s ago") }
|
||||
else if d < 3600 { format!("{}m ago", d/60) }
|
||||
else if d < 86400 { format!("{}h ago", d/3600) }
|
||||
else { format!("{}d ago", d/86400) }
|
||||
}
|
||||
|
||||
pub async fn index(State(s): State<AppState>) -> Result<Html<String>, ApiError> {
|
||||
let chans: Vec<(String, i64, i64, Option<String>)> = sqlx::query_as(
|
||||
"SELECT name, revision, updated_at, current_manifest FROM channels ORDER BY name"
|
||||
).fetch_all(&s.db).await?;
|
||||
let devs: Vec<(String, String, Option<String>, Option<String>, String, i64)> = sqlx::query_as(
|
||||
"SELECT id, channel, current_store_path, target_store_path, health, last_seen
|
||||
FROM devices ORDER BY id"
|
||||
).fetch_all(&s.db).await?;
|
||||
|
||||
let mut html = String::from(HEAD);
|
||||
html.push_str("<h1>nix-ota</h1>");
|
||||
|
||||
html.push_str("<h2>Channels</h2><table><tr><th>name</th><th>rev</th><th>target</th><th>updated</th></tr>");
|
||||
for (name, rev, updated, mj) in &chans {
|
||||
let target = mj.as_ref()
|
||||
.and_then(|j| serde_json::from_str::<nix_ota_common::Manifest>(j).ok())
|
||||
.map(|m| short(&Some(m.body.store_path)))
|
||||
.unwrap_or_else(|| "—".into());
|
||||
html.push_str(&format!(
|
||||
"<tr><td><a href=\"/ui/channels/{name}\">{name}</a></td><td>{rev}</td><td><code>{target}</code></td><td>{}</td></tr>",
|
||||
ago(*updated)
|
||||
));
|
||||
}
|
||||
html.push_str("</table>");
|
||||
|
||||
html.push_str("<h2>Devices</h2><table><tr><th>id</th><th>channel</th><th>current</th><th>target</th><th>health</th><th>last seen</th></tr>");
|
||||
for (id, channel, cur, tgt, health, last) in &devs {
|
||||
html.push_str(&format!(
|
||||
"<tr><td><a href=\"/ui/devices/{id}\">{id}</a></td><td>{channel}</td><td><code>{}</code></td><td><code>{}</code></td><td class=\"{health}\">{health}</td><td>{}</td></tr>",
|
||||
short(cur), short(tgt), ago(*last)
|
||||
));
|
||||
}
|
||||
html.push_str("</table></body></html>");
|
||||
Ok(Html(html))
|
||||
}
|
||||
|
||||
pub async fn channel_detail(
|
||||
State(s): State<AppState>,
|
||||
Path(name): Path<String>,
|
||||
) -> Result<Html<String>, ApiError> {
|
||||
let hist: Vec<(i64, String, i64)> = sqlx::query_as(
|
||||
"SELECT revision, manifest, published_at FROM channel_history
|
||||
WHERE channel = ? ORDER BY revision DESC LIMIT 50"
|
||||
).bind(&name).fetch_all(&s.db).await?;
|
||||
|
||||
let mut html = String::from(HEAD);
|
||||
html.push_str(&format!("<h1>channel: {name}</h1><p><a href=\"/\">← back</a></p>"));
|
||||
html.push_str("<h2>History</h2><table><tr><th>rev</th><th>store path</th><th>substituter</th><th>published</th></tr>");
|
||||
for (rev, mj, at) in &hist {
|
||||
if let Ok(m) = serde_json::from_str::<nix_ota_common::Manifest>(mj) {
|
||||
html.push_str(&format!(
|
||||
"<tr><td>{rev}</td><td><code>{}</code></td><td>{}</td><td>{}</td></tr>",
|
||||
m.body.store_path, m.body.substituter, ago(*at)
|
||||
));
|
||||
}
|
||||
}
|
||||
html.push_str("</table></body></html>");
|
||||
Ok(Html(html))
|
||||
}
|
||||
|
||||
pub async fn device_detail(
|
||||
State(s): State<AppState>,
|
||||
Path(id): Path<String>,
|
||||
) -> Result<Html<String>, ApiError> {
|
||||
let dev: Option<(String, String, Option<String>, Option<String>, String, Option<String>, Option<String>, i64)> =
|
||||
sqlx::query_as(
|
||||
"SELECT id, channel, current_store_path, target_store_path, health,
|
||||
last_message, agent_version, last_seen
|
||||
FROM devices WHERE id = ?"
|
||||
).bind(&id).fetch_optional(&s.db).await?;
|
||||
let hist: Vec<(Option<String>, String, Option<String>, i64)> = sqlx::query_as(
|
||||
"SELECT store_path, health, message, at FROM device_history
|
||||
WHERE device_id = ? ORDER BY id DESC LIMIT 100"
|
||||
).bind(&id).fetch_all(&s.db).await?;
|
||||
|
||||
let mut html = String::from(HEAD);
|
||||
html.push_str(&format!("<h1>device: {id}</h1><p><a href=\"/\">← back</a></p>"));
|
||||
if let Some((_, channel, cur, tgt, health, msg, ver, last)) = &dev {
|
||||
html.push_str(&format!(
|
||||
"<p>channel: <b>{channel}</b><br>agent: {}<br>health: <span class=\"{health}\">{health}</span><br>\
|
||||
current: <code>{}</code><br>target: <code>{}</code><br>last seen: {}<br>last message: {}</p>",
|
||||
ver.as_deref().unwrap_or("?"),
|
||||
cur.as_deref().unwrap_or("—"),
|
||||
tgt.as_deref().unwrap_or("—"),
|
||||
ago(*last),
|
||||
msg.as_deref().unwrap_or("")
|
||||
));
|
||||
}
|
||||
html.push_str("<h2>History</h2><table><tr><th>at</th><th>health</th><th>store path</th><th>message</th></tr>");
|
||||
for (sp, h, msg, at) in &hist {
|
||||
html.push_str(&format!(
|
||||
"<tr><td>{}</td><td class=\"{h}\">{h}</td><td><code>{}</code></td><td>{}</td></tr>",
|
||||
ago(*at),
|
||||
sp.as_deref().unwrap_or("—"),
|
||||
msg.as_deref().unwrap_or("")
|
||||
));
|
||||
}
|
||||
html.push_str("</table></body></html>");
|
||||
Ok(Html(html))
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue