Initial nix-ota implementation

Self-hostable OTA update system for NixOS fleets: a control server,
device agent, publisher CLI, and NixOS modules that ship prebuilt
system closures from a binary cache to devices that don't have the
flake.

- crates/common: signed manifest types (ed25519), store-path validator
- crates/server: axum + sqlite + HTMX dashboard, channel/device API
- crates/agent: poll, verify signature + revision, nix copy, switch,
  health check, magic-rollback on failure
- crates/publisher: keygen + sign + publish CLI for operators/CI
- nix/modules: NixOS modules for server and agent
- nix/tests/ota.nix: end-to-end VM test exercising publish A -> B ->
  broken C -> rollback to B (passes)

The control server never holds the signing key; manifests are signed
offline and verified against a pinned public key on each device.
This commit is contained in:
0m.ax 2026-05-25 14:58:42 +02:00
commit 42b2ce4d1d
19 changed files with 4745 additions and 0 deletions

18
crates/common/Cargo.toml Normal file
View file

@ -0,0 +1,18 @@
# Manifest for the shared `nix-ota-common` library crate.
[package]
name = "nix-ota-common"
# Version, edition and license are inherited from the workspace manifest.
version.workspace = true
edition.workspace = true
license.workspace = true
[lib]
path = "src/lib.rs"
# Every dependency below is inherited from the workspace dependency table.
[dependencies]
serde.workspace = true
serde_json.workspace = true
ed25519-dalek.workspace = true
base64.workspace = true
rand.workspace = true
time.workspace = true
thiserror.workspace = true
sha2.workspace = true

234
crates/common/src/lib.rs Normal file
View file

@ -0,0 +1,234 @@
//! Shared types and crypto for nix-ota.
//!
//! The central object is a signed [`Manifest`]: a small JSON document
//! pointing at a NixOS system closure store path together with the
//! substituter to fetch it from. Manifests are signed by an offline
//! ed25519 key; the agent verifies them on every poll.
//!
//! The signature covers the canonical serialization of [`ManifestBody`]
//! (the manifest without its own signature). The canonical form is the
//! `serde_json` encoding of that struct, with fields emitted in struct
//! declaration order, which keeps the signed bytes deterministic.
use base64::{engine::general_purpose::STANDARD as B64, Engine as _};
use ed25519_dalek::{Signature, Signer, SigningKey, Verifier, VerifyingKey};
use rand::rngs::OsRng;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use time::OffsetDateTime;
/// Prefix every accepted store path must start with; checked by
/// [`validate_store_path`].
pub const STORE_PATH_PREFIX: &str = "/nix/store/";
/// Errors returned by the manifest, key and store-path helpers in this module.
#[derive(Debug, Error)]
pub enum Error {
/// A base64-encoded key or signature could not be decoded.
#[error("invalid base64: {0}")]
Base64(#[from] base64::DecodeError),
/// The signature bytes were malformed or did not verify against the body.
#[error("invalid signature")]
Signature,
/// Decoded key material was rejected; the string carries the reason.
#[error("invalid key: {0}")]
Key(String),
/// A store path failed [`validate_store_path`]; the string names the rule.
#[error("invalid store path: {0}")]
StorePath(String),
/// JSON serialization failed.
#[error("serialization: {0}")]
Serde(#[from] serde_json::Error),
/// The manifest's `key_id` does not match the key it is being verified with.
#[error("manifest signed by unexpected key")]
KeyMismatch,
}
/// The signed payload of a manifest.
///
/// `key_id` is the first 8 bytes (hex) of the SHA-256 of the verifying key,
/// to help operators rotate keys and to give clear errors when a device
/// is configured with the wrong key.
///
/// NOTE: [`canonical_body`] serializes this struct directly, so the names and
/// declaration order of the fields below are part of the signed byte string.
/// Renaming or reordering a field invalidates previously issued signatures.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ManifestBody {
/// Channel this manifest belongs to.
pub channel: String,
/// Absolute Nix store path of the system closure top-level
/// (e.g. `/nix/store/...-nixos-system-foo-24.05.toplevel`).
pub store_path: String,
/// Substituter URL the agent should `nix copy --from`.
pub substituter: String,
/// Unix timestamp seconds.
pub timestamp: i64,
/// Monotonically increasing revision for this channel. Used by agents
/// to ignore replays of older manifests.
pub revision: u64,
/// Short identifier of the signing key. [`sign_manifest`] overwrites this
/// field and [`verify_manifest`] compares it against the supplied key.
pub key_id: String,
}
/// A [`ManifestBody`] together with its detached signature.
///
/// Because `body` is flattened, the serialized form is a single object that
/// holds the body's fields alongside `signature`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Manifest {
/// The signed payload; its fields appear at the top level when serialized.
#[serde(flatten)]
pub body: ManifestBody,
/// base64(ed25519 signature over canonical JSON of `body`).
pub signature: String,
}
/// Validate that a string looks like a Nix store path. This is intentionally
/// strict to avoid an attacker tricking the agent into running arbitrary paths.
pub fn validate_store_path(p: &str) -> Result<(), Error> {
if !p.starts_with(STORE_PATH_PREFIX) {
return Err(Error::StorePath(format!("must start with {STORE_PATH_PREFIX}")));
}
let rest = &p[STORE_PATH_PREFIX.len()..];
if rest.is_empty() || rest.contains('/') || rest.contains("..") {
return Err(Error::StorePath("must be a single store object".into()));
}
// hash-name format: 32 base32 chars, '-', name
let dash = rest.find('-').ok_or_else(|| Error::StorePath("missing -".into()))?;
if dash != 32 {
return Err(Error::StorePath("hash must be 32 chars".into()));
}
for c in rest.chars() {
if !(c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.' | '+' | '?' | '=')) {
return Err(Error::StorePath(format!("invalid char {c:?}")));
}
}
Ok(())
}
/// Short identifier for a verifying key: the first 8 bytes of the SHA-256
/// of the key's raw bytes, rendered as 16 lowercase hex characters.
pub fn key_id(vk: &VerifyingKey) -> String {
    use sha2::{Digest, Sha256};

    let digest = Sha256::digest(vk.as_bytes());
    hex_short(&digest[..8])
}
/// Render `bytes` as lowercase hexadecimal, two characters per byte.
///
/// An empty slice yields an empty string. Digits are taken from a lookup
/// table so no temporary `String` is allocated per byte.
fn hex_short(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble, matching `{:02x}` formatting.
        out.push(char::from(DIGITS[usize::from(byte >> 4)]));
        out.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    out
}
/// Canonical bytes used for signing/verification.
pub fn canonical_body(body: &ManifestBody) -> Result<Vec<u8>, Error> {
// serde_json preserves field order from the struct definition, which is
// stable. That's our canonical form for v1.
Ok(serde_json::to_vec(body)?)
}
/// Sign `body` with `sk` and wrap it into a [`Manifest`].
///
/// Whatever `key_id` the caller supplied is replaced with the identifier of
/// `sk`'s verifying key before signing, so the signature covers it.
///
/// # Errors
///
/// Returns [`Error::Serde`] if the body cannot be serialized.
pub fn sign_manifest(sk: &SigningKey, mut body: ManifestBody) -> Result<Manifest, Error> {
    body.key_id = key_id(&sk.verifying_key());

    let signature = {
        let payload = canonical_body(&body)?;
        let sig: Signature = sk.sign(&payload);
        B64.encode(sig.to_bytes())
    };

    Ok(Manifest { body, signature })
}
pub fn verify_manifest(vk: &VerifyingKey, m: &Manifest) -> Result<(), Error> {
if m.body.key_id != key_id(vk) {
return Err(Error::KeyMismatch);
}
let sig_bytes = B64.decode(m.signature.as_bytes())?;
let sig = Signature::from_slice(&sig_bytes).map_err(|_| Error::Signature)?;
let bytes = canonical_body(&m.body)?;
vk.verify(&bytes, &sig).map_err(|_| Error::Signature)?;
validate_store_path(&m.body.store_path)?;
Ok(())
}
/// Generate a fresh ed25519 signing key using `OsRng` as the randomness source.
pub fn generate_keypair() -> SigningKey {
    let mut rng = OsRng;
    SigningKey::generate(&mut rng)
}
/// Encode the signing key's bytes as standard base64.
///
/// The result is secret key material; see [`decode_signing_key`] for the
/// inverse.
pub fn encode_signing_key(sk: &SigningKey) -> String {
    let secret = sk.to_bytes();
    B64.encode(secret)
}
pub fn decode_signing_key(s: &str) -> Result<SigningKey, Error> {
let raw = B64.decode(s.trim().as_bytes())?;
let arr: [u8; 32] = raw.as_slice().try_into().map_err(|_| Error::Key("len".into()))?;
Ok(SigningKey::from_bytes(&arr))
}
/// Encode the verifying (public) key's bytes as standard base64.
///
/// See [`decode_verifying_key`] for the inverse.
pub fn encode_verifying_key(vk: &VerifyingKey) -> String {
    let public = vk.to_bytes();
    B64.encode(public)
}
pub fn decode_verifying_key(s: &str) -> Result<VerifyingKey, Error> {
let raw = B64.decode(s.trim().as_bytes())?;
let arr: [u8; 32] = raw.as_slice().try_into().map_err(|_| Error::Key("len".into()))?;
VerifyingKey::from_bytes(&arr).map_err(|e| Error::Key(e.to_string()))
}
// --- check-in API types ---
/// Check-in payload: one device's self-reported state.
///
/// NOTE(review): the field meanings below are inferred from their names; the
/// code that produces and consumes this type is not in this file, so confirm
/// against the agent and server.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CheckIn {
/// Identifier of the reporting device.
pub device_id: String,
/// Channel the device reports for.
pub channel: String,
/// Presumably the store path of the currently running system, if known.
pub current_store_path: Option<String>,
/// Presumably the store path the device is moving to, if any.
pub target_store_path: Option<String>,
/// Reported update/health state.
pub health: Health,
/// Version string of the reporting agent.
pub agent_version: String,
/// Optional free-form text accompanying the report.
pub message: Option<String>,
}
/// Device state carried in [`CheckIn::health`].
///
/// Serialized in snake_case: `ok`, `updating`, `failed`, `rolled_back`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum Health {
Ok,
Updating,
Failed,
RolledBack,
}
/// Acknowledgement type paired with [`CheckIn`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CheckInAck {
/// Server clock reading; presumably Unix seconds as returned by [`now`]
/// (TODO confirm against the server).
pub server_time: i64,
}
/// Request payload naming a store path and the substituter that serves it.
///
/// NOTE(review): the fields mirror `store_path` and `substituter` of
/// [`ManifestBody`]; how this request is turned into a manifest is not
/// visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PublishRequest {
/// Store path to publish.
pub store_path: String,
/// Substituter URL associated with `store_path`.
pub substituter: String,
}
/// Current UTC time as a Unix timestamp in whole seconds.
pub fn now() -> i64 {
    let utc = OffsetDateTime::now_utc();
    utc.unix_timestamp()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Well-formed store path shared by the tests below.
    const GOOD_PATH: &str = "/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-system";

    /// Unsigned manifest body used as the starting point for signing tests.
    fn sample_body() -> ManifestBody {
        ManifestBody {
            channel: "prod".into(),
            store_path: GOOD_PATH.into(),
            substituter: "https://cache.example.com".into(),
            timestamp: 1234,
            revision: 1,
            key_id: String::new(),
        }
    }

    #[test]
    fn sign_and_verify_roundtrip() {
        let sk = generate_keypair();
        let manifest = sign_manifest(&sk, sample_body()).unwrap();
        verify_manifest(&sk.verifying_key(), &manifest).unwrap();
    }

    #[test]
    fn rejects_tamper() {
        let sk = generate_keypair();
        let mut manifest = sign_manifest(&sk, sample_body()).unwrap();
        // Swap the target after signing; the signature no longer matches.
        manifest.body.store_path = "/nix/store/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-evil".into();
        assert!(verify_manifest(&sk.verifying_key(), &manifest).is_err());
    }

    #[test]
    fn rejects_bad_store_path() {
        let rejected = [
            "/etc/passwd",
            "/nix/store/short-name",
            "/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-system/../x",
        ];
        for path in rejected {
            assert!(validate_store_path(path).is_err());
        }
        assert!(validate_store_path(GOOD_PATH).is_ok());
    }
}