{ pkgs, self, system }:

# NixOS VM test for nix-ota.
#
# Defines three "system closure" stand-ins as ordinary derivations (each is a
# directory containing a marker file and a `bin/switch-to-configuration`
# stub), then drives the agent through three publishes:
#   1. publish A -> device switches to A
#   2. publish B -> device switches to B
#   3. publish C (broken: agent's healthCmd will fail) -> device rolls back to B
#
# Arguments:
#   pkgs   - package set providing runCommand, writeText, nix and the test driver
#   self   - the flake; supplies nixosModules.{server,agent} and the publisher package
#   system - platform string used to select the publisher package from self.packages

let
  # mkClosure label extraScript
  #
  # Builds a fake system closure named "sys-<label>". The output contains:
  #   $out/marker                       - the label, for identification
  #   $out/bin/switch-to-configuration  - /bin/sh stub that logs "applied <label>"
  #                                       to stderr, runs `extraScript`, exits 0
  #
  # The heredoc delimiter is quoted ('EOF'), so the shell performs no expansion
  # inside it; ${label} and ${extraScript} are substituted by Nix beforehand.
  # Every line of the indented string shares the same indentation so that the
  # shebang and the closing EOF end up at column 0 after Nix strips it.
  mkClosure = label: extraScript: pkgs.runCommand "sys-${label}" {} ''
    mkdir -p $out/bin
    echo "${label}" > $out/marker
    cat > $out/bin/switch-to-configuration <<'EOF'
    #!/bin/sh
    set -eu
    echo "applied ${label}" >&2
    ${extraScript}
    exit 0
    EOF
    chmod +x $out/bin/switch-to-configuration
  '';

  # Each stub drops a distinct marker under /run so the test script can tell
  # which activation scripts actually ran on the device.
  closureA = mkClosure "a" "touch /run/nix-ota-applied-a";
  closureB = mkClosure "b" "touch /run/nix-ota-applied-b";
  # Closure C activates fine, but the healthCmd checks for /run/nix-ota-broken
  # which we create before publishing C, forcing rollback.
  closureC = mkClosure "c" "touch /run/nix-ota-applied-c";

  # Binary cache keypair generated when this derivation is built (test fixture;
  # not secret). Equivalent to running:
  #   nix-store --generate-binary-cache-key cache.local sec pub
  # HOME and the Nix state/store directories are pointed at $TMPDIR so that
  # nix-store does not touch the real locations from inside the build.
  # Note: the public half is read back with builtins.readFile in the device
  # configuration, which forces this derivation to be built during evaluation.
  cacheKeys = pkgs.runCommand "test-cache-keys" {} ''
    mkdir -p $out
    export HOME=$TMPDIR
    export NIX_STATE_DIR=$TMPDIR/state
    export NIX_STORE_DIR=$TMPDIR/store
    mkdir -p $NIX_STATE_DIR $NIX_STORE_DIR
    ${pkgs.nix}/bin/nix-store --generate-binary-cache-key cache.local $out/secret $out/public
  '';

  # Publisher CLI used by the test script for `keygen` and `publish`.
  pubBin = "${self.packages.${system}.nix-ota-publisher}/bin/nix-ota";
in
pkgs.testers.runNixOSTest {
  name = "nix-ota";

  nodes = {
    # Hosts the nix-ota server (port 8080) and a nix-serve binary cache (port 5000).
    server = { config, pkgs, lib, ... }: {
      imports = [ self.nixosModules.server ];

      nix.settings.experimental-features = [ "nix-command" "flakes" ];

      services.nix-ota-server = {
        enable = true;
        listen = "0.0.0.0:8080";
        openFirewall = true;
        # Must match the --token passed to `publish` in the test script.
        publishTokenFile = pkgs.writeText "tok" "test-token";
      };

      services.nix-serve = {
        enable = true;
        port = 5000;
        secretKeyFile = "${cacheKeys}/secret";
      };
      networking.firewall.allowedTCPPorts = [ 5000 ];

      # The closures need to be in the server's store so nix-serve can serve them.
      system.extraDependencies = [ closureA closureB closureC ];
    };

    # Runs the nix-ota agent pointed at the server node.
    device = { config, pkgs, lib, ... }: {
      imports = [ self.nixosModules.agent ];

      services.nix-ota-agent = {
        enable = true;
        server = "http://server:8080";
        channel = "prod";
        deviceId = "vm-device-1";
        # Written by the test script once the signing key has been generated.
        publicKeyFile = "/var/lib/nix-ota/public.key";
        cacheUrl = "http://server:5000";
        cachePublicKey = builtins.readFile "${cacheKeys}/public";
        interval = 5;
        # Fails once the test script has created /run/nix-ota-broken (step 3).
        healthCmd = "test ! -f /run/nix-ota-broken";
      };

      nix.settings.experimental-features = [ "nix-command" "flakes" ];
      nix.settings.trusted-users = [ "root" ];
    };
  };

  testScript = ''
    closureA = "${closureA}"
    closureB = "${closureB}"
    closureC = "${closureC}"
    pubBin = "${pubBin}"

    start_all()

    server.wait_for_unit("nix-ota-server.service")
    server.wait_for_open_port(8080)
    server.wait_for_unit("nix-serve.service")
    server.wait_for_open_port(5000)

    # Drive the agent ourselves; disable the timer for deterministic stepping.
    # Wait for the device to finish booting first: if the stop were issued
    # before systemd had started the timer, the timer could still come up
    # afterwards and poll behind our back.
    device.wait_for_unit("multi-user.target")
    device.succeed("systemctl stop nix-ota-agent.timer || true")

    # Sign the closures with the binary cache key so the device's Nix will accept them.
    for c in [closureA, closureB, closureC]:
        server.succeed(f"nix store sign --extra-experimental-features nix-command --key-file ${cacheKeys}/secret --recursive {c}")

    # Operator generates a manifest signing key on the server host.
    # The stripped stdout of keygen is used as the public key.
    server.succeed("mkdir -p /root/keys")
    pub = server.succeed(f"{pubBin} keygen --out /root/keys/sign.key").strip()

    # Push pubkey onto the device's writable state dir.
    device.succeed("mkdir -p /var/lib/nix-ota")
    device.succeed(f"echo '{pub}' > /var/lib/nix-ota/public.key")

    def publish(store_path, rev):
        # Publish store_path to the prod channel as revision rev, from the server host.
        server.succeed(
            f"{pubBin} publish "
            f"--server http://localhost:8080 --token test-token "
            f"--key /root/keys/sign.key --channel prod "
            f"--store-path {store_path} --substituter http://server:5000 --revision {rev}"
        )

    def poll_agent():
        # oneshot service: start and wait for it to finish (success or failure).
        # The exit status is deliberately ignored; the assertions that follow
        # each poll decide whether the step passed.
        device.succeed("systemctl start --wait nix-ota-agent.service || true")

    # --- Step 1: publish A
    publish(closureA, 1)
    poll_agent()
    device.succeed(f"readlink -f /nix/var/nix/profiles/system | grep -qF {closureA}")
    device.succeed("test -f /run/nix-ota-applied-a")

    # --- Step 2: publish B
    publish(closureB, 2)
    poll_agent()
    device.succeed(f"readlink -f /nix/var/nix/profiles/system | grep -qF {closureB}")
    device.succeed("test -f /run/nix-ota-applied-b")

    # --- Step 3: publish C with health check rigged to fail
    device.succeed("touch /run/nix-ota-broken")
    publish(closureC, 3)
    poll_agent()
    # Agent should have applied C, failed the health check, and rolled back to B.
    device.succeed(f"readlink -f /nix/var/nix/profiles/system | grep -qF {closureB}")
    # The activation script for C did run before health check.
    device.succeed("test -f /run/nix-ota-applied-c")

    # The dashboard should reflect the rolled_back state.
    server.wait_until_succeeds(
        "curl -fsS http://localhost:8080/ | grep -Eq 'rolled_back|failed'",
        timeout=30
    )
  '';
}