diff --git a/services/postgresql.nix b/services/postgresql.nix
index 3fb4463..3757e7b 100644
--- a/services/postgresql.nix
+++ b/services/postgresql.nix
@@ -5,6 +5,30 @@
   lib,
   ...
 }:
+let
+  pgCheckpoint = pkgs.writeShellScript "pg-checkpoint" ''
+    # Flush PostgreSQL dirty buffers to disk before ZFS snapshot so the
+    # on-disk state is consistent and the snapshot is recoverable.
+    # On failure: log a warning but exit 0 so sanoid still takes the
+    # snapshot (an inconsistent snapshot beats no snapshot).
+    if ! ${pkgs.systemd}/bin/systemctl is-active --quiet postgresql.service; then
+      echo "postgresql is not running, skipping checkpoint" >&2
+      exit 0
+    fi
+
+    if ${pkgs.coreutils}/bin/timeout 120 \
+      ${pkgs.util-linux}/bin/runuser -u postgres -- \
+      ${lib.getExe' config.services.postgresql.package "psql"} \
+      -v ON_ERROR_STOP=1 -c "CHECKPOINT" 2>&1; then
+      echo "postgresql checkpoint completed"
+    else
+      echo "WARNING: postgresql checkpoint failed, snapshot may be inconsistent" >&2
+    fi
+
+    # Always exit 0 — sanoid must run regardless
+    exit 0
+  '';
+in
 {
   imports = [
     (lib.serviceMountWithZpool "postgresql" service_configs.zpool_ssds [
@@ -29,4 +53,10 @@
     };
   };
 
+  # Run a PostgreSQL CHECKPOINT before sanoid snapshots so the on-disk
+  # state is consistent (required since full_page_writes = false).
+  systemd.services.sanoid.serviceConfig = {
+    ExecStartPre = lib.mkAfter [ "+${pgCheckpoint}" ];
+    TimeoutStartSec = lib.mkForce 300; # checkpoint can be slow with large txg_timeout
+  };
 }