# Listing metadata: Nix source, 63 lines, 2.0 KiB
# NixOS module: PostgreSQL 16 with its data directory mounted from the SSD
# zpool, plus a best-effort CHECKPOINT hook that runs before sanoid starts.
#
# `service_configs`, `lib.serviceMountWithZpool` and `lib.serviceFilePerms` are
# project-local and defined outside this file; their exact behaviour is not
# visible here.
{
  pkgs,
  config,
  service_configs,
  lib,
  ...
}:
let
  # Shell script executed as an ExecStartPre hook of sanoid.service.
  # It never fails the unit: every code path ends in `exit 0`.
  pgCheckpoint = pkgs.writeShellScript "pg-checkpoint" ''
    # Flush PostgreSQL dirty buffers to disk before the ZFS snapshot so the
    # on-disk state is consistent and the snapshot is recoverable.
    # On failure: log a warning but exit 0 so sanoid still takes the
    # snapshot (an inconsistent snapshot beats no snapshot).
    if ! ${pkgs.systemd}/bin/systemctl is-active --quiet postgresql.service; then
      echo "postgresql is not running, skipping checkpoint" >&2
      exit 0
    fi

    # Bounded to 120 s so a stuck checkpoint cannot block the hook forever.
    # --no-psqlrc keeps the non-interactive call independent of any psqlrc
    # start-up file; ON_ERROR_STOP=1 makes psql exit non-zero on SQL errors.
    if ${pkgs.coreutils}/bin/timeout 120 \
      ${pkgs.util-linux}/bin/runuser -u postgres -- \
      ${lib.getExe' config.services.postgresql.package "psql"} \
      --no-psqlrc -v ON_ERROR_STOP=1 -c "CHECKPOINT" 2>&1; then
      echo "postgresql checkpoint completed"
    else
      echo "WARNING: postgresql checkpoint failed, snapshot may be inconsistent" >&2
    fi

    # Always exit 0 — sanoid must run regardless
    exit 0
  '';
in
{
  imports = [
    # Project helper, called with the service name, the SSD zpool and the
    # PostgreSQL data directory (presumably ties the service to that mount;
    # confirm against the helper's definition).
    (lib.serviceMountWithZpool "postgresql" service_configs.zpool_ssds [
      config.services.postgresql.dataDir
    ])
    # Project helper, called with a tmpfiles-style "Z" rule for the data
    # directory: mode 0700, owner and group postgres.
    (lib.serviceFilePerms "postgresql" [
      "Z ${config.services.postgresql.dataDir} 0700 postgres postgres"
    ])
  ];

  services.postgresql = {
    enable = true;
    package = pkgs.postgresql_16;
    dataDir = service_configs.postgres.dataDir;
    settings = {
      # ZFS provides checksumming and atomic writes, making PostgreSQL's
      # full_page_writes redundant. Disabling it reduces write amplification
      # and SSD wear on the zpool.
      # Done in conjunction with setting recordsize=8k on the ZFS dataset
      # that holds the data directory.
      full_page_writes = false;
    };
  };

  # Run a PostgreSQL CHECKPOINT before sanoid snapshots so the on-disk
  # state is consistent (required since full_page_writes = false).
  systemd.services.sanoid.serviceConfig = {
    # mkAfter appends the hook after the ExecStartPre entries defined
    # elsewhere. The "+" prefix makes systemd run it with full privileges,
    # which the script needs in order to switch to the postgres user via
    # runuser.
    ExecStartPre = lib.mkAfter [ "+${pgCheckpoint}" ];
    # Must stay above the script's own 120 s timeout; the checkpoint can be
    # slow with a large txg_timeout.
    TimeoutStartSec = lib.mkForce 300;
  };
}