Compare commits

...

2 Commits

Author SHA1 Message Date
40fa8147e6 security: harden CI pipeline (pin host keys, suppress ntfy topic, restrict secrets)
Some checks failed
Build and Deploy / deploy (push) Failing after 2m33s
2026-03-31 11:03:07 -04:00
c556b82f9a grafana: init
Shows powerdraw, temps, uptime, and jellyfin streams
2026-03-31 10:57:40 -04:00
6 changed files with 515 additions and 8 deletions

View File

@@ -7,7 +7,7 @@ jobs:
deploy:
runs-on: nix
env:
GIT_SSH_COMMAND: "ssh -i /run/agenix/ci-deploy-key -o StrictHostKeyChecking=no"
GIT_SSH_COMMAND: "ssh -i /run/agenix/ci-deploy-key -o StrictHostKeyChecking=yes -o UserKnownHostsFile=/etc/ci-known-hosts"
steps:
- uses: https://github.com/actions/checkout@v4
with:
@@ -25,12 +25,12 @@ jobs:
run: |
eval $(ssh-agent -s)
ssh-add /run/agenix/ci-deploy-key
nix run github:serokell/deploy-rs -- .#muffin --skip-checks --ssh-opts="-o StrictHostKeyChecking=no"
nix run github:serokell/deploy-rs -- .#muffin --skip-checks --ssh-opts="-o StrictHostKeyChecking=yes -o UserKnownHostsFile=/etc/ci-known-hosts"
- name: Health check
run: |
sleep 10
ssh -i /run/agenix/ci-deploy-key -o StrictHostKeyChecking=no root@server-public \
ssh -i /run/agenix/ci-deploy-key -o StrictHostKeyChecking=yes -o UserKnownHostsFile=/etc/ci-known-hosts root@server-public \
"systemctl is-active gitea && systemctl is-active caddy && systemctl is-active continuwuity && systemctl is-active coturn"
- name: Notify success
@@ -38,7 +38,7 @@ jobs:
run: |
TOPIC=$(cat /run/agenix/ntfy-alerts-topic | tr -d '[:space:]')
TOKEN=$(cat /run/agenix/ntfy-alerts-token | tr -d '[:space:]')
curl -sf -X POST \
curl -sf -o /dev/null -X POST \
"https://ntfy.sigkill.computer/$TOPIC" \
-H "Authorization: Bearer $TOKEN" \
-H "Title: [muffin] Deploy succeeded" \
@@ -51,7 +51,7 @@ jobs:
run: |
TOPIC=$(cat /run/agenix/ntfy-alerts-topic 2>/dev/null | tr -d '[:space:]')
TOKEN=$(cat /run/agenix/ntfy-alerts-token 2>/dev/null | tr -d '[:space:]')
curl -sf -X POST \
curl -sf -o /dev/null -X POST \
"https://ntfy.sigkill.computer/$TOPIC" \
-H "Authorization: Bearer $TOKEN" \
-H "Title: [muffin] Deploy FAILED" \

View File

@@ -47,6 +47,7 @@
./services/soulseek.nix
./services/ups.nix
./services/monitoring.nix
./services/bitwarden.nix
./services/firefox-syncserver.nix
@@ -78,6 +79,14 @@
networking.hosts."192.168.1.50" = [ "server-public" ];
networking.hosts."192.168.1.223" = [ "desktop" ];
# SSH known_hosts for CI runner (pinned host keys)
environment.etc."ci-known-hosts".text = ''
server-public ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFMjgaMnE+zS7tL+m5E7gh9Q9U1zurLdmU0qcmEmaucu
192.168.1.50 ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFMjgaMnE+zS7tL+m5E7gh9Q9U1zurLdmU0qcmEmaucu
git.sigkill.computer ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFMjgaMnE+zS7tL+m5E7gh9Q9U1zurLdmU0qcmEmaucu
git.gardling.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFMjgaMnE+zS7tL+m5E7gh9Q9U1zurLdmU0qcmEmaucu
'';
services.kmscon.enable = true;
systemd.targets = {

View File

@@ -140,8 +140,8 @@
git-crypt-key-dotfiles = {
file = ../secrets/git-crypt-key-dotfiles.age;
mode = "0400";
owner = "gitea-runner";
group = "gitea-runner";
owner = "root";
group = "root";
};
# Git-crypt symmetric key for server-config repo

View File

@@ -153,6 +153,22 @@ rec {
port = 8020;
proto = "tcp";
};
grafana = {
port = 3000;
proto = "tcp";
};
prometheus = {
port = 9090;
proto = "tcp";
};
prometheus_node = {
port = 9100;
proto = "tcp";
};
prometheus_apcupsd = {
port = 9162;
proto = "tcp";
};
};
};
@@ -266,6 +282,11 @@ rec {
domain = "firefox-sync.${https.domain}";
};
grafana = {
dir = services_dir + "/grafana";
domain = "grafana.${https.domain}";
};
media = {
moviesDir = torrents_path + "/media/movies";
tvDir = torrents_path + "/media/tv";

View File

@@ -41,6 +41,6 @@
User = "gitea-runner";
Group = "gitea-runner";
};
environment.GIT_SSH_COMMAND = "ssh -i /run/agenix/ci-deploy-key -o StrictHostKeyChecking=no";
environment.GIT_SSH_COMMAND = "ssh -i /run/agenix/ci-deploy-key -o StrictHostKeyChecking=yes -o UserKnownHostsFile=/etc/ci-known-hosts";
};
}

477
services/monitoring.nix Normal file
View File

@@ -0,0 +1,477 @@
{
config,
pkgs,
service_configs,
lib,
...
}:
let
# Directory shared between the Jellyfin metrics collector, which writes
# jellyfin.prom into it, and the node exporter, which is pointed at it via
# --collector.textfile.directory.
textfileDir = "/var/lib/prometheus-node-exporter-textfiles";
# Datasource reference attached to every dashboard panel and query below.
# The uid has to stay equal to the uid of the provisioned Prometheus
# datasource in services.grafana.provision.
promDs = {
  uid = "prometheus";
  type = "prometheus";
};
# Shell script that queries the local Jellyfin server for its sessions and
# publishes the number of sessions that are currently playing something as
# the gauge `jellyfin_active_streams` in textfileDir/jellyfin.prom.
#
# The API key is read from the systemd credentials directory
# ($CREDENTIALS_DIRECTORY), so the script is meant to be started by a unit
# that sets LoadCredential for "jellyfin-api-key".
jellyfinCollector = pkgs.writeShellApplication {
  name = "jellyfin-metrics-collector";
  runtimeInputs = with pkgs; [
    curl
    jq
  ];
  text = ''
    API_KEY=$(cat "$CREDENTIALS_DIRECTORY/jellyfin-api-key")
    JELLYFIN="http://127.0.0.1:${toString service_configs.ports.private.jellyfin.port}"
    PROM_FILE="${textfileDir}/jellyfin.prom"
    TMP_FILE="$PROM_FILE.$$.tmp"

    # Do not leave a stray temp file behind if a later step aborts the script.
    trap 'rm -f "$TMP_FILE"' EXIT

    # The key travels in the Authorization header instead of the query
    # string, so it never becomes part of the request URL.
    if response=$(curl -sf --max-time 5 \
      -H "Authorization: MediaBrowser Token=\"$API_KEY\"" \
      "$JELLYFIN/Sessions"); then
      active_streams=$(jq '[.[] | select(.NowPlayingItem != null)] | length' <<< "$response")
    else
      # Best effort: a failed request is reported as zero active streams.
      active_streams=0
    fi

    {
      echo '# HELP jellyfin_active_streams Number of currently active Jellyfin streams'
      echo '# TYPE jellyfin_active_streams gauge'
      echo "jellyfin_active_streams $active_streams"
    } > "$TMP_FILE"

    # Write to a temp name first and rename, so a reader of jellyfin.prom
    # only ever sees a complete file.
    mv "$TMP_FILE" "$PROM_FILE"
  '';
};
# Grafana "System Overview" dashboard as a Nix attribute set; it is turned
# into JSON elsewhere in this module. Six panels in two rows of three:
#   row 1 (y = 0): UPS power draw, UPS load, UPS battery
#   row 2 (y = 8): hwmon temperatures, uptime, Jellyfin active streams
# The helpers in the `let` only remove repetition; every panel expands to
# the same attributes as a hand-written definition.
dashboard =
  let
    # Single-query target against the shared Prometheus datasource.
    query = expr: {
      datasource = promDs;
      refId = "A";
      inherit expr;
    };

    # One colour step of a threshold list.
    step = color: value: { inherit color value; };

    # Threshold block using absolute (not percentage) step values.
    absoluteThresholds = steps: {
      mode = "absolute";
      inherit steps;
    };

    # Reduce options shared by the gauge and stat panels.
    lastValue = {
      calcs = [ "lastNotNull" ];
      fields = "";
      values = false;
    };

    # Line styling for the timeseries panels; only the fill opacity varies.
    lineStyle = fillOpacity: {
      lineWidth = 2;
      spanNulls = true;
      inherit fillOpacity;
    };

    # Assemble one panel. All panels are 8 units high and have no field
    # overrides; `options` is only emitted when the caller supplies it.
    mkPanel =
      {
        id,
        type,
        title,
        x,
        y,
        w,
        targets,
        defaults,
        options ? null,
      }:
      {
        inherit
          id
          type
          title
          targets
          ;
        datasource = promDs;
        gridPos = {
          h = 8;
          inherit w x y;
        };
        fieldConfig = {
          inherit defaults;
          overrides = [ ];
        };
      }
      // (if options == null then { } else { inherit options; });
  in
  {
    uid = "system-overview";
    title = "System Overview";
    tags = [
      "system"
      "monitoring"
    ];
    schemaVersion = 39;
    editable = true;
    graphTooltip = 1;
    timezone = "browser";
    time = {
      from = "now-6h";
      to = "now";
    };

    panels = [
      # -- Row 1: UPS --
      (mkPanel {
        id = 1;
        type = "timeseries";
        title = "UPS Power Draw";
        x = 0;
        y = 0;
        w = 12;
        targets = [
          (
            query "apcupsd_ups_load_percent / 100 * apcupsd_nominal_power_watts"
            // {
              legendFormat = "Power (W)";
            }
          )
        ];
        defaults = {
          unit = "watt";
          color.mode = "palette-classic";
          custom = lineStyle 20;
        };
      })
      (mkPanel {
        id = 2;
        type = "gauge";
        title = "UPS Load";
        x = 12;
        y = 0;
        w = 6;
        targets = [ (query "apcupsd_ups_load_percent") ];
        defaults = {
          unit = "percent";
          min = 0;
          max = 100;
          thresholds = absoluteThresholds [
            (step "green" null)
            (step "yellow" 70)
            (step "red" 90)
          ];
        };
        options.reduceOptions = lastValue;
      })
      (mkPanel {
        id = 3;
        type = "gauge";
        title = "UPS Battery";
        x = 18;
        y = 0;
        w = 6;
        targets = [ (query "apcupsd_battery_charge_percent") ];
        defaults = {
          unit = "percent";
          min = 0;
          max = 100;
          thresholds = absoluteThresholds [
            (step "red" null)
            (step "yellow" 20)
            (step "green" 50)
          ];
        };
        options.reduceOptions = lastValue;
      })

      # -- Row 2: System --
      (mkPanel {
        id = 4;
        type = "timeseries";
        title = "CPU Temperature";
        x = 0;
        y = 8;
        w = 12;
        targets = [
          (
            query "node_hwmon_temp_celsius"
            // {
              legendFormat = "{{chip}} {{sensor}}";
            }
          )
        ];
        defaults = {
          unit = "celsius";
          color.mode = "palette-classic";
          custom = lineStyle 10;
        };
      })
      (mkPanel {
        id = 5;
        type = "stat";
        title = "System Uptime";
        x = 12;
        y = 8;
        w = 6;
        targets = [ (query "time() - node_boot_time_seconds") ];
        defaults = {
          unit = "s";
          thresholds = absoluteThresholds [
            (step "green" null)
          ];
        };
        options = {
          reduceOptions = lastValue;
          colorMode = "value";
          graphMode = "none";
        };
      })
      (mkPanel {
        id = 6;
        type = "stat";
        title = "Jellyfin Active Streams";
        x = 18;
        y = 8;
        w = 6;
        targets = [ (query "jellyfin_active_streams") ];
        defaults = {
          thresholds = absoluteThresholds [
            (step "green" null)
            (step "yellow" 3)
            (step "red" 6)
          ];
        };
        options = {
          reduceOptions = lastValue;
          colorMode = "value";
          graphMode = "area";
        };
      })
    ];
  };
in
{
# Storage wiring for the two stateful services. For each of Grafana and
# Prometheus the state directory is registered with the SSD zpool helper and
# given a recursive ownership/mode rule (0700, owned by the service user).
# NOTE(review): serviceMountWithZpool / serviceFilePerms are project helpers
# defined outside this file; their exact effect is not visible here.
imports =
  let
    inherit (service_configs) zpool_ssds;
    grafanaDir = service_configs.grafana.dir;
    # Written out literally; it has to match services.prometheus.stateDir.
    prometheusDir = "/var/lib/prometheus";
  in
  [
    (lib.serviceMountWithZpool "grafana" zpool_ssds [ grafanaDir ])
    (lib.serviceFilePerms "grafana" [ "Z ${grafanaDir} 0700 grafana grafana" ])
    (lib.serviceMountWithZpool "prometheus" zpool_ssds [ prometheusDir ])
    (lib.serviceFilePerms "prometheus" [ "Z ${prometheusDir} 0700 prometheus prometheus" ])
  ];
# -- Prometheus --
# Prometheus server plus the node and apcupsd exporters. Everything listens
# on loopback only, and the server scrapes itself and both exporters through
# their loopback ports.
services.prometheus =
  let
    ports = service_configs.ports.private;
    loopback = "127.0.0.1";

    # Scrape job with a single static target on this host.
    scrapeLocal = job_name: port: {
      inherit job_name;
      static_configs = [
        { targets = [ "${loopback}:${toString port}" ]; }
      ];
    };
  in
  {
    enable = true;
    listenAddress = loopback;
    port = ports.prometheus.port;
    stateDir = "prometheus";
    retentionTime = "90d";

    exporters.node = {
      enable = true;
      listenAddress = loopback;
      port = ports.prometheus_node.port;
      enabledCollectors = [
        "hwmon"
        "systemd"
        "textfile"
      ];
      # Tell the textfile collector where the Jellyfin .prom file is written.
      extraFlags = [
        "--collector.textfile.directory=${textfileDir}"
      ];
    };

    exporters.apcupsd = {
      enable = true;
      listenAddress = loopback;
      port = ports.prometheus_apcupsd.port;
      apcupsdAddress = "${loopback}:3551";
    };

    scrapeConfigs = [
      (scrapeLocal "prometheus" ports.prometheus.port)
      (scrapeLocal "node" ports.prometheus_node.port)
      (scrapeLocal "apcupsd" ports.prometheus_apcupsd.port)
    ];
  };
# -- Grafana --
# Grafana bound to loopback, with one provisioned Prometheus datasource and
# a file-based dashboard provider reading /etc/grafana-dashboards.
services.grafana =
  let
    inherit (service_configs.grafana) domain;
    ports = service_configs.ports.private;
  in
  {
    enable = true;
    dataDir = service_configs.grafana.dir;

    settings.server = {
      inherit domain;
      http_addr = "127.0.0.1";
      http_port = ports.grafana.port;
      root_url = "https://${domain}";
    };

    # Caddy handles auth -- disable Grafana login entirely.
    # Every request that reaches Grafana is treated as an anonymous Admin, so
    # access control rests on the Caddy vhost and the loopback-only bind.
    settings."auth.anonymous" = {
      enabled = true;
      org_role = "Admin";
    };
    settings."auth.basic".enabled = false;
    settings."auth".disable_login_form = true;

    settings.analytics.reporting_enabled = false;

    provision.datasources.settings = {
      apiVersion = 1;
      datasources = [
        {
          # uid is what the dashboard panels use to refer to this datasource.
          uid = "prometheus";
          name = "Prometheus";
          type = "prometheus";
          access = "proxy";
          url = "http://127.0.0.1:${toString ports.prometheus.port}";
          isDefault = true;
          editable = false;
        }
      ];
    };

    provision.dashboards.settings.providers = [
      {
        name = "system";
        type = "file";
        disableDeletion = true;
        updateIntervalSeconds = 60;
        options.path = "/etc/grafana-dashboards";
      }
    ];
  };
# Serialise the dashboard attribute set to JSON and install it, world-readable,
# under /etc/grafana-dashboards, the directory Grafana's file provider reads.
environment.etc."grafana-dashboards/system-overview.json" = {
  mode = "0444";
  text = builtins.toJSON dashboard;
};
# Caddy virtual host for the Grafana domain: pull in the shared auth snippet
# from the agenix secret, then proxy everything to Grafana's local port.
services.caddy.virtualHosts."${service_configs.grafana.domain}".extraConfig =
  let
    authSnippet = config.age.secrets.caddy_auth.path;
    grafanaPort = toString service_configs.ports.private.grafana.port;
  in
  ''
    import ${authSnippet}
    reverse_proxy :${grafanaPort}
  '';
# -- Jellyfin metrics collector --
# One-shot unit that runs the collector script once per activation. The
# script asks the Jellyfin API for active streams and writes a .prom file
# for the node_exporter textfile collector. The API key is handed over as a
# systemd credential sourced from the agenix secret.
systemd.services.jellyfin-metrics-collector = {
  description = "Collect Jellyfin metrics for Prometheus";
  after = [ "network.target" ];
  serviceConfig.Type = "oneshot";
  serviceConfig.ExecStart = lib.getExe jellyfinCollector;
  serviceConfig.LoadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";
};
# Runs the Jellyfin metrics collector every 30 seconds (at :00 and :30 of
# each minute), with up to 5 seconds of random jitter.
systemd.timers.jellyfin-metrics-collector = {
  wantedBy = [ "timers.target" ];
  timerConfig = {
    OnCalendar = "*:*:0/30";
    # systemd's default AccuracySec is one minute, which allows it to defer
    # each elapse by up to a minute -- longer than the 30s interval itself.
    # Tighten it so samples actually arrive on the intended cadence.
    AccuracySec = "1s";
    RandomizedDelaySec = "5s";
  };
};
# The root filesystem is a tmpfs, so the textfile collector directory is
# gone after every reboot; have tmpfiles create it again (root-owned, 0755).
systemd.tmpfiles.rules = [ "d ${textfileDir} 0755 root root -" ];
}