diff --git a/configuration.nix b/configuration.nix
index 65d960f..cad6b63 100644
--- a/configuration.nix
+++ b/configuration.nix
@@ -47,6 +47,7 @@
     ./services/soulseek.nix
 
     ./services/ups.nix
+    ./services/monitoring.nix
 
     ./services/bitwarden.nix
     ./services/firefox-syncserver.nix
diff --git a/service-configs.nix b/service-configs.nix
index a79a16c..e48a910 100644
--- a/service-configs.nix
+++ b/service-configs.nix
@@ -153,6 +153,22 @@ rec {
         port = 8020;
         proto = "tcp";
       };
+      grafana = {
+        port = 3000;
+        proto = "tcp";
+      };
+      prometheus = {
+        port = 9090;
+        proto = "tcp";
+      };
+      prometheus_node = {
+        port = 9100;
+        proto = "tcp";
+      };
+      prometheus_apcupsd = {
+        port = 9162;
+        proto = "tcp";
+      };
     };
   };
 
@@ -266,6 +282,11 @@ rec {
     domain = "firefox-sync.${https.domain}";
   };
 
+  grafana = {
+    dir = services_dir + "/grafana";
+    domain = "grafana.${https.domain}";
+  };
+
   media = {
     moviesDir = torrents_path + "/media/movies";
     tvDir = torrents_path + "/media/tv";
diff --git a/services/monitoring.nix b/services/monitoring.nix
new file mode 100644
index 0000000..8dc9ed9
--- /dev/null
+++ b/services/monitoring.nix
@@ -0,0 +1,543 @@
+# Monitoring stack: Prometheus (with the node and apcupsd exporters), Grafana
+# with a provisioned "System Overview" dashboard behind Caddy, and a
+# timer-driven collector that exports the Jellyfin active-stream count.
+{
+  config,
+  pkgs,
+  service_configs,
+  lib,
+  ...
+}:
+let
+  # Directory handed to the node exporter's textfile collector (see extraFlags below).
+  textfileDir = "/var/lib/prometheus-node-exporter-textfiles";
+
+  # Datasource reference shared by every panel; uid matches the provisioned datasource.
+  promDs = {
+    type = "prometheus";
+    uid = "prometheus";
+  };
+
+  # Queries Jellyfin's /Sessions endpoint and writes jellyfin_active_streams to
+  # a .prom file in textfileDir. A failed request is reported as 0 streams. The
+  # file is written under a temporary name first and then moved into place.
+  jellyfinCollector = pkgs.writeShellApplication {
+    name = "jellyfin-metrics-collector";
+    runtimeInputs = with pkgs; [
+      curl
+      jq
+    ];
+    text = ''
+      API_KEY=$(cat "$CREDENTIALS_DIRECTORY/jellyfin-api-key")
+      JELLYFIN="http://127.0.0.1:${toString service_configs.ports.private.jellyfin.port}"
+
+      if response=$(curl -sf --max-time 5 "''${JELLYFIN}/Sessions?api_key=''${API_KEY}"); then
+        active_streams=$(echo "$response" | jq '[.[] | select(.NowPlayingItem != null)] | length')
+      else
+        active_streams=0
+      fi
+
+      {
+        echo '# HELP jellyfin_active_streams Number of currently active Jellyfin streams'
+        echo '# TYPE jellyfin_active_streams gauge'
+        echo "jellyfin_active_streams $active_streams"
+      } > "${textfileDir}/jellyfin.prom.$$.tmp"
+      mv "${textfileDir}/jellyfin.prom.$$.tmp" "${textfileDir}/jellyfin.prom"
+    '';
+  };
+
+  # Grafana dashboard definition; serialized with builtins.toJSON further down.
+  dashboard = {
+    editable = true;
+    graphTooltip = 1;
+    schemaVersion = 39;
+    tags = [
+      "system"
+      "monitoring"
+    ];
+    time = {
+      from = "now-6h";
+      to = "now";
+    };
+    timezone = "browser";
+    title = "System Overview";
+    uid = "system-overview";
+
+    panels = [
+      # -- Row 1: UPS --
+      {
+        id = 1;
+        type = "timeseries";
+        title = "UPS Power Draw";
+        gridPos = {
+          h = 8;
+          w = 8;
+          x = 0;
+          y = 0;
+        };
+        datasource = promDs;
+        targets = [
+          {
+            datasource = promDs;
+            expr = "apcupsd_ups_load_percent / 100 * apcupsd_nominal_power_watts";
+            legendFormat = "Power (W)";
+            refId = "A";
+          }
+        ];
+        fieldConfig = {
+          defaults = {
+            unit = "watt";
+            color.mode = "palette-classic";
+            custom = {
+              lineWidth = 2;
+              fillOpacity = 20;
+              spanNulls = true;
+            };
+          };
+          overrides = [ ];
+        };
+      }
+      {
+        id = 7;
+        type = "stat";
+        title = "Energy Usage (24h)";
+        gridPos = {
+          h = 8;
+          w = 4;
+          x = 8;
+          y = 0;
+        };
+        datasource = promDs;
+        targets = [
+          {
+            datasource = promDs;
+            # Average watts over the last 24h, times 24 hours, divided by 1000 -> kWh.
+            expr = "avg_over_time((apcupsd_ups_load_percent / 100 * apcupsd_nominal_power_watts)[24h:]) * 24 / 1000";
+            legendFormat = "";
+            refId = "A";
+          }
+        ];
+        fieldConfig = {
+          defaults = {
+            unit = "kwatth";
+            decimals = 2;
+            thresholds = {
+              mode = "absolute";
+              steps = [
+                {
+                  color = "green";
+                  value = null;
+                }
+                {
+                  color = "yellow";
+                  value = 5;
+                }
+                {
+                  color = "red";
+                  value = 10;
+                }
+              ];
+            };
+          };
+          overrides = [ ];
+        };
+        options = {
+          reduceOptions = {
+            calcs = [ "lastNotNull" ];
+            fields = "";
+            values = false;
+          };
+          colorMode = "value";
+          graphMode = "none";
+        };
+      }
+      {
+        id = 2;
+        type = "gauge";
+        title = "UPS Load";
+        gridPos = {
+          h = 8;
+          w = 6;
+          x = 12;
+          y = 0;
+        };
+        datasource = promDs;
+        targets = [
+          {
+            datasource = promDs;
+            expr = "apcupsd_ups_load_percent";
+            refId = "A";
+          }
+        ];
+        fieldConfig = {
+          defaults = {
+            unit = "percent";
+            min = 0;
+            max = 100;
+            thresholds = {
+              mode = "absolute";
+              steps = [
+                {
+                  color = "green";
+                  value = null;
+                }
+                {
+                  color = "yellow";
+                  value = 70;
+                }
+                {
+                  color = "red";
+                  value = 90;
+                }
+              ];
+            };
+          };
+          overrides = [ ];
+        };
+        options.reduceOptions = {
+          calcs = [ "lastNotNull" ];
+          fields = "";
+          values = false;
+        };
+      }
+      {
+        id = 3;
+        type = "gauge";
+        title = "UPS Battery";
+        gridPos = {
+          h = 8;
+          w = 6;
+          x = 18;
+          y = 0;
+        };
+        datasource = promDs;
+        targets = [
+          {
+            datasource = promDs;
+            expr = "apcupsd_battery_charge_percent";
+            refId = "A";
+          }
+        ];
+        fieldConfig = {
+          defaults = {
+            unit = "percent";
+            min = 0;
+            max = 100;
+            thresholds = {
+              mode = "absolute";
+              steps = [
+                {
+                  color = "red";
+                  value = null;
+                }
+                {
+                  color = "yellow";
+                  value = 20;
+                }
+                {
+                  color = "green";
+                  value = 50;
+                }
+              ];
+            };
+          };
+          overrides = [ ];
+        };
+        options.reduceOptions = {
+          calcs = [ "lastNotNull" ];
+          fields = "";
+          values = false;
+        };
+      }
+
+      # -- Row 2: System --
+      {
+        id = 4;
+        type = "timeseries";
+        title = "CPU Temperature";
+        gridPos = {
+          h = 8;
+          w = 12;
+          x = 0;
+          y = 8;
+        };
+        datasource = promDs;
+        targets = [
+          {
+            datasource = promDs;
+            expr = ''node_hwmon_temp_celsius{chip=~"pci.*"}'';
+            legendFormat = "CPU {{sensor}}";
+            refId = "A";
+          }
+        ];
+        fieldConfig = {
+          defaults = {
+            unit = "celsius";
+            color.mode = "palette-classic";
+            custom = {
+              lineWidth = 2;
+              fillOpacity = 10;
+              spanNulls = true;
+            };
+          };
+          overrides = [ ];
+        };
+      }
+      {
+        id = 5;
+        type = "stat";
+        title = "System Uptime";
+        gridPos = {
+          h = 8;
+          w = 6;
+          x = 12;
+          y = 8;
+        };
+        datasource = promDs;
+        targets = [
+          {
+            datasource = promDs;
+            expr = "time() - node_boot_time_seconds";
+            refId = "A";
+          }
+        ];
+        fieldConfig = {
+          defaults = {
+            unit = "s";
+            thresholds = {
+              mode = "absolute";
+              steps = [
+                {
+                  color = "green";
+                  value = null;
+                }
+              ];
+            };
+          };
+          overrides = [ ];
+        };
+        options = {
+          reduceOptions = {
+            calcs = [ "lastNotNull" ];
+            fields = "";
+            values = false;
+          };
+          colorMode = "value";
+          graphMode = "none";
+        };
+      }
+      {
+        id = 6;
+        type = "stat";
+        title = "Jellyfin Active Streams";
+        gridPos = {
+          h = 8;
+          w = 6;
+          x = 18;
+          y = 8;
+        };
+        datasource = promDs;
+        targets = [
+          {
+            datasource = promDs;
+            expr = "jellyfin_active_streams";
+            refId = "A";
+          }
+        ];
+        fieldConfig = {
+          defaults = {
+            thresholds = {
+              mode = "absolute";
+              steps = [
+                {
+                  color = "green";
+                  value = null;
+                }
+                {
+                  color = "yellow";
+                  value = 3;
+                }
+                {
+                  color = "red";
+                  value = 6;
+                }
+              ];
+            };
+          };
+          overrides = [ ];
+        };
+        options = {
+          reduceOptions = {
+            calcs = [ "lastNotNull" ];
+            fields = "";
+            values = false;
+          };
+          colorMode = "value";
+          graphMode = "area";
+        };
+      }
+    ];
+  };
+in
+{
+  imports = [
+    (lib.serviceMountWithZpool "grafana" service_configs.zpool_ssds [
+      service_configs.grafana.dir
+    ])
+    (lib.serviceFilePerms "grafana" [
+      "Z ${service_configs.grafana.dir} 0700 grafana grafana"
+    ])
+    (lib.serviceMountWithZpool "prometheus" service_configs.zpool_ssds [
+      "/var/lib/prometheus"
+    ])
+    (lib.serviceFilePerms "prometheus" [
+      "Z /var/lib/prometheus 0700 prometheus prometheus"
+    ])
+  ];
+
+  # -- Prometheus --
+  services.prometheus = {
+    enable = true;
+    port = service_configs.ports.private.prometheus.port;
+    listenAddress = "127.0.0.1";
+    stateDir = "prometheus";
+    retentionTime = "90d";
+
+    exporters = {
+      node = {
+        enable = true;
+        port = service_configs.ports.private.prometheus_node.port;
+        listenAddress = "127.0.0.1";
+        enabledCollectors = [
+          "hwmon"
+          "systemd"
+          "textfile"
+        ];
+        extraFlags = [
+          "--collector.textfile.directory=${textfileDir}"
+        ];
+      };
+
+      apcupsd = {
+        enable = true;
+        port = service_configs.ports.private.prometheus_apcupsd.port;
+        listenAddress = "127.0.0.1";
+        apcupsdAddress = "127.0.0.1:3551";
+      };
+    };
+
+    scrapeConfigs = [
+      {
+        job_name = "prometheus";
+        static_configs = [
+          { targets = [ "127.0.0.1:${toString service_configs.ports.private.prometheus.port}" ]; }
+        ];
+      }
+      {
+        job_name = "node";
+        static_configs = [
+          { targets = [ "127.0.0.1:${toString service_configs.ports.private.prometheus_node.port}" ]; }
+        ];
+      }
+      {
+        job_name = "apcupsd";
+        static_configs = [
+          { targets = [ "127.0.0.1:${toString service_configs.ports.private.prometheus_apcupsd.port}" ]; }
+        ];
+      }
+    ];
+  };
+
+  # -- Grafana --
+  services.grafana = {
+    enable = true;
+    dataDir = service_configs.grafana.dir;
+
+    settings = {
+      server = {
+        http_addr = "127.0.0.1";
+        http_port = service_configs.ports.private.grafana.port;
+        domain = service_configs.grafana.domain;
+        root_url = "https://${service_configs.grafana.domain}";
+      };
+
+      # Caddy handles auth -- disable Grafana login entirely
+      "auth.anonymous" = {
+        enabled = true;
+        org_role = "Admin";
+      };
+      "auth.basic".enabled = false;
+      "auth".disable_login_form = true;
+
+      analytics.reporting_enabled = false;
+    };
+
+    provision = {
+      datasources.settings = {
+        apiVersion = 1;
+        datasources = [
+          {
+            name = "Prometheus";
+            type = "prometheus";
+            url = "http://127.0.0.1:${toString service_configs.ports.private.prometheus.port}";
+            access = "proxy";
+            isDefault = true;
+            editable = false;
+            uid = "prometheus";
+          }
+        ];
+      };
+
+      dashboards.settings.providers = [
+        {
+          name = "system";
+          type = "file";
+          options.path = "/etc/grafana-dashboards";
+          disableDeletion = true;
+          updateIntervalSeconds = 60;
+        }
+      ];
+    };
+  };
+
+  # Provision dashboard JSON
+  environment.etc."grafana-dashboards/system-overview.json" = {
+    text = builtins.toJSON dashboard;
+    mode = "0444";
+  };
+
+  # Caddy reverse proxy with auth
+  services.caddy.virtualHosts."${service_configs.grafana.domain}".extraConfig = ''
+    import ${config.age.secrets.caddy_auth.path}
+    reverse_proxy :${builtins.toString service_configs.ports.private.grafana.port}
+  '';
+
+  # -- Jellyfin metrics collector --
+  # Queries the Jellyfin API for active streams and writes a .prom file
+  # for the node_exporter textfile collector.
+  systemd.services.jellyfin-metrics-collector = {
+    description = "Collect Jellyfin metrics for Prometheus";
+    after = [ "network.target" ];
+    serviceConfig = {
+      Type = "oneshot";
+      ExecStart = lib.getExe jellyfinCollector;
+      LoadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";
+    };
+  };
+
+  systemd.timers.jellyfin-metrics-collector = {
+    wantedBy = [ "timers.target" ];
+    timerConfig = {
+      OnCalendar = "*:*:0/30";
+      # systemd timers default to AccuracySec=1min, which would let this
+      # 30-second schedule fire up to a minute late; tighten the window.
+      AccuracySec = "1s";
+      RandomizedDelaySec = "5s";
+    };
+  };
+
+  # Ensure textfile collector directory exists (tmpfs root -- recreated on boot)
+  systemd.tmpfiles.rules = [
+    "d ${textfileDir} 0755 root root -"
+  ];
+}