server-config/services/llama-cpp-annotations.nix
Simon Gardling 9baeaa5c23 llama-cpp: add grafana annotations for inference requests
Poll /slots endpoint, create annotations when slots start processing,
close with token count when complete. Includes NixOS VM test with
mock llama-cpp and grafana servers. Dashboard annotation entry added.
2026-04-02 17:43:49 -04:00

{
  config,
  pkgs,
  service_configs,
  lib,
  ...
}:
{
  systemd.services.llama-cpp-annotations = {
    description = "LLM request annotation service for Grafana";
    after = [
      "network.target"
      "grafana.service"
      "llama-cpp.service"
    ];
    wantedBy = [ "multi-user.target" ];

    serviceConfig = {
      ExecStart = "${pkgs.python3}/bin/python3 ${./llama-cpp-annotations.py}";
      Restart = "always";
      RestartSec = "10s";

      # Hardening: ephemeral user with a private state directory under
      # /var/lib, read-only system, no privilege escalation, and network
      # access limited to IPv4/IPv6 sockets.
      DynamicUser = true;
      StateDirectory = "llama-cpp-annotations";
      NoNewPrivileges = true;
      ProtectSystem = "strict";
      ProtectHome = true;
      PrivateTmp = true;
      RestrictAddressFamilies = [
        "AF_INET"
        "AF_INET6"
      ];
      MemoryDenyWriteExecute = true;
    };

    # Endpoints, state location, and poll cadence handed to the Python script.
    environment = {
      LLAMA_CPP_URL = "http://127.0.0.1:${toString service_configs.ports.private.llama_cpp.port}";
      GRAFANA_URL = "http://127.0.0.1:${toString service_configs.ports.private.grafana.port}";
      STATE_FILE = "/var/lib/llama-cpp-annotations/state.json";
      POLL_INTERVAL = "5";
    };
  };
}
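
The unit's ExecStart points at llama-cpp-annotations.py, which is not shown on this page. A minimal sketch of the polling loop the commit message describes might look like the following. It assumes the script is stdlib-only (ExecStart invokes a bare pkgs.python3 with no extra packages), that each entry in the llama.cpp /slots response carries per-slot fields named is_processing and n_decoded (assumed names, not taken from the actual script), and that Grafana's annotation HTTP API (POST /api/annotations, PATCH /api/annotations/:id) is reachable without auth headers, which the real service would need to handle.

#!/usr/bin/env python3
"""Hypothetical sketch of a llama.cpp -> Grafana annotation bridge.

Polls the llama.cpp server's /slots endpoint, opens a Grafana annotation
when a slot starts processing, and patches it with an end time and token
count when the slot goes idle again. Slot field names are assumptions.
"""
import json
import os
import time
import urllib.request

LLAMA_CPP_URL = os.environ["LLAMA_CPP_URL"]
GRAFANA_URL = os.environ["GRAFANA_URL"]
STATE_FILE = os.environ.get("STATE_FILE", "state.json")
POLL_INTERVAL = float(os.environ.get("POLL_INTERVAL", "5"))


def http_json(url, payload=None, method="GET"):
    # Small JSON-over-HTTP helper using only the standard library.
    data = json.dumps(payload).encode() if payload is not None else None
    req = urllib.request.Request(
        url, data=data, method=method,
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req, timeout=10) as resp:
        return json.load(resp)


def load_state():
    # Map of slot id -> open Grafana annotation id, persisted across restarts.
    try:
        with open(STATE_FILE) as f:
            return json.load(f)
    except FileNotFoundError:
        return {}


def save_state(state):
    with open(STATE_FILE, "w") as f:
        json.dump(state, f)


def main():
    open_annotations = load_state()
    # No retry logic here; the unit's Restart=always covers crashes.
    while True:
        slots = http_json(f"{LLAMA_CPP_URL}/slots")
        now_ms = int(time.time() * 1000)
        for slot in slots:
            sid = str(slot["id"])
            busy = slot.get("is_processing", False)  # assumed field name
            if busy and sid not in open_annotations:
                # Slot started processing: open an annotation.
                ann = http_json(f"{GRAFANA_URL}/api/annotations", {
                    "time": now_ms,
                    "tags": ["llama-cpp", f"slot-{sid}"],
                    "text": f"inference on slot {sid}",
                }, method="POST")
                open_annotations[sid] = ann["id"]
            elif not busy and sid in open_annotations:
                # Slot finished: close the annotation with the token count.
                tokens = slot.get("n_decoded", 0)  # assumed field name
                ann_id = open_annotations.pop(sid)
                http_json(f"{GRAFANA_URL}/api/annotations/{ann_id}", {
                    "timeEnd": now_ms,
                    "text": f"inference on slot {sid}: {tokens} tokens",
                }, method="PATCH")
        save_state(open_annotations)
        time.sleep(POLL_INTERVAL)


if __name__ == "__main__":
    main()

Persisting the slot-to-annotation map in STATE_FILE, which lives under the unit's StateDirectory, is what lets an annotation opened before a crash still be closed correctly after Restart=always brings the service back up.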