llama.cpp: fail2ban for invalid API keys

This commit is contained in:
2026-04-20 17:20:52 -04:00
parent b1c3914b8f
commit 9ddef4bd54
3 changed files with 146 additions and 0 deletions

View File

@@ -20,6 +20,48 @@ in
})
];
# Per-vhost Caddy access log for fail2ban to tail. llama.cpp's own
# "Invalid API Key" warning has no client IP, and behind Caddy the
# llama-server access log only sees 127.0.0.1. Caddy's JSON log has
# the real client IP via request.remote_ip.
# NOTE(review): Caddy rotates its file logs by default; presumably
# fail2ban's backend keeps following the file across rotation -- confirm
# after the first rollover.
services.caddy.virtualHosts."llm.${service_configs.https.domain}".extraConfig = ''
log {
output file /var/log/caddy/access-llama-cpp.log
format json
}
'';
# Ensure the log file exists on boot so fail2ban can start before Caddy
# has received its first request.
#   d = create directory if missing; f = create empty file if missing.
# Modes use the leading-zero octal form that tmpfiles.d(5) documents
# (bare "755"/"644" parse identically, but the 0-prefixed spelling is
# the convention and unambiguous at a glance).
systemd.tmpfiles.rules = [
"d /var/log/caddy 0755 caddy caddy"
"f /var/log/caddy/access-llama-cpp.log 0644 caddy caddy"
];
# Ban IPs that repeatedly fail API key validation. llama.cpp's public
# endpoints (/, /index.html, /bundle.{js,css}, /health, /v1/models,
# /v1/health, /models, /api/tags, /props) bypass auth, so any 401 on
# this vhost is an authenticated-endpoint failure -- no need to filter
# on the Authorization header the way caddy-auth does.
services.fail2ban.jails.llama-cpp = {
enabled = true;
settings = {
# Let fail2ban pick the best available log-watching mechanism
# (inotify/polling) for a plain log file at runtime.
backend = "auto";
# Ports the ban action blocks for an offending IP.
port = "http,https";
logpath = "/var/log/caddy/access-llama-cpp.log";
# defaults: maxretry=5, findtime=10m, bantime=10m
# NAT hairpinning sends LAN traffic via the router IP. Don't ban
# 192.168.1.0/24 or we lock ourselves out.
ignoreip = "127.0.0.1/8 ::1 192.168.1.0/24";
};
filter.Definition = {
# Match any JSON access-log line with HTTP status 401; <HOST> is
# fail2ban's capture group for the client address, taken here from
# Caddy's remote_ip field. The loose ".*" glue assumes remote_ip and
# status only appear as top-level-ish log fields on one line -- the
# standard single-line Caddy JSON access-log shape.
failregex = ''^.*"remote_ip":"<HOST>".*"status":401.*$'';
ignoreregex = "";
# Caddy's JSON "ts" is a Unix-epoch float; {Epoch} consumes the
# integer seconds and the trailing \. anchors on the fractional dot
# so the match does not bleed into other numeric fields.
datepattern = ''"ts":{Epoch}\.'';
};
};
services.llama-cpp = {
enable = true;
model = toString (