diff --git a/services/llama-cpp.nix b/services/llama-cpp.nix index d9f7f76..ce6de79 100644 --- a/services/llama-cpp.nix +++ b/services/llama-cpp.nix @@ -20,6 +20,48 @@ in }) ]; + # Per-vhost Caddy access log for fail2ban to tail. llama.cpp's own + # "Invalid API Key" warning has no client IP, and behind Caddy the + # llama-server access log only sees 127.0.0.1. Caddy's JSON log has + # the real client IP via request.remote_ip. + services.caddy.virtualHosts."llm.${service_configs.https.domain}".extraConfig = '' + log { + output file /var/log/caddy/access-llama-cpp.log + format json + } + ''; + + # Ensure the log file exists on boot so fail2ban can start before Caddy + # has received its first request. + systemd.tmpfiles.rules = [ + "d /var/log/caddy 755 caddy caddy" + "f /var/log/caddy/access-llama-cpp.log 644 caddy caddy" + ]; + + # Ban IPs that repeatedly fail API key validation. llama.cpp's public + # endpoints (/, /index.html, /bundle.{js,css}, /health, /v1/models, + # /v1/health, /models, /api/tags, /props) bypass auth, so any 401 on + # this vhost is an authenticated-endpoint failure -- no need to filter + # on the Authorization header the way caddy-auth does. + services.fail2ban.jails.llama-cpp = { + enabled = true; + settings = { + backend = "auto"; + port = "http,https"; + logpath = "/var/log/caddy/access-llama-cpp.log"; + # defaults: maxretry=5, findtime=10m, bantime=10m + + # NAT hairpinning sends LAN traffic via the router IP. Don't ban + # 192.168.1.0/24 or we lock ourselves out. + ignoreip = "127.0.0.1/8 ::1 192.168.1.0/24"; + }; + filter.Definition = { + failregex = ''^.*"remote_ip":"<HOST>".*"status":401.*$''; # <HOST> is fail2ban's mandatory IP-capture token -- without it the filter has no address to ban + ignoreregex = ""; + datepattern = ''"ts":{Epoch}\.''; + }; + }; + + services.llama-cpp = { enable = true; model = toString ( diff --git a/tests/fail2ban-llama-cpp.nix b/tests/fail2ban-llama-cpp.nix new file mode 100644 index 0000000..33f65b4 --- /dev/null +++ b/tests/fail2ban-llama-cpp.nix @@ -0,0 +1,103 @@ +{ + config, + lib, + pkgs, + ... 
+}: +pkgs.testers.runNixOSTest { + name = "fail2ban-llama-cpp"; + + nodes = { + server = + { + config, + pkgs, + lib, + ... + }: + { + imports = [ + ../modules/server-security.nix + ]; + + # Minimal Caddy that stands in for the llama-cpp reverse_proxy. + # Every request returns 401, mimicking llama.cpp's api-key middleware + # on an invalid key. We only care that Caddy writes the 401 with the + # real client IP to the same access log the production jail tails. + services.caddy = { + enable = true; + virtualHosts.":80".extraConfig = '' + log { + output file /var/log/caddy/access-llama-cpp.log + format json + } + respond "Invalid API Key" 401 + ''; + }; + + # Jail definition mirrors services/llama-cpp.nix. ignoreip omitted + # so the test VM subnet isn't exempted; maxretry lowered for speed. + services.fail2ban.jails.llama-cpp = { + enabled = true; + settings = { + backend = "auto"; + port = "http,https"; + logpath = "/var/log/caddy/access-llama-cpp.log"; + maxretry = 3; + }; + filter.Definition = { + failregex = ''^.*"remote_ip":"<HOST>".*"status":401.*$''; # <HOST> captures the client IP to ban, matching the production filter + ignoreregex = ""; + datepattern = ''"ts":{Epoch}\.''; + }; + }; + + systemd.tmpfiles.rules = [ + "d /var/log/caddy 755 caddy caddy" + "f /var/log/caddy/access-llama-cpp.log 644 caddy caddy" + ]; + + networking.firewall.allowedTCPPorts = [ 80 ]; + }; + + client = { + environment.systemPackages = [ pkgs.curl ]; + }; + }; + + testScript = '' + import time + import re + + start_all() + server.wait_for_unit("caddy.service") + server.wait_for_unit("fail2ban.service") + server.wait_for_open_port(80) + time.sleep(2) + + with subtest("Verify llama-cpp jail is active"): + status = server.succeed("fail2ban-client status") + assert "llama-cpp" in status, f"llama-cpp jail not found in: {status}" + + with subtest("Generate failed API key attempts"): + # Force IPv4 for consistent IP tracking across the NAT fabric. 
+ for i in range(4): + client.execute( + "curl -4 -s -H 'Authorization: Bearer badkey' http://server/v1/chat/completions || true" + ) + time.sleep(1)  # pace requests so each 401 lands as a separate timestamped log entry + + with subtest("Verify IP is banned"): + time.sleep(5)  # allow fail2ban's log polling to observe the failures before querying + status = server.succeed("fail2ban-client status llama-cpp") + print(f"llama-cpp jail status: {status}") + match = re.search(r"Currently banned:\s*(\d+)", status) + assert match and int(match.group(1)) >= 1, ( + f"Expected at least 1 banned IP, got: {status}" + ) + + with subtest("Verify banned client cannot connect"): + exit_code = client.execute("curl -4 -s --max-time 3 http://server/ 2>&1")[0] + assert exit_code != 0, "Connection should be blocked" + ''; +} diff --git a/tests/tests.nix b/tests/tests.nix index 8493569..dda877a 100644 --- a/tests/tests.nix +++ b/tests/tests.nix @@ -21,6 +21,7 @@ in fail2banVaultwardenTest = handleTest ./fail2ban-vaultwarden.nix; fail2banImmichTest = handleTest ./fail2ban-immich.nix; fail2banJellyfinTest = handleTest ./fail2ban-jellyfin.nix; + fail2banLlamaCppTest = handleTest ./fail2ban-llama-cpp.nix; # jellyfin annotation service test jellyfinAnnotationsTest = handleTest ./jellyfin-annotations.nix;