llama.cpp: fail2ban for invalid api keys
This commit is contained in:
@@ -20,6 +20,48 @@ in
|
||||
})
|
||||
];
|
||||
|
||||
# Dedicated Caddy access log on this vhost for fail2ban to tail.
# llama.cpp's own "Invalid API Key" warning line carries no client IP,
# and behind the reverse proxy the llama-server access log only ever
# sees 127.0.0.1 -- Caddy's JSON log records the real peer address
# under request.remote_ip, which is what the jail's failregex keys on.
services.caddy.virtualHosts."llm.${service_configs.https.domain}".extraConfig = ''
  log {
    output file /var/log/caddy/access-llama-cpp.log
    format json
  }
'';
|
||||
|
||||
# Pre-create the access log at boot so fail2ban can come up and start
# tailing it even before Caddy has served (and logged) a single request.
systemd.tmpfiles.rules = [
  "d /var/log/caddy 755 caddy caddy"
  "f /var/log/caddy/access-llama-cpp.log 644 caddy caddy"
];
|
||||
|
||||
# Jail that bans clients repeatedly failing API-key validation.
# llama.cpp's unauthenticated endpoints (/, /index.html, /bundle.{js,css},
# /health, /v1/models, /v1/health, /models, /api/tags, /props) skip the
# key check entirely, so every 401 logged on this vhost is a failed
# attempt against an authenticated endpoint -- unlike caddy-auth, there
# is no need to also match on the Authorization header.
services.fail2ban.jails.llama-cpp = {
  enabled = true;
  settings = {
    backend = "auto";
    port = "http,https";
    logpath = "/var/log/caddy/access-llama-cpp.log";
    # maxretry/findtime/bantime left at the defaults (5 / 10m / 10m).

    # NAT hairpinning makes LAN clients appear as the router's address;
    # exempting 192.168.1.0/24 keeps us from banning ourselves out.
    ignoreip = "127.0.0.1/8 ::1 192.168.1.0/24";
  };
  filter.Definition = {
    failregex = ''^.*"remote_ip":"<HOST>".*"status":401.*$'';
    ignoreregex = "";
    datepattern = ''"ts":{Epoch}\.'';
  };
};
|
||||
|
||||
services.llama-cpp = {
|
||||
enable = true;
|
||||
model = toString (
|
||||
|
||||
103
tests/fail2ban-llama-cpp.nix
Normal file
103
tests/fail2ban-llama-cpp.nix
Normal file
@@ -0,0 +1,103 @@
|
||||
{
  config,
  lib,
  pkgs,
  ...
}:
pkgs.testers.runNixOSTest {
  name = "fail2ban-llama-cpp";

  nodes = {
    server =
      {
        config,
        pkgs,
        lib,
        ...
      }:
      {
        imports = [
          ../modules/server-security.nix
        ];

        # Stand-in for the real llama-cpp reverse proxy: this Caddy vhost
        # answers every request with 401, exactly like llama.cpp's api-key
        # middleware does for a bad key. All the test needs is that the 401
        # plus the genuine client IP land in the same access log the
        # production jail watches.
        services.caddy = {
          enable = true;
          virtualHosts.":80".extraConfig = ''
            log {
              output file /var/log/caddy/access-llama-cpp.log
              format json
            }
            respond "Invalid API Key" 401
          '';
        };

        # Mirrors the jail in services/llama-cpp.nix, except: ignoreip is
        # dropped so the VM test subnet can actually be banned, and
        # maxretry is reduced to keep the test quick.
        services.fail2ban.jails.llama-cpp = {
          enabled = true;
          settings = {
            backend = "auto";
            port = "http,https";
            logpath = "/var/log/caddy/access-llama-cpp.log";
            maxretry = 3;
          };
          filter.Definition = {
            failregex = ''^.*"remote_ip":"<HOST>".*"status":401.*$'';
            ignoreregex = "";
            datepattern = ''"ts":{Epoch}\.'';
          };
        };

        systemd.tmpfiles.rules = [
          "d /var/log/caddy 755 caddy caddy"
          "f /var/log/caddy/access-llama-cpp.log 644 caddy caddy"
        ];

        networking.firewall.allowedTCPPorts = [ 80 ];
      };

    client = {
      environment.systemPackages = [ pkgs.curl ];
    };
  };

  testScript = ''
    import time
    import re

    start_all()
    server.wait_for_unit("caddy.service")
    server.wait_for_unit("fail2ban.service")
    server.wait_for_open_port(80)
    time.sleep(2)

    with subtest("Verify llama-cpp jail is active"):
        status = server.succeed("fail2ban-client status")
        assert "llama-cpp" in status, f"llama-cpp jail not found in: {status}"

    with subtest("Generate failed API key attempts"):
        # Force IPv4 for consistent IP tracking across the NAT fabric.
        for i in range(4):
            client.execute(
                "curl -4 -s -H 'Authorization: Bearer badkey' http://server/v1/chat/completions || true"
            )
            time.sleep(1)

    with subtest("Verify IP is banned"):
        time.sleep(5)
        status = server.succeed("fail2ban-client status llama-cpp")
        print(f"llama-cpp jail status: {status}")
        match = re.search(r"Currently banned:\s*(\d+)", status)
        assert match and int(match.group(1)) >= 1, (
            f"Expected at least 1 banned IP, got: {status}"
        )

    with subtest("Verify banned client cannot connect"):
        exit_code = client.execute("curl -4 -s --max-time 3 http://server/ 2>&1")[0]
        assert exit_code != 0, "Connection should be blocked"
  '';
}
|
||||
@@ -21,6 +21,7 @@ in
|
||||
fail2banVaultwardenTest = handleTest ./fail2ban-vaultwarden.nix;
|
||||
fail2banImmichTest = handleTest ./fail2ban-immich.nix;
|
||||
fail2banJellyfinTest = handleTest ./fail2ban-jellyfin.nix;
|
||||
fail2banLlamaCppTest = handleTest ./fail2ban-llama-cpp.nix;
|
||||
|
||||
# jellyfin annotation service test
|
||||
jellyfinAnnotationsTest = handleTest ./jellyfin-annotations.nix;
|
||||
|
||||
Reference in New Issue
Block a user