From 0aeb6c5523e0c047487a069f0a15b8a37c1ee04f Mon Sep 17 00:00:00 2001 From: Simon Gardling Date: Thu, 2 Apr 2026 18:02:23 -0400 Subject: [PATCH] llama-cpp: add API key auth via --api-key-file Generate and encrypt a Bearer token for llama-cpp's built-in auth. Remove caddy_auth from the vhost since basic auth blocks Bearer-only clients. Internal sidecars (xmrig-pause, annotations) connect directly to localhost and are unaffected (/slots is public). --- modules/age-secrets.nix | 8 ++++++++ secrets/llama-cpp-api-key.age | Bin 0 -> 299 bytes services/llama-cpp.nix | 6 +++++- 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 secrets/llama-cpp-api-key.age diff --git a/modules/age-secrets.nix b/modules/age-secrets.nix index 2effde8..63c612d 100644 --- a/modules/age-secrets.nix +++ b/modules/age-secrets.nix @@ -159,5 +159,13 @@ owner = "gitea-runner"; group = "gitea-runner"; }; + + # llama-cpp API key for bearer token auth + llama-cpp-api-key = { + file = ../secrets/llama-cpp-api-key.age; + mode = "0400"; + owner = "root"; + group = "root"; + }; }; } diff --git a/secrets/llama-cpp-api-key.age b/secrets/llama-cpp-api-key.age new file mode 100644 index 0000000000000000000000000000000000000000..354f21164f62f605d8f974a44128dbddb79c69c8 GIT binary patch literal 299 zcmZQ@_Y83kiVO&0(7yTi+Q%)wS6TLSC(N1IbxUB=OLM^^X)euIerG>AJK0~emgluH z`$@gMr^C#*r*E5WfA_L%j_<*&q_7MQ`;*a`Y0?^pEyFsiE3!(&ezAVzldy^0d2fDr zMwXP`C;8~Br2#KiZ+zo(`L>Dg{glA;-WJ}QQ-1Aupkb(>TOP3Lvd7Zjkp{j`cFkJ& zY-`}>^S-_(+szB*Zr(ZQsIdGhN95c2rCFRxS!z*1EAnowQegdev1oPHjSp&DX1!Pb z%o+J_zlGN))hBm9`C5fkomV`-S{cCMB!5la@^DLt@h2N)_FD_uYgUv?NcDJ`J-)i? zKr^qa!1JuWmX2AQFX)|>jw(7_F12TFadT_d;bm#Ziw(U`>@9V))|1N6tospvs5x48 IVkxT+02t|z82|tP literal 0 HcmV?d00001 diff --git a/services/llama-cpp.nix b/services/llama-cpp.nix index 22a766e..e1e6337 100644 --- a/services/llama-cpp.nix +++ b/services/llama-cpp.nix @@ -29,14 +29,18 @@ "turbo4" "-fa" "on" + "--api-key-file" + config.age.secrets.llama-cpp-api-key.path ]; }; # have to do this in order to get vulkan to work systemd.services.llama-cpp.serviceConfig.DynamicUser = lib.mkForce false; + # Auth handled by llama-cpp --api-key-file (Bearer token). + # No caddy_auth — the API key is the auth layer, and caddy_auth's basic + # auth would block Bearer-only clients like oh-my-pi. services.caddy.virtualHosts."llm.${service_configs.https.domain}".extraConfig = '' - import ${config.age.secrets.caddy_auth.path} reverse_proxy :${toString config.services.llama-cpp.port} ''; }