diff --git a/modules/age-secrets.nix b/modules/age-secrets.nix index 2effde8..63c612d 100644 --- a/modules/age-secrets.nix +++ b/modules/age-secrets.nix @@ -159,5 +159,13 @@ owner = "gitea-runner"; group = "gitea-runner"; }; + + # llama-cpp API key for Bearer token auth + llama-cpp-api-key = { + file = ../secrets/llama-cpp-api-key.age; + mode = "0400"; + owner = "root"; + group = "root"; + }; }; } diff --git a/secrets/llama-cpp-api-key.age b/secrets/llama-cpp-api-key.age new file mode 100644 index 0000000..354f211 Binary files /dev/null and b/secrets/llama-cpp-api-key.age differ diff --git a/services/llama-cpp.nix b/services/llama-cpp.nix index 22a766e..e1e6337 100644 --- a/services/llama-cpp.nix +++ b/services/llama-cpp.nix @@ -29,14 +29,18 @@ "turbo4" "-fa" "on" + "--api-key-file" + config.age.secrets.llama-cpp-api-key.path ]; }; # have to do this in order to get vulkan to work systemd.services.llama-cpp.serviceConfig.DynamicUser = lib.mkForce false; + # Auth handled by llama-cpp --api-key-file (Bearer token). + # No caddy_auth — the API key is the auth layer, and caddy_auth's basic + # auth would block Bearer-only clients like oh-my-pi. services.caddy.virtualHosts."llm.${service_configs.https.domain}".extraConfig = '' - import ${config.age.secrets.caddy_auth.path} reverse_proxy :${toString config.services.llama-cpp.port} ''; }