llama-cpp: remove folder

2026-04-06 12:48:28 -04:00
parent 6d47f02a0f
commit a12dcb01ec
3 changed files with 1 additions and 6 deletions
--- a/services/llama-cpp.nix
+++ b/services/llama-cpp.nix
@@ -0,0 +1,74 @@
+# NixOS module: configures the `services.llama-cpp` server, adjusts its systemd
+# unit, and publishes it through a Caddy virtual host.
+{
+  pkgs,
+  service_configs,
+  config,
+  inputs,
+  lib,
+  utils,
+  ...
+}:
+let
+  # Shorthand for the evaluated `services.llama-cpp` option values.
+  cfg = config.services.llama-cpp;
+  # Download URL of the GGUF model file; used as the fetchurl source for the
+  # server's `model` option.
+  modelUrl = "https://huggingface.co/bartowski/google_gemma-4-E2B-it-GGUF/resolve/main/google_gemma-4-E2B-it-Q4_K_M.gguf";
+  # File name of modelUrl with the ".gguf" suffix removed; handed to
+  # llama-server through the --alias flag.
+  modelAlias = lib.removeSuffix ".gguf" (baseNameOf modelUrl);
+in
+{
+  # llama-server instance, configured through the `services.llama-cpp` options.
+  services.llama-cpp = {
+    enable = true;
+    # Model weights are fetched from modelUrl and pinned by hash; `toString`
+    # turns the fetchurl result into its store path.
+    model = toString (
+      pkgs.fetchurl {
+        url = modelUrl;
+        sha256 = "5efe645db4e1909c7a1f4a9608df18e6c14383f5e86777fc49f769f9ba7d5fdf";
+      }
+    );
+    port = service_configs.ports.private.llama_cpp.port;
+    # Listens on all interfaces.
+    host = "0.0.0.0";
+    # `pkgs.system` is a deprecated alias in nixpkgs (warns on evaluation and is
+    # unavailable with allowAliases = false); read the system string from the
+    # host platform instead. The selected package is the same.
+    package = lib.optimizePackage inputs.llamacpp.packages.${pkgs.stdenv.hostPlatform.system}.default;
+    extraFlags = [
+      # GPU layer offload, currently disabled:
+      # "-ngl"
+      # "12"
+      # context size
+      "-c"
+      "65536"
+      # K and V cache types.
+      # NOTE(review): "turbo3" is presumably provided by the `inputs.llamacpp`
+      # build rather than stock llama.cpp; confirm.
+      "-ctk"
+      "turbo3"
+      "-ctv"
+      "turbo3"
+      # flash attention
+      "-fa"
+      "on"
+      # API key is read from the agenix-managed secret file
+      "--api-key-file"
+      config.age.secrets.llama-cpp-api-key.path
+      "--metrics"
+      # model name reported to clients
+      "--alias"
+      modelAlias
+      # logical and physical batch sizes
+      "-b"
+      "4096"
+      "-ub"
+      "4096"
+    ];
+  };
+
+  # Adjustments to the systemd unit generated by the upstream llama-cpp module.
+  systemd.services.llama-cpp = {
+    # llama-server tries to create ~/.cache; ProtectSystem=strict + impermanent
+    # root make /root read-only, so HOME is pointed at the unit's cache
+    # directory (see CacheDirectory below).
+    environment.HOME = "/var/cache/llama-cpp";
+
+    serviceConfig = {
+      # Required to get Vulkan to work.
+      DynamicUser = lib.mkForce false;
+
+      # Writable cache directory used as HOME.
+      CacheDirectory = "llama-cpp";
+
+      # The upstream module hardcodes --log-disable; replace ExecStart so logs
+      # are kept and prompt processing progress is visible via journalctl.
+      ExecStart = lib.mkForce (
+        lib.concatStringsSep " " [
+          "${cfg.package}/bin/llama-server"
+          "--host ${cfg.host}"
+          "--port ${toString cfg.port}"
+          "-m ${cfg.model}"
+          (utils.escapeSystemdExecArgs cfg.extraFlags)
+        ]
+      );
+    };
+  };
+
+  # Authentication is done by llama-server itself via --api-key-file (Bearer
+  # token). caddy_auth is deliberately left out: its basic auth would block
+  # Bearer-only clients like oh-my-pi.
+  services.caddy.virtualHosts."llm.${service_configs.https.domain}" = {
+    extraConfig = ''
+      reverse_proxy :${toString cfg.port}
+    '';
+  };
+}