llama.cpp: switch gemma-4-E2B-it model from Q4_K_M to IQ2_M, drop gemma4 tokenizer patch

2026-04-11 10:27:38 -04:00
parent dad3867144
commit 12469de580
2 changed files with 2 additions and 91 deletions
--- a/services/llama-cpp.nix
+++ b/services/llama-cpp.nix
@@ -9,7 +9,7 @@
 }:
 let
  cfg = config.services.llama-cpp;
-  modelUrl = "https://huggingface.co/bartowski/google_gemma-4-E2B-it-GGUF/resolve/main/google_gemma-4-E2B-it-Q4_K_M.gguf";
+  modelUrl = "https://huggingface.co/bartowski/google_gemma-4-E2B-it-GGUF/resolve/main/google_gemma-4-E2B-it-IQ2_M.gguf";
  modelAlias = lib.removeSuffix ".gguf" (baseNameOf modelUrl);
 in
 {
@@ -25,7 +25,7 @@ in
    model = toString (
      pkgs.fetchurl {
        url = modelUrl;
-        sha256 = "5efe645db4e1909c7a1f4a9608df18e6c14383f5e86777fc49f769f9ba7d5fdf";
+        sha256 = "17e869ac54d0e59faa884d5319fc55ad84cd866f50f0b3073fbb25accc875a23";
      }
    );
    port = service_configs.ports.private.llama_cpp.port;
@@ -33,7 +33,6 @@ in
    package = lib.optimizePackage (
      inputs.llamacpp.packages.${pkgs.system}.vulkan.overrideAttrs (old: {
        patches = (old.patches or [ ]) ++ [
-          ../patches/llamacpp/0003-gemma4-tokenizer-fix.patch
        ];
      })
    );