From 0a927ea893366de32aab1b2ec75e2c0b78911c7c Mon Sep 17 00:00:00 2001 From: Simon Gardling Date: Mon, 6 Apr 2026 02:12:46 -0400 Subject: [PATCH] llama-cpp: try Vulkan package from apollosenvy fork with -ngl 999 --- flake.lock | 12 ++++++------ flake.nix | 3 ++- services/llama-cpp/llama-cpp.nix | 6 +++--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/flake.lock b/flake.lock index f081ed1..f0331e1 100644 --- a/flake.lock +++ b/flake.lock @@ -325,16 +325,16 @@ ] }, "locked": { - "lastModified": 1775236905, - "narHash": "sha256-tHshzR/k6D/r5UhJCfJ9b/mJgsbn7ODtnZrDlimhOOI=", - "owner": "TheTom", + "lastModified": 1774922513, + "narHash": "sha256-TKk1i8AZzxy4/z0MkqKxoGf/CQDvoL+jo8JDtZeCRy8=", + "owner": "apollosenvy", "repo": "llama-cpp-turboquant", - "rev": "bc05a6803e48f17e0f2c7a99fce9b50d03882de7", + "rev": "9e80e93ceb115bc5055997c373d8c09bfa47a565", "type": "github" }, "original": { - "owner": "TheTom", - "ref": "feature/turboquant-kv-cache", + "owner": "apollosenvy", + "ref": "pr/vulkan-turbo3", "repo": "llama-cpp-turboquant", "type": "github" } diff --git a/flake.nix b/flake.nix index 56319ee..728fa02 100644 --- a/flake.nix +++ b/flake.nix @@ -29,7 +29,8 @@ }; llamacpp = { - url = "github:TheTom/llama-cpp-turboquant/feature/turboquant-kv-cache"; + # url = "github:TheTom/llama-cpp-turboquant/feature/turboquant-kv-cache"; + url = "github:apollosenvy/llama-cpp-turboquant/pr/vulkan-turbo3"; inputs.nixpkgs.follows = "nixpkgs"; }; diff --git a/services/llama-cpp/llama-cpp.nix b/services/llama-cpp/llama-cpp.nix index 1691d35..72d8725 100644 --- a/services/llama-cpp/llama-cpp.nix +++ b/services/llama-cpp/llama-cpp.nix @@ -23,10 +23,10 @@ in ); port = service_configs.ports.private.llama_cpp.port; host = "0.0.0.0"; - package = (lib.optimizePackage inputs.llamacpp.packages.${pkgs.system}.default); + package = (lib.optimizePackage inputs.llamacpp.packages.${pkgs.system}.vulkan); extraFlags = [ - # "-ngl" - # "12" + "-ngl" + "999" "-c" "65536" "-ctk"