diff --git a/flake.lock b/flake.lock index f0331e1..f081ed1 100644 --- a/flake.lock +++ b/flake.lock @@ -325,16 +325,16 @@ ] }, "locked": { - "lastModified": 1774922513, - "narHash": "sha256-TKk1i8AZzxy4/z0MkqKxoGf/CQDvoL+jo8JDtZeCRy8=", - "owner": "apollosenvy", + "lastModified": 1775236905, + "narHash": "sha256-tHshzR/k6D/r5UhJCfJ9b/mJgsbn7ODtnZrDlimhOOI=", + "owner": "TheTom", "repo": "llama-cpp-turboquant", - "rev": "9e80e93ceb115bc5055997c373d8c09bfa47a565", + "rev": "bc05a6803e48f17e0f2c7a99fce9b50d03882de7", "type": "github" }, "original": { - "owner": "apollosenvy", - "ref": "pr/vulkan-turbo3", + "owner": "TheTom", + "ref": "feature/turboquant-kv-cache", "repo": "llama-cpp-turboquant", "type": "github" } diff --git a/flake.nix b/flake.nix index 728fa02..56319ee 100644 --- a/flake.nix +++ b/flake.nix @@ -29,8 +29,7 @@ }; llamacpp = { - # url = "github:TheTom/llama-cpp-turboquant/feature/turboquant-kv-cache"; - url = "github:apollosenvy/llama-cpp-turboquant/pr/vulkan-turbo3"; + url = "github:TheTom/llama-cpp-turboquant/feature/turboquant-kv-cache"; inputs.nixpkgs.follows = "nixpkgs"; }; diff --git a/services/llama-cpp/llama-cpp.nix b/services/llama-cpp/llama-cpp.nix index f5e64cd..8e77a3d 100644 --- a/services/llama-cpp/llama-cpp.nix +++ b/services/llama-cpp/llama-cpp.nix @@ -23,10 +23,10 @@ in ); port = service_configs.ports.private.llama_cpp.port; host = "0.0.0.0"; - package = (lib.optimizePackage inputs.llamacpp.packages.${pkgs.system}.vulkan); + package = (lib.optimizePackage inputs.llamacpp.packages.${pkgs.system}.default); extraFlags = [ - "-ngl" - "999" + # "-ngl" + # "12" "-c" "65536" "-ctk"