diff --git a/flake.lock b/flake.lock index 931a4bd..f6cc9e2 100644 --- a/flake.lock +++ b/flake.lock @@ -325,16 +325,16 @@ ] }, "locked": { - "lastModified": 1775603401, - "narHash": "sha256-kp+cnqLX+K4M6gBc5Iy4S+G0xkz78qVEcO1xmNTrtgM=", - "owner": "TheTom", + "lastModified": 1774922513, + "narHash": "sha256-TKk1i8AZzxy4/z0MkqKxoGf/CQDvoL+jo8JDtZeCRy8=", + "owner": "apollosenvy", "repo": "llama-cpp-turboquant", - "rev": "a4e8af4455d34d4872f967e615c8212643c2123e", + "rev": "9e80e93ceb115bc5055997c373d8c09bfa47a565", "type": "github" }, "original": { - "owner": "TheTom", - "ref": "feature/turboquant-kv-cache", + "owner": "apollosenvy", + "ref": "pr/vulkan-turbo3", "repo": "llama-cpp-turboquant", "type": "github" } diff --git a/flake.nix b/flake.nix index 56319ee..728fa02 100644 --- a/flake.nix +++ b/flake.nix @@ -29,7 +29,8 @@ }; llamacpp = { - url = "github:TheTom/llama-cpp-turboquant/feature/turboquant-kv-cache"; + # url = "github:TheTom/llama-cpp-turboquant/feature/turboquant-kv-cache"; + url = "github:apollosenvy/llama-cpp-turboquant/pr/vulkan-turbo3"; inputs.nixpkgs.follows = "nixpkgs"; }; diff --git a/services/llama-cpp.nix b/services/llama-cpp.nix index 86d6557..1f1d834 100644 --- a/services/llama-cpp.nix +++ b/services/llama-cpp.nix @@ -23,10 +23,10 @@ in ); port = service_configs.ports.private.llama_cpp.port; host = "0.0.0.0"; - package = (lib.optimizePackage inputs.llamacpp.packages.${pkgs.system}.default); + package = (lib.optimizePackage inputs.llamacpp.packages.${pkgs.system}.vulkan); extraFlags = [ - # "-ngl" - # "12" + "-ngl" + "999" "-c" "65536" "-ctk"