llama-cpp: apply Vulkan support as a local patch instead of tracking the fork branch
Some checks failed
Build and Deploy / deploy (push) Failing after 58s

This commit is contained in:
2026-04-07 20:04:06 -04:00
parent fdc1596bce
commit 76acd10371
4 changed files with 20983 additions and 9 deletions

12
flake.lock generated
View File

@@ -325,16 +325,16 @@
] ]
}, },
"locked": { "locked": {
"lastModified": 1774922513, "lastModified": 1775603401,
"narHash": "sha256-TKk1i8AZzxy4/z0MkqKxoGf/CQDvoL+jo8JDtZeCRy8=", "narHash": "sha256-kp+cnqLX+K4M6gBc5Iy4S+G0xkz78qVEcO1xmNTrtgM=",
"owner": "apollosenvy", "owner": "TheTom",
"repo": "llama-cpp-turboquant", "repo": "llama-cpp-turboquant",
"rev": "9e80e93ceb115bc5055997c373d8c09bfa47a565", "rev": "a4e8af4455d34d4872f967e615c8212643c2123e",
"type": "github" "type": "github"
}, },
"original": { "original": {
"owner": "apollosenvy", "owner": "TheTom",
"ref": "pr/vulkan-turbo3", "ref": "feature/turboquant-kv-cache",
"repo": "llama-cpp-turboquant", "repo": "llama-cpp-turboquant",
"type": "github" "type": "github"
} }

View File

@@ -29,8 +29,7 @@
}; };
llamacpp = { llamacpp = {
# url = "github:TheTom/llama-cpp-turboquant/feature/turboquant-kv-cache"; url = "github:TheTom/llama-cpp-turboquant/feature/turboquant-kv-cache";
url = "github:apollosenvy/llama-cpp-turboquant/pr/vulkan-turbo3";
inputs.nixpkgs.follows = "nixpkgs"; inputs.nixpkgs.follows = "nixpkgs";
}; };

File diff suppressed because it is too large. (Load Diff)

View File

@@ -23,7 +23,11 @@ in
); );
port = service_configs.ports.private.llama_cpp.port; port = service_configs.ports.private.llama_cpp.port;
host = "0.0.0.0"; host = "0.0.0.0";
package = (lib.optimizePackage inputs.llamacpp.packages.${pkgs.system}.vulkan); package = lib.optimizePackage (
inputs.llamacpp.packages.${pkgs.system}.vulkan.overrideAttrs (old: {
patches = (old.patches or [ ]) ++ [ ../patches/0002-llamacpp-vulkan-turbo3.patch ];
})
);
extraFlags = [ extraFlags = [
"-ngl" "-ngl"
"999" "999"