Compare commits

..

2 Commits

Author SHA1 Message Date
6d47f02a0f llama-cpp: set batch size to 4096
All checks were successful
Build and Deploy / deploy (push) Successful in 1m22s
2026-04-06 02:29:37 -04:00
9addb1569a Revert "llama-cpp: maybe use vulkan?"
This reverts commit 0a927ea893.
2026-04-06 02:28:26 -04:00
3 changed files with 14 additions and 11 deletions

12
flake.lock generated
View File

@@ -325,16 +325,16 @@
 ]
 },
 "locked": {
-"lastModified": 1774922513,
-"narHash": "sha256-TKk1i8AZzxy4/z0MkqKxoGf/CQDvoL+jo8JDtZeCRy8=",
-"owner": "apollosenvy",
+"lastModified": 1775236905,
+"narHash": "sha256-tHshzR/k6D/r5UhJCfJ9b/mJgsbn7ODtnZrDlimhOOI=",
+"owner": "TheTom",
 "repo": "llama-cpp-turboquant",
-"rev": "9e80e93ceb115bc5055997c373d8c09bfa47a565",
+"rev": "bc05a6803e48f17e0f2c7a99fce9b50d03882de7",
 "type": "github"
 },
 "original": {
-"owner": "apollosenvy",
-"ref": "pr/vulkan-turbo3",
+"owner": "TheTom",
+"ref": "feature/turboquant-kv-cache",
 "repo": "llama-cpp-turboquant",
 "type": "github"
 }

View File

@@ -29,8 +29,7 @@
 };
 llamacpp = {
-# url = "github:TheTom/llama-cpp-turboquant/feature/turboquant-kv-cache";
-url = "github:apollosenvy/llama-cpp-turboquant/pr/vulkan-turbo3";
+url = "github:TheTom/llama-cpp-turboquant/feature/turboquant-kv-cache";
 inputs.nixpkgs.follows = "nixpkgs";
 };

View File

@@ -23,10 +23,10 @@ in
 );
 port = service_configs.ports.private.llama_cpp.port;
 host = "0.0.0.0";
-package = (lib.optimizePackage inputs.llamacpp.packages.${pkgs.system}.vulkan);
+package = (lib.optimizePackage inputs.llamacpp.packages.${pkgs.system}.default);
 extraFlags = [
-"-ngl"
-"999"
+# "-ngl"
+# "12"
 "-c"
 "65536"
 "-ctk"
@@ -40,6 +40,10 @@ in
 "--metrics"
 "--alias"
 modelAlias
+"-b"
+"4096"
+"-ub"
+"4096"
 ];
 };