llama-cpp: maybe use vulkan?
All checks were successful
Build and Deploy / deploy (push) Successful in 8m30s
All checks were successful
Build and Deploy / deploy (push) Successful in 8m30s
This commit is contained in:
12
flake.lock
generated
12
flake.lock
generated
@@ -325,16 +325,16 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1775236905,
|
"lastModified": 1774922513,
|
||||||
"narHash": "sha256-tHshzR/k6D/r5UhJCfJ9b/mJgsbn7ODtnZrDlimhOOI=",
|
"narHash": "sha256-TKk1i8AZzxy4/z0MkqKxoGf/CQDvoL+jo8JDtZeCRy8=",
|
||||||
"owner": "TheTom",
|
"owner": "apollosenvy",
|
||||||
"repo": "llama-cpp-turboquant",
|
"repo": "llama-cpp-turboquant",
|
||||||
"rev": "bc05a6803e48f17e0f2c7a99fce9b50d03882de7",
|
"rev": "9e80e93ceb115bc5055997c373d8c09bfa47a565",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"owner": "TheTom",
|
"owner": "apollosenvy",
|
||||||
"ref": "feature/turboquant-kv-cache",
|
"ref": "pr/vulkan-turbo3",
|
||||||
"repo": "llama-cpp-turboquant",
|
"repo": "llama-cpp-turboquant",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,7 +29,8 @@
|
|||||||
};
|
};
|
||||||
|
|
||||||
llamacpp = {
|
llamacpp = {
|
||||||
url = "github:TheTom/llama-cpp-turboquant/feature/turboquant-kv-cache";
|
# url = "github:TheTom/llama-cpp-turboquant/feature/turboquant-kv-cache";
|
||||||
|
url = "github:apollosenvy/llama-cpp-turboquant/pr/vulkan-turbo3";
|
||||||
inputs.nixpkgs.follows = "nixpkgs";
|
inputs.nixpkgs.follows = "nixpkgs";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -23,10 +23,10 @@ in
|
|||||||
);
|
);
|
||||||
port = service_configs.ports.private.llama_cpp.port;
|
port = service_configs.ports.private.llama_cpp.port;
|
||||||
host = "0.0.0.0";
|
host = "0.0.0.0";
|
||||||
package = (lib.optimizePackage inputs.llamacpp.packages.${pkgs.system}.default);
|
package = (lib.optimizePackage inputs.llamacpp.packages.${pkgs.system}.vulkan);
|
||||||
extraFlags = [
|
extraFlags = [
|
||||||
# "-ngl"
|
"-ngl"
|
||||||
# "12"
|
"999"
|
||||||
"-c"
|
"-c"
|
||||||
"65536"
|
"65536"
|
||||||
"-ctk"
|
"-ctk"
|
||||||
|
|||||||
Reference in New Issue
Block a user