diff --git a/configuration.nix b/configuration.nix index 515fa79..f318840 100644 --- a/configuration.nix +++ b/configuration.nix @@ -45,6 +45,8 @@ ./services/soulseek.nix + ./services/llama-cpp.nix + ./services/ups.nix ./services/bitwarden.nix diff --git a/flake.lock b/flake.lock index c0185ea..7125090 100644 --- a/flake.lock +++ b/flake.lock @@ -150,6 +150,24 @@ "type": "github" } }, + "flake-parts": { + "inputs": { + "nixpkgs-lib": "nixpkgs-lib" + }, + "locked": { + "lastModified": 1730504689, + "narHash": "sha256-hgmguH29K2fvs9szpq2r3pz2/8cJd2LPS+b4tfNFCwE=", + "owner": "hercules-ci", + "repo": "flake-parts", + "rev": "506278e768c2a08bec68eb62932193e341f55c90", + "type": "github" + }, + "original": { + "owner": "hercules-ci", + "repo": "flake-parts", + "type": "github" + } + }, "flake-utils": { "inputs": { "systems": "systems_4" @@ -276,6 +294,27 @@ "type": "github" } }, + "llamacpp": { + "inputs": { + "flake-parts": "flake-parts", + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1774806340, + "narHash": "sha256-KC0ZkqR8HYHOjQbX3+kxIPv0fsYcjNFYDU9WgZZwNE4=", + "owner": "ggml-org", + "repo": "llama.cpp", + "rev": "7c203670f8d746382247ed369fea7fbf10df8ae0", + "type": "github" + }, + "original": { + "owner": "ggml-org", + "repo": "llama.cpp", + "type": "github" + } + }, "nix-minecraft": { "inputs": { "flake-compat": "flake-compat_3", @@ -330,6 +369,18 @@ "type": "github" } }, + "nixpkgs-lib": { + "locked": { + "lastModified": 1730504152, + "narHash": "sha256-lXvH/vOfb4aGYyvFmZK/HlsNsr/0CVWlwYvo2rxJk3s=", + "type": "tarball", + "url": "https://github.com/NixOS/nixpkgs/archive/cc2f28000298e1269cea6612cd06ec9979dd5d7f.tar.gz" + }, + "original": { + "type": "tarball", + "url": "https://github.com/NixOS/nixpkgs/archive/cc2f28000298e1269cea6612cd06ec9979dd5d7f.tar.gz" + } + }, "nixpkgs-p2pool-module": { "flake": false, "locked": { @@ -395,6 +446,7 @@ "home-manager": "home-manager", "impermanence": "impermanence", "lanzaboote": "lanzaboote", + "llamacpp": "llamacpp", "nix-minecraft": "nix-minecraft", "nixos-hardware": "nixos-hardware", "nixpkgs": "nixpkgs", diff --git a/flake.nix b/flake.nix index e8c4ba9..ec32b6b 100644 --- a/flake.nix +++ b/flake.nix @@ -28,6 +28,11 @@ inputs.nixpkgs.follows = "nixpkgs"; }; + llamacpp = { + url = "github:ggml-org/llama.cpp"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + srvos = { url = "github:nix-community/srvos"; inputs.nixpkgs.follows = "nixpkgs"; diff --git a/service-configs.nix b/service-configs.nix index ea35cf5..7ad7830 100644 --- a/service-configs.nix +++ b/service-configs.nix @@ -149,6 +149,10 @@ rec { port = 5000; proto = "tcp"; }; + llama_cpp = { + port = 6688; + proto = "tcp"; + }; }; }; diff --git a/services/llama-cpp.nix b/services/llama-cpp.nix new file mode 100644 index 0000000..de2501c --- /dev/null +++ b/services/llama-cpp.nix @@ -0,0 +1,36 @@ +{ + pkgs, + service_configs, + config, + inputs, + lib, + ... +}: +{ + services.llama-cpp = { + enable = true; + model = builtins.toString ( + pkgs.fetchurl { + url = "https://huggingface.co/mradermacher/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled-GGUF/resolve/main/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled.Q5_K_M.gguf"; + sha256 = "1b08df702dc729104de8894d3f6c6f52505e9f07c5d9b236b3efda3ae187bda2"; + } + ); + port = service_configs.ports.private.llama_cpp.port; + host = "0.0.0.0"; + package = (lib.optimizePackage inputs.llamacpp.packages.${pkgs.system}.vulkan); + extraFlags = [ + "-ngl" + "12" + "-c" + "16384" + ]; + }; + + # have to do this in order to get vulkan to work + systemd.services.llama-cpp.serviceConfig.DynamicUser = lib.mkForce false; + + services.caddy.virtualHosts."llm.${service_configs.https.domain}".extraConfig = '' + import ${config.age.secrets.caddy_auth.path} + reverse_proxy :${builtins.toString config.services.llama-cpp.port} + ''; +}