diff --git a/flake.nix b/flake.nix index 56319ee..38833e4 100644 --- a/flake.nix +++ b/flake.nix @@ -123,7 +123,7 @@ name = "nixpkgs-patched"; src = nixpkgs; patches = [ - ./patches/0001-firefox-syncserver-add-postgresql-backend-support.patch + ./patches/nixpkgs/0001-firefox-syncserver-add-postgresql-backend-support.patch ]; }; diff --git a/patches/0002-llamacpp-vulkan-turbo3.patch b/patches/llamacpp/0002-llamacpp-vulkan-turbo3.patch similarity index 100% rename from patches/0002-llamacpp-vulkan-turbo3.patch rename to patches/llamacpp/0002-llamacpp-vulkan-turbo3.patch diff --git a/patches/llamacpp/0003-gemma4-tokenizer-fix.patch b/patches/llamacpp/0003-gemma4-tokenizer-fix.patch new file mode 100644 index 0000000..e01692a --- /dev/null +++ b/patches/llamacpp/0003-gemma4-tokenizer-fix.patch @@ -0,0 +1,88 @@ +From 320c29c2dbe3c8df56374a9ec19a7fe5c124d4f8 Mon Sep 17 00:00:00 2001 +From: Piotr Wilkin +Date: Tue, 7 Apr 2026 00:54:00 +0200 +Subject: [PATCH 1/2] YATF (Yet Another Tokenizer Fix) for Gemma 4. With tests! + +--- + convert_hf_to_gguf_update.py | 1 + + models/ggml-vocab-gemma-4.gguf | Bin 0 -> 15776467 bytes + models/ggml-vocab-gemma-4.gguf.inp | 111 +++++++++++++++++++++++++++++ + models/ggml-vocab-gemma-4.gguf.out | 46 ++++++++++++ + src/llama-vocab.cpp | 13 +++- + tests/CMakeLists.txt | 1 + + 6 files changed, 170 insertions(+), 2 deletions(-) + create mode 100644 models/ggml-vocab-gemma-4.gguf + create mode 100644 models/ggml-vocab-gemma-4.gguf.inp + create mode 100644 models/ggml-vocab-gemma-4.gguf.out + +diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py +index 086f1c22863..f1d70d62e73 100755 +--- a/convert_hf_to_gguf_update.py ++++ b/convert_hf_to_gguf_update.py +@@ -114,6 +114,7 @@ class TOKENIZER_TYPE(IntEnum): + {"name": "viking", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B + {"name": "gemma", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", }, + {"name": "gemma-2", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", }, ++ {"name": "gemma-4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/google/gemma-4-E2B-it", }, + {"name": "jais", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", }, + {"name": "jais-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inceptionai/Jais-2-8B-Chat", }, + {"name": "t5", "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", }, +diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp +index de9a9466bc7..e9e276ab999 100644 +--- a/src/llama-vocab.cpp ++++ b/src/llama-vocab.cpp +@@ -658,9 +658,18 @@ struct llm_tokenizer_bpe_session { + const auto token = vocab.text_to_token(str); + + if (token == LLAMA_TOKEN_NULL) { ++ static const char * hex = "0123456789ABCDEF"; + for (auto j = str.begin(); j != str.end(); ++j) { +- std::string byte_str(1, *j); +- auto token_multibyte = vocab.text_to_token(byte_str); ++ llama_token token_multibyte = LLAMA_TOKEN_NULL; ++ if (tokenizer.byte_encode) { ++ std::string byte_str(1, *j); ++ token_multibyte = vocab.text_to_token(byte_str); ++ } else { ++ // For non-byte-encoded BPE (e.g. gemma-4), byte tokens use <0xXX> format ++ const uint8_t ch = (uint8_t)*j; ++ const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 }; ++ token_multibyte = vocab.text_to_token(buf); ++ } + if (token_multibyte != LLAMA_TOKEN_NULL) { + output.push_back(token_multibyte); + } +diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt +index 5e87c8b34e1..cd4bc5ef1d3 100644 +--- a/tests/CMakeLists.txt ++++ b/tests/CMakeLists.txt +@@ -124,6 +124,7 @@ llama_test(test-tokenizer-0 NAME test-tokenizer-0-command-r ARGS ${PROJE + llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-coder ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-deepseek-coder.gguf) + llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-llm ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-deepseek-llm.gguf) + llama_test(test-tokenizer-0 NAME test-tokenizer-0-falcon ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-falcon.gguf) ++llama_test(test-tokenizer-0 NAME test-tokenizer-0-gemma-4 ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-gemma-4.gguf) + llama_test(test-tokenizer-0 NAME test-tokenizer-0-gpt-2 ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-gpt-2.gguf) + llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-bpe ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-bpe.gguf) + llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-spm ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-spm.gguf) + +From 0e98596dec124c6968132ef042c21ccdb20d1304 Mon Sep 17 00:00:00 2001 +From: Piotr Wilkin +Date: Tue, 7 Apr 2026 00:58:08 +0200 +Subject: [PATCH 2/2] Remove unnecessary hash from update script. + +--- + convert_hf_to_gguf_update.py | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py +index f1d70d62e73..086f1c22863 100755 +--- a/convert_hf_to_gguf_update.py ++++ b/convert_hf_to_gguf_update.py +@@ -114,7 +114,6 @@ class TOKENIZER_TYPE(IntEnum): + {"name": "viking", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B + {"name": "gemma", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", }, + {"name": "gemma-2", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", }, +- {"name": "gemma-4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/google/gemma-4-E2B-it", }, + {"name": "jais", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", }, + {"name": "jais-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inceptionai/Jais-2-8B-Chat", }, + {"name": "t5", "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", }, diff --git a/patches/0001-firefox-syncserver-add-postgresql-backend-support.patch b/patches/nixpkgs/0001-firefox-syncserver-add-postgresql-backend-support.patch similarity index 100% rename from patches/0001-firefox-syncserver-add-postgresql-backend-support.patch rename to patches/nixpkgs/0001-firefox-syncserver-add-postgresql-backend-support.patch diff --git a/services/llama-cpp.nix b/services/llama-cpp.nix index cdf9955..c6056d2 100644 --- a/services/llama-cpp.nix +++ b/services/llama-cpp.nix @@ -25,7 +25,10 @@ in host = "0.0.0.0"; package = lib.optimizePackage ( inputs.llamacpp.packages.${pkgs.system}.vulkan.overrideAttrs (old: { - patches = (old.patches or [ ]) ++ [ ../patches/0002-llamacpp-vulkan-turbo3.patch ]; + patches = (old.patches or [ ]) ++ [ + ../patches/llamacpp/0002-llamacpp-vulkan-turbo3.patch + ../patches/llamacpp/0003-gemma4-tokenizer-fix.patch + ]; }) ); extraFlags = [