llama.cpp: things
This commit is contained in:
@@ -1,88 +0,0 @@
|
|||||||
From 320c29c2dbe3c8df56374a9ec19a7fe5c124d4f8 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Piotr Wilkin <piotr.wilkin@syndatis.com>
|
|
||||||
Date: Tue, 7 Apr 2026 00:54:00 +0200
|
|
||||||
Subject: [PATCH 1/2] YATF (Yet Another Tokenizer Fix) for Gemma 4. With tests!
|
|
||||||
|
|
||||||
---
|
|
||||||
convert_hf_to_gguf_update.py | 1 +
|
|
||||||
models/ggml-vocab-gemma-4.gguf | Bin 0 -> 15776467 bytes
|
|
||||||
models/ggml-vocab-gemma-4.gguf.inp | 111 +++++++++++++++++++++++++++++
|
|
||||||
models/ggml-vocab-gemma-4.gguf.out | 46 ++++++++++++
|
|
||||||
src/llama-vocab.cpp | 13 +++-
|
|
||||||
tests/CMakeLists.txt | 1 +
|
|
||||||
6 files changed, 170 insertions(+), 2 deletions(-)
|
|
||||||
create mode 100644 models/ggml-vocab-gemma-4.gguf
|
|
||||||
create mode 100644 models/ggml-vocab-gemma-4.gguf.inp
|
|
||||||
create mode 100644 models/ggml-vocab-gemma-4.gguf.out
|
|
||||||
|
|
||||||
diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
|
|
||||||
index 086f1c22863..f1d70d62e73 100755
|
|
||||||
--- a/convert_hf_to_gguf_update.py
|
|
||||||
+++ b/convert_hf_to_gguf_update.py
|
|
||||||
@@ -114,6 +114,7 @@ class TOKENIZER_TYPE(IntEnum):
|
|
||||||
{"name": "viking", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B
|
|
||||||
{"name": "gemma", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", },
|
|
||||||
{"name": "gemma-2", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", },
|
|
||||||
+ {"name": "gemma-4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/google/gemma-4-E2B-it", },
|
|
||||||
{"name": "jais", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", },
|
|
||||||
{"name": "jais-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inceptionai/Jais-2-8B-Chat", },
|
|
||||||
{"name": "t5", "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", },
|
|
||||||
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
|
|
||||||
index de9a9466bc7..e9e276ab999 100644
|
|
||||||
--- a/src/llama-vocab.cpp
|
|
||||||
+++ b/src/llama-vocab.cpp
|
|
||||||
@@ -658,9 +658,18 @@ struct llm_tokenizer_bpe_session {
|
|
||||||
const auto token = vocab.text_to_token(str);
|
|
||||||
|
|
||||||
if (token == LLAMA_TOKEN_NULL) {
|
|
||||||
+ static const char * hex = "0123456789ABCDEF";
|
|
||||||
for (auto j = str.begin(); j != str.end(); ++j) {
|
|
||||||
- std::string byte_str(1, *j);
|
|
||||||
- auto token_multibyte = vocab.text_to_token(byte_str);
|
|
||||||
+ llama_token token_multibyte = LLAMA_TOKEN_NULL;
|
|
||||||
+ if (tokenizer.byte_encode) {
|
|
||||||
+ std::string byte_str(1, *j);
|
|
||||||
+ token_multibyte = vocab.text_to_token(byte_str);
|
|
||||||
+ } else {
|
|
||||||
+ // For non-byte-encoded BPE (e.g. gemma-4), byte tokens use <0xXX> format
|
|
||||||
+ const uint8_t ch = (uint8_t)*j;
|
|
||||||
+ const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 };
|
|
||||||
+ token_multibyte = vocab.text_to_token(buf);
|
|
||||||
+ }
|
|
||||||
if (token_multibyte != LLAMA_TOKEN_NULL) {
|
|
||||||
output.push_back(token_multibyte);
|
|
||||||
}
|
|
||||||
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
|
|
||||||
index 5e87c8b34e1..cd4bc5ef1d3 100644
|
|
||||||
--- a/tests/CMakeLists.txt
|
|
||||||
+++ b/tests/CMakeLists.txt
|
|
||||||
@@ -124,6 +124,7 @@ llama_test(test-tokenizer-0 NAME test-tokenizer-0-command-r ARGS ${PROJE
|
|
||||||
llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-coder ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-deepseek-coder.gguf)
|
|
||||||
llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-llm ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-deepseek-llm.gguf)
|
|
||||||
llama_test(test-tokenizer-0 NAME test-tokenizer-0-falcon ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-falcon.gguf)
|
|
||||||
+llama_test(test-tokenizer-0 NAME test-tokenizer-0-gemma-4 ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-gemma-4.gguf)
|
|
||||||
llama_test(test-tokenizer-0 NAME test-tokenizer-0-gpt-2 ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-gpt-2.gguf)
|
|
||||||
llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-bpe ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-bpe.gguf)
|
|
||||||
llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-spm ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-spm.gguf)
|
|
||||||
|
|
||||||
From 0e98596dec124c6968132ef042c21ccdb20d1304 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Piotr Wilkin <piotr.wilkin@syndatis.com>
|
|
||||||
Date: Tue, 7 Apr 2026 00:58:08 +0200
|
|
||||||
Subject: [PATCH 2/2] Remove unnecessary hash from update script.
|
|
||||||
|
|
||||||
---
|
|
||||||
convert_hf_to_gguf_update.py | 1 -
|
|
||||||
1 file changed, 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
|
|
||||||
index f1d70d62e73..086f1c22863 100755
|
|
||||||
--- a/convert_hf_to_gguf_update.py
|
|
||||||
+++ b/convert_hf_to_gguf_update.py
|
|
||||||
@@ -114,7 +114,6 @@ class TOKENIZER_TYPE(IntEnum):
|
|
||||||
{"name": "viking", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B
|
|
||||||
{"name": "gemma", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", },
|
|
||||||
{"name": "gemma-2", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", },
|
|
||||||
- {"name": "gemma-4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/google/gemma-4-E2B-it", },
|
|
||||||
{"name": "jais", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", },
|
|
||||||
{"name": "jais-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inceptionai/Jais-2-8B-Chat", },
|
|
||||||
{"name": "t5", "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", },
|
|
||||||
@@ -9,7 +9,7 @@
|
|||||||
}:
|
}:
|
||||||
let
|
let
|
||||||
cfg = config.services.llama-cpp;
|
cfg = config.services.llama-cpp;
|
||||||
modelUrl = "https://huggingface.co/bartowski/google_gemma-4-E2B-it-GGUF/resolve/main/google_gemma-4-E2B-it-Q4_K_M.gguf";
|
modelUrl = "https://huggingface.co/bartowski/google_gemma-4-E2B-it-GGUF/resolve/main/google_gemma-4-E2B-it-IQ2_M.gguf";
|
||||||
modelAlias = lib.removeSuffix ".gguf" (baseNameOf modelUrl);
|
modelAlias = lib.removeSuffix ".gguf" (baseNameOf modelUrl);
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
@@ -25,7 +25,7 @@ in
|
|||||||
model = toString (
|
model = toString (
|
||||||
pkgs.fetchurl {
|
pkgs.fetchurl {
|
||||||
url = modelUrl;
|
url = modelUrl;
|
||||||
sha256 = "5efe645db4e1909c7a1f4a9608df18e6c14383f5e86777fc49f769f9ba7d5fdf";
|
sha256 = "17e869ac54d0e59faa884d5319fc55ad84cd866f50f0b3073fbb25accc875a23";
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
port = service_configs.ports.private.llama_cpp.port;
|
port = service_configs.ports.private.llama_cpp.port;
|
||||||
@@ -33,7 +33,6 @@ in
|
|||||||
package = lib.optimizePackage (
|
package = lib.optimizePackage (
|
||||||
inputs.llamacpp.packages.${pkgs.system}.vulkan.overrideAttrs (old: {
|
inputs.llamacpp.packages.${pkgs.system}.vulkan.overrideAttrs (old: {
|
||||||
patches = (old.patches or [ ]) ++ [
|
patches = (old.patches or [ ]) ++ [
|
||||||
../patches/llamacpp/0003-gemma4-tokenizer-fix.patch
|
|
||||||
];
|
];
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
|
|||||||
Reference in New Issue
Block a user