diff --git a/flake.nix b/flake.nix
index 56319ee..38833e4 100644
--- a/flake.nix
+++ b/flake.nix
@@ -123,7 +123,7 @@
         name = "nixpkgs-patched";
         src = nixpkgs;
         patches = [
-          ./patches/0001-firefox-syncserver-add-postgresql-backend-support.patch
+          ./patches/nixpkgs/0001-firefox-syncserver-add-postgresql-backend-support.patch
         ];
       };
 
diff --git a/patches/0002-llamacpp-vulkan-turbo3.patch b/patches/llamacpp/0002-llamacpp-vulkan-turbo3.patch
similarity index 100%
rename from patches/0002-llamacpp-vulkan-turbo3.patch
rename to patches/llamacpp/0002-llamacpp-vulkan-turbo3.patch
diff --git a/patches/llamacpp/0003-gemma4-tokenizer-fix.patch b/patches/llamacpp/0003-gemma4-tokenizer-fix.patch
new file mode 100644
index 0000000..e01692a
--- /dev/null
+++ b/patches/llamacpp/0003-gemma4-tokenizer-fix.patch
@@ -0,0 +1,88 @@
+From 320c29c2dbe3c8df56374a9ec19a7fe5c124d4f8 Mon Sep 17 00:00:00 2001
+From: Piotr Wilkin <piotr.wilkin@syndatis.com>
+Date: Tue, 7 Apr 2026 00:54:00 +0200
+Subject: [PATCH 1/2] YATF (Yet Another Tokenizer Fix) for Gemma 4. With tests!
+
+---
+ convert_hf_to_gguf_update.py       |   1 +
+ models/ggml-vocab-gemma-4.gguf     | Bin 0 -> 15776467 bytes
+ models/ggml-vocab-gemma-4.gguf.inp | 111 +++++++++++++++++++++++++++++
+ models/ggml-vocab-gemma-4.gguf.out |  46 ++++++++++++
+ src/llama-vocab.cpp                |  13 +++-
+ tests/CMakeLists.txt               |   1 +
+ 6 files changed, 170 insertions(+), 2 deletions(-)
+ create mode 100644 models/ggml-vocab-gemma-4.gguf
+ create mode 100644 models/ggml-vocab-gemma-4.gguf.inp
+ create mode 100644 models/ggml-vocab-gemma-4.gguf.out
+
+diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
+index 086f1c22863..f1d70d62e73 100755
+--- a/convert_hf_to_gguf_update.py
++++ b/convert_hf_to_gguf_update.py
+@@ -114,6 +114,7 @@ class TOKENIZER_TYPE(IntEnum):
+     {"name": "viking",           "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B
+     {"name": "gemma",            "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", },
+     {"name": "gemma-2",          "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", },
++    {"name": "gemma-4",          "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/google/gemma-4-E2B-it", },
+     {"name": "jais",             "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", },
+     {"name": "jais-2",           "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inceptionai/Jais-2-8B-Chat", },
+     {"name": "t5",               "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", },
+diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
+index de9a9466bc7..e9e276ab999 100644
+--- a/src/llama-vocab.cpp
++++ b/src/llama-vocab.cpp
+@@ -658,9 +658,18 @@ struct llm_tokenizer_bpe_session {
+                 const auto token = vocab.text_to_token(str);
+ 
+                 if (token == LLAMA_TOKEN_NULL) {
++                    static const char * hex = "0123456789ABCDEF";
+                     for (auto j = str.begin(); j != str.end(); ++j) {
+-                        std::string byte_str(1, *j);
+-                        auto token_multibyte = vocab.text_to_token(byte_str);
++                        llama_token token_multibyte = LLAMA_TOKEN_NULL;
++                        if (tokenizer.byte_encode) {
++                            std::string byte_str(1, *j);
++                            token_multibyte = vocab.text_to_token(byte_str);
++                        } else {
++                            // For non-byte-encoded BPE (e.g. gemma-4), byte tokens use <0xXX> format
++                            const uint8_t ch = (uint8_t)*j;
++                            const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 };
++                            token_multibyte = vocab.text_to_token(buf);
++                        }
+                         if (token_multibyte != LLAMA_TOKEN_NULL) {
+                             output.push_back(token_multibyte);
+                         }
+diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
+index 5e87c8b34e1..cd4bc5ef1d3 100644
+--- a/tests/CMakeLists.txt
++++ b/tests/CMakeLists.txt
+@@ -124,6 +124,7 @@ llama_test(test-tokenizer-0 NAME test-tokenizer-0-command-r         ARGS ${PROJE
+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-coder    ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-deepseek-coder.gguf)
+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-llm      ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-deepseek-llm.gguf)
+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-falcon            ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-falcon.gguf)
++llama_test(test-tokenizer-0 NAME test-tokenizer-0-gemma-4           ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-gemma-4.gguf)
+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-gpt-2             ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-gpt-2.gguf)
+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-bpe         ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-bpe.gguf)
+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-spm         ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-spm.gguf)
+
+From 0e98596dec124c6968132ef042c21ccdb20d1304 Mon Sep 17 00:00:00 2001
+From: Piotr Wilkin <piotr.wilkin@syndatis.com>
+Date: Tue, 7 Apr 2026 00:58:08 +0200
+Subject: [PATCH 2/2] Remove unnecessary hash from update script.
+
+---
+ convert_hf_to_gguf_update.py | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
+index f1d70d62e73..086f1c22863 100755
+--- a/convert_hf_to_gguf_update.py
++++ b/convert_hf_to_gguf_update.py
+@@ -114,7 +114,6 @@ class TOKENIZER_TYPE(IntEnum):
+     {"name": "viking",           "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B
+     {"name": "gemma",            "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", },
+     {"name": "gemma-2",          "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", },
+-    {"name": "gemma-4",          "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/google/gemma-4-E2B-it", },
+     {"name": "jais",             "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", },
+     {"name": "jais-2",           "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inceptionai/Jais-2-8B-Chat", },
+     {"name": "t5",               "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", },
diff --git a/patches/0001-firefox-syncserver-add-postgresql-backend-support.patch b/patches/nixpkgs/0001-firefox-syncserver-add-postgresql-backend-support.patch
similarity index 100%
rename from patches/0001-firefox-syncserver-add-postgresql-backend-support.patch
rename to patches/nixpkgs/0001-firefox-syncserver-add-postgresql-backend-support.patch
diff --git a/services/llama-cpp.nix b/services/llama-cpp.nix
index cdf9955..c6056d2 100644
--- a/services/llama-cpp.nix
+++ b/services/llama-cpp.nix
@@ -25,7 +25,10 @@ in
     host = "0.0.0.0";
     package = lib.optimizePackage (
       inputs.llamacpp.packages.${pkgs.system}.vulkan.overrideAttrs (old: {
-        patches = (old.patches or [ ]) ++ [ ../patches/0002-llamacpp-vulkan-turbo3.patch ];
+        patches = (old.patches or [ ]) ++ [
+          ../patches/llamacpp/0002-llamacpp-vulkan-turbo3.patch
+          ../patches/llamacpp/0003-gemma4-tokenizer-fix.patch
+        ];
       })
     );
     extraFlags = [