Compare commits
1 Commits
final-befo
...
0e75c0036f
| Author | SHA1 | Date | |
|---|---|---|---|
|
0e75c0036f
|
24
patches/llamacpp/0004-gemma4-graph-fix.patch
Normal file
24
patches/llamacpp/0004-gemma4-graph-fix.patch
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
From b934a8ca49f9e764fa21d45ff2ce1168a3a7c914 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Georgi Gerganov <ggerganov@gmail.com>
|
||||||
|
Date: Mon, 6 Apr 2026 11:50:22 +0300
|
||||||
|
Subject: [PATCH] models : set gemma 4 FFN MoE prec to F32
|
||||||
|
|
||||||
|
---
|
||||||
|
src/llama-graph.cpp | 4 ++--
|
||||||
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
|
||||||
|
index 0e7d96ca10d..aa8a35721fa 100644
|
||||||
|
--- a/src/llama-graph.cpp
|
||||||
|
+++ b/src/llama-graph.cpp
|
||||||
|
@@ -1185,8 +1185,8 @@ ggml_tensor * llm_graph_context::build_ffn(
|
||||||
|
|
||||||
|
if (down) {
|
||||||
|
cur = build_lora_mm(down, cur);
|
||||||
|
- if (arch == LLM_ARCH_GLM4 || arch == LLM_ARCH_GLM4_MOE || arch == LLM_ARCH_JAIS2) {
|
||||||
|
- // GLM4, GLM4_MOE, and JAIS2 seem to have numerical issues with half-precision accumulators
|
||||||
|
+ if (arch == LLM_ARCH_GLM4 || arch == LLM_ARCH_GLM4_MOE || arch == LLM_ARCH_JAIS2 || arch == LLM_ARCH_GEMMA4) {
|
||||||
|
+ // certain models seem to have numerical issues with half-precision accumulators
|
||||||
|
ggml_mul_mat_set_prec(cur, GGML_PREC_F32);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -27,6 +27,7 @@ in
|
|||||||
inputs.llamacpp.packages.${pkgs.system}.vulkan.overrideAttrs (old: {
|
inputs.llamacpp.packages.${pkgs.system}.vulkan.overrideAttrs (old: {
|
||||||
patches = (old.patches or [ ]) ++ [
|
patches = (old.patches or [ ]) ++ [
|
||||||
../patches/llamacpp/0003-gemma4-tokenizer-fix.patch
|
../patches/llamacpp/0003-gemma4-tokenizer-fix.patch
|
||||||
|
../patches/llamacpp/0004-gemma4-graph-fix.patch
|
||||||
];
|
];
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
|
|||||||
Reference in New Issue
Block a user