
Commit 7bd7175

docs: clarify enable_thinking compatibility for Gemma 4 models

- Update `Gemma4ChatHandler` class docstring and `__init__` args documentation.
- Specify that the `enable_thinking` toggle is exclusively supported by Gemma4 31B and 26BA4B variants.
- Explicitly note that E2B and E4B models do not currently support this feature, to prevent configuration errors.

Signed-off-by: JamePeng <jame_peng@sina.com>

1 parent 6e99244 · commit 7bd7175

1 file changed: 6 additions & 0 deletions

File tree

llama_cpp/llama_chat_format.py (6 additions, 0 deletions)
@@ -4338,6 +4338,10 @@ class Gemma3ChatHandler(MTMDChatHandler):
 class Gemma4ChatHandler(MTMDChatHandler):
     """
     Handler for Gemma 4 models.
+
+    Note on `enable_thinking`:
+        The `enable_thinking` toggle is currently ONLY supported by Gemma4 31B and 26BA4B models.
+        It is NOT supported by Gemma4 E2B and E4B models.
     """
 
     # The special token in Gemma 4
@@ -4641,6 +4645,8 @@ def __init__(self, enable_thinking: bool = True, **kwargs):
         Args:
             enable_thinking (bool): Controls whether the <|think|> tag is injected and
                 manages <|channel>thought behavior.
+                Note: ONLY supported on Gemma4 31B and 26BA4B models.
+                NOT supported on Gemma4 E2B and E4B models.
         """
         self.enable_thinking = enable_thinking
         super().__init__(**kwargs)
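The docstring note above is advisory only; the commit does not add a runtime check. A caller could enforce the documented constraint before constructing the handler. This is a minimal sketch: the `SUPPORTED_THINKING_VARIANTS` set and the `validate_enable_thinking` helper are illustrative names, not part of llama-cpp-python.

```python
# Hypothetical guard mirroring the docstring added in this commit:
# `enable_thinking` is documented as supported only on Gemma4 31B and
# 26BA4B variants, and not on E2B or E4B.
SUPPORTED_THINKING_VARIANTS = {"31B", "26BA4B"}


def validate_enable_thinking(variant: str, enable_thinking: bool) -> bool:
    """Return the effective enable_thinking value for a model variant.

    Raises ValueError when thinking is requested on a variant the
    docstring marks as unsupported (e.g. E2B or E4B).
    """
    if enable_thinking and variant not in SUPPORTED_THINKING_VARIANTS:
        raise ValueError(
            f"enable_thinking is not supported on Gemma4 {variant} models"
        )
    return enable_thinking
```

With such a guard, passing `enable_thinking=True` for a 31B model succeeds, while the same request for an E4B model fails fast instead of silently misconfiguring the handler.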
