|
32 | 32 |
|
33 | 33 | logger = logging.getLogger("google_adk." + __name__) |
34 | 34 |
|
| 35 | +# Gemini API requires a minimum of 4096 tokens for cached content. |
| 36 | +_GEMINI_MIN_CACHE_TOKENS = 4096 |
| 37 | + |
35 | 38 | if TYPE_CHECKING: |
36 | 39 | from google.genai import Client |
37 | 40 |
|
@@ -119,6 +122,19 @@ async def handle_context_caching( |
119 | 122 | ) |
120 | 123 | return cache_metadata |
121 | 124 |
|
| 125 | + # Cache creation failed (e.g., below Gemini's 4096 token minimum). |
| 126 | + # Preserve the original contents_count so the fingerprint stays |
| 127 | + # stable for subsequent calls instead of resetting to the total contents count. |
| 128 | + logger.debug( |
| 129 | + "Cache creation failed, preserving prefix fingerprint " |
| 130 | + "(contents_count=%d)", |
| 131 | + cache_contents_count, |
| 132 | + ) |
| 133 | + return CacheMetadata( |
| 134 | + fingerprint=current_fingerprint, |
| 135 | + contents_count=cache_contents_count, |
| 136 | + ) |
| 137 | + |
122 | 138 | # Fingerprints don't match - recalculate with total contents |
123 | 139 | logger.debug( |
124 | 140 | "Fingerprints don't match, returning fingerprint-only metadata" |
@@ -304,6 +320,15 @@ async def _create_new_cache_with_contents( |
304 | 320 | ) |
305 | 321 | return None |
306 | 322 |
|
| 323 | + # Check the minimum token count client-side to avoid an API round-trip for a request too small to cache. |
| 324 | + if llm_request.cacheable_contents_token_count < _GEMINI_MIN_CACHE_TOKENS: |
| 325 | + logger.info( |
| 326 | + "Request below Gemini minimum cache size (%d < %d tokens)", |
| 327 | + llm_request.cacheable_contents_token_count, |
| 328 | + _GEMINI_MIN_CACHE_TOKENS, |
| 329 | + ) |
| 330 | + return None |
| 331 | + |
307 | 332 | try: |
308 | 333 | # Create cache using Gemini API directly |
309 | 334 | return await self._create_gemini_cache(llm_request, cache_contents_count) |
|
0 commit comments