Skip to content

Commit c9bf03a

Browse files
committed
Fix some logic
- Raise an error if `save_logits` is requested but the model will not produce logits (`logits_all` is False). - Get rid of an extraneous comma that was causing the bytes-set equality check to fail.
1 parent e5cccf4 commit c9bf03a

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

llama_cpp/llama_cache.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,11 @@ def build_cache(
253253
"""
254254
cache = LlamaStaticDiskCache(cache_dir, capacity_bytes)
255255

256+
if save_logits and not model.context_params.logits_all:
257+
raise ValueError(
258+
"Cannot save logits in cache when model is not configured to return logits."
259+
)
260+
256261
for p in prompts:
257262
if seed:
258263
model.set_seed(seed)
@@ -273,6 +278,9 @@ def build_cache(
273278
if (
274279
model.context_params.logits_all
275280
or model.draft_model is not None
281+
# This may be overly cautious, `embed` method does not use
282+
# numpy scores, instead uses logit values on llama.cpp
283+
# context.
276284
or model.context_params.embeddings
277285
):
278286
# Erroring instead of falling back to just saving with scores
@@ -365,7 +373,7 @@ def reload_from_cache_state(
365373
if (
366374
bytes_set := llama_cpp.llama_state_set_data(
367375
model._ctx.ctx, llama_state, ctypes.sizeof(llama_state)
368-
),
376+
)
369377
) != state_size:
370378
raise RuntimeError(
371379
"Failed to set llama state data - mismatch between bytes set "

0 commit comments

Comments (0)