Commit 2753b85

Format
1 parent 4f8cf52 commit 2753b85

1 file changed (+6, -9 lines)


llama_cpp/llama.py

Lines changed: 6 additions & 9 deletions
@@ -127,9 +127,7 @@ def __init__(
         self.last_n_tokens_size = last_n_tokens_size
         self.n_batch = min(n_ctx, n_batch)
         self.eval_tokens: Deque[llama_cpp.llama_token] = deque(maxlen=n_ctx)
-        self.eval_logits: Deque[List[float]] = deque(
-            maxlen=n_ctx if logits_all else 1
-        )
+        self.eval_logits: Deque[List[float]] = deque(maxlen=n_ctx if logits_all else 1)
 
         self.cache: Optional[LlamaCache] = None
 
@@ -547,12 +545,6 @@ def _create_completion(
                 finish_reason = "stop"
                 break
 
-            if self.cache and len(completion_tokens) == 0:
-                if prompt_tokens not in self.cache:
-                    if self.verbose:
-                        print("Llama._create_completion: cache miss", file=sys.stderr)
-                    self.cache[prompt_tokens] = self.save_state()
-
             completion_tokens.append(token)
 
             all_text = self.detokenize(completion_tokens)
@@ -611,6 +603,11 @@ def _create_completion(
                 finish_reason = "length"
                 break
 
+        if self.cache:
+            if self.verbose:
+                print("Llama._create_completion: cache save", file=sys.stderr)
+            self.cache[prompt_tokens + completion_tokens] = self.save_state()
+
         if stream:
             yield {
                 "id": completion_id,
