1 file changed: +6 -9 lines changed

@@ -127,9 +127,7 @@ def __init__(
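(Summary of the diff below: the `eval_logits` deque initializer is collapsed onto one line, and the cache save in `_create_completion` is moved from the start of sampling, where it stored only the prompt state on a cache miss, to after generation finishes, keyed on prompt plus completion tokens.)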
         self.last_n_tokens_size = last_n_tokens_size
         self.n_batch = min(n_ctx, n_batch)
         self.eval_tokens: Deque[llama_cpp.llama_token] = deque(maxlen=n_ctx)
-        self.eval_logits: Deque[List[float]] = deque(
-            maxlen=n_ctx if logits_all else 1
-        )
+        self.eval_logits: Deque[List[float]] = deque(maxlen=n_ctx if logits_all else 1)
 
 
         self.cache: Optional[LlamaCache] = None
@@ -547,12 +545,6 @@ def _create_completion(
                 finish_reason = "stop"
                 break
 
-            if self.cache and len(completion_tokens) == 0:
-                if prompt_tokens not in self.cache:
-                    if self.verbose:
-                        print("Llama._create_completion: cache miss", file=sys.stderr)
-                    self.cache[prompt_tokens] = self.save_state()
-
             completion_tokens.append(token)
 
             all_text = self.detokenize(completion_tokens)
@@ -611,6 +603,11 @@ def _create_completion(
                 finish_reason = "length"
                 break
 
+        if self.cache:
+            if self.verbose:
+                print("Llama._create_completion: cache save", file=sys.stderr)
+            self.cache[prompt_tokens + completion_tokens] = self.save_state()
+
         if stream:
             yield {
                 "id": completion_id,