You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: llama_cpp/server/types.py
+17-2Lines changed: 17 additions & 2 deletions
Original file line number
Diff line number
Diff line change
@@ -1,6 +1,6 @@
1
1
from __future__ import annotations
2
2
3
-
from typing import List, Optional, Union, Dict
3
+
from typing import List, Optional, Union, Dict, Any
4
4
from typing_extensions import TypedDict, Literal
5
5
6
6
from pydantic import BaseModel, Field
@@ -54,6 +54,11 @@
54
54
description="Whether to stream the results as they are generated. Useful for chatbots.",
55
55
)
56
56
57
+
include_usage = Field(
58
+
default=False,
59
+
description="If set, an additional chunk will be streamed before the data: [DONE] message. The usage field on this chunk shows the token usage statistics for the entire request, and the choices field will always be an empty array. All other chunks will also include a usage field, but with a null value.",
60
+
)
61
+
57
62
top_k_field = Field(
58
63
default=40,
59
64
ge=0,
@@ -127,6 +132,12 @@ class CreateCompletionRequest(BaseModel):
127
132
)
128
133
stop: Optional[Union[str, List[str]]] = stop_field
129
134
stream: bool = stream_field
135
+
136
+
stream_options: Optional[Dict[str, Any]] = Field(
137
+
default=None,
138
+
description="Options for streaming response. Only set this when you set stream: true.",
139
+
)
140
+
130
141
logprobs: Optional[int] = Field(
131
142
default=None,
132
143
ge=0,
@@ -216,7 +227,7 @@ class CreateChatCompletionRequest(BaseModel):
216
227
min_tokens: int = min_tokens_field
217
228
logprobs: Optional[bool] = Field(
218
229
default=False,
219
-
description="Whether to output the logprobs or not. Default is True"
230
+
description="Whether to output the logprobs or not. Default is True",
220
231
)
221
232
top_logprobs: Optional[int] = Field(
222
233
default=None,
@@ -228,6 +239,10 @@ class CreateChatCompletionRequest(BaseModel):
228
239
min_p: float = min_p_field
229
240
stop: Optional[Union[str, List[str]]] = stop_field
230
241
stream: bool = stream_field
242
+
stream_options: Optional[Dict[str, Any]] = Field(
243
+
default=None,
244
+
description="Options for streaming response. Only set this when you set stream: true.",
0 commit comments