Skip to content

Commit b47b954

Browse files
committed
llama_cpp server: delete some ignored / unused parameters
`n`, `presence_penalty`, `frequency_penalty`, `best_of`, `logit_bias`, `user`: not supported and already excluded from the calls into llama. Decision: delete them.
1 parent e40fcb0 commit b47b954

File tree

2 files changed

+3
-29
lines changed

2 files changed

+3
-29
lines changed

llama_cpp/llama_types.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,6 @@ class Completion(TypedDict):

 class ChatCompletionMessage(TypedDict):
     role: Union[Literal["assistant"], Literal["user"], Literal["system"]]
     content: str
-    user: NotRequired[str]
-

 class ChatCompletionChoice(TypedDict):
     index: int

llama_cpp/server/app.py

Lines changed: 3 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -83,13 +83,7 @@ class CreateCompletionRequest(BaseModel):
     # ignored, but marked as required for the sake of compatibility with openai's api
     model: str = model_field

-    n: Optional[int] = 1
     logprobs: Optional[int] = Field(None)
-    presence_penalty: Optional[float] = 0
-    frequency_penalty: Optional[float] = 0
-    best_of: Optional[int] = 1
-    logit_bias: Optional[Dict[str, float]] = Field(None)
-    user: Optional[str] = Field(None)

     # llama.cpp specific parameters
     top_k: int = 40
@@ -120,13 +114,7 @@ def create_completion(
     completion_or_chunks = llama(
         **request.dict(
             exclude={
-                "model",
-                "n",
-                "frequency_penalty",
-                "presence_penalty",
-                "best_of",
-                "logit_bias",
-                "user",
+                "model"
             }
         )
     )
@@ -141,7 +129,6 @@ class CreateEmbeddingRequest(BaseModel):
     # ignored, but marked as required for the sake of compatibility with openai's api
     model: str = model_field
     input: str
-    user: Optional[str]

     class Config:
         schema_extra = {
@@ -161,7 +148,7 @@ class Config:
 def create_embedding(
     request: CreateEmbeddingRequest, llama: llama_cpp.Llama = Depends(get_llama)
 ):
-    return llama.create_embedding(**request.dict(exclude={"model", "user"}))
+    return llama.create_embedding(**request.dict(exclude={"model"}))


 class ChatCompletionRequestMessage(BaseModel):
@@ -181,12 +168,6 @@ class CreateChatCompletionRequest(BaseModel):

     # ignored, but marked as required for the sake of compatibility with openai's api
     model: str = model_field
-
-    n: Optional[int] = 1
-    presence_penalty: Optional[float] = 0
-    frequency_penalty: Optional[float] = 0
-    logit_bias: Optional[Dict[str, float]] = Field(None)
-    user: Optional[str] = Field(None)

     # llama.cpp specific parameters
     repeat_penalty: float = 1.1
@@ -220,12 +201,7 @@ def create_chat_completion(
     completion_or_chunks = llama.create_chat_completion(
         **request.dict(
             exclude={
-                "model",
-                "n",
-                "presence_penalty",
-                "frequency_penalty",
-                "logit_bias",
-                "user",
+                "model"
             }
         ),
     )

0 commit comments

Comments
 (0)