Commit 80e2073

Update to llama.cpp b419

1 parent acf50f1 commit 80e2073

File tree

7 files changed: +205 -242 lines changed

examples/low_level_api/low_level_api_llama_cpp.py

Lines changed: 2 additions & 5 deletions

@@ -66,12 +66,9 @@
         llama_cpp.llama_token_data_array(_arr, len(_arr), False))
 
     _arr = (llama_cpp.c_int * len(last_n_tokens_data))(*last_n_tokens_data)
-    llama_cpp.llama_sample_repetition_penalty(ctx, candidates_p,
+    llama_cpp.llama_sample_repetition_penalties(ctx, candidates_p,
         _arr,
-        last_n_repeat, repeat_penalty)
-    llama_cpp.llama_sample_frequency_and_presence_penalties(ctx, candidates_p,
-        _arr,
-        last_n_repeat, frequency_penalty, presence_penalty)
+        last_n_repeat, repeat_penalty, frequency_penalty, presence_penalty)
 
     llama_cpp.llama_sample_top_k(ctx, candidates_p, k=40, min_keep=1)
     llama_cpp.llama_sample_top_p(ctx, candidates_p, p=0.8, min_keep=1)
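
Upstream llama.cpp merged the separate repetition and frequency/presence samplers into a single llama_sample_repetition_penalties call, which is why the example now applies all three penalties at once. As a rough pure-Python sketch of what that merged pass does to each candidate logit (paraphrasing the upstream sampler's behavior; every name below is illustrative, not the bindings API):

    from collections import Counter
    from typing import Dict, List

    def apply_penalties_sketch(
        logits: Dict[int, float],   # token id -> logit, stands in for candidates_p
        last_tokens: List[int],     # recent context tokens (the _arr above)
        penalty_repeat: float,      # > 1.0 discourages any repeated token
        penalty_freq: float,        # grows with how often a token appeared
        penalty_present: float,     # flat penalty for having appeared at all
    ) -> None:
        counts = Counter(last_tokens)
        for token, count in counts.items():
            if token not in logits:
                continue
            # Repetition penalty: shrink positive logits, push negative ones lower.
            if logits[token] > 0:
                logits[token] /= penalty_repeat
            else:
                logits[token] *= penalty_repeat
            # OpenAI-style frequency and presence penalties; every token in
            # counts has appeared at least once, so both terms apply.
            logits[token] -= count * penalty_freq + penalty_present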

llama_cpp/__init__.py

Lines changed: 6 additions & 1 deletion

@@ -1,4 +1,9 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.2.11"
+__version__ = "0.2.11"
+
+__all__ = [
+    "llama_cpp",
+    "llama"
+]
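
The new __all__ makes the package's star-import surface explicit: from llama_cpp import * now binds just the two submodule names rather than whatever the submodules happen to export. A quick check of the effect (module layout taken from the diff; the exact output is an assumption):

    # Both submodules are importable attributes of the package, and __all__
    # now documents them as the public star-import names.
    import llama_cpp

    print(llama_cpp.__version__)  # "0.2.11" as of this commit
    print(llama_cpp.__all__)      # ["llama_cpp", "llama"]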

llama_cpp/llama.py

Lines changed: 4 additions & 10 deletions

@@ -595,20 +595,14 @@ def _sample(
         candidates.data = candidates_data.ctypes.data_as(llama_cpp.llama_token_data_p)
         candidates.sorted = llama_cpp.c_bool(False)
         candidates.size = llama_cpp.c_size_t(n_vocab)
-        llama_cpp.llama_sample_repetition_penalty(
+        llama_cpp.llama_sample_repetition_penalties(
             ctx=self.ctx,
             last_tokens_data=last_n_tokens_data,
             last_tokens_size=last_n_tokens_size,
             candidates=llama_cpp.ctypes.byref(candidates),  # type: ignore
-            penalty=repeat_penalty,
-        )
-        llama_cpp.llama_sample_frequency_and_presence_penalties(
-            ctx=self.ctx,
-            candidates=llama_cpp.ctypes.byref(candidates),  # type: ignore
-            last_tokens_data=last_n_tokens_data,
-            last_tokens_size=last_n_tokens_size,
-            alpha_frequency=frequency_penalty,
-            alpha_presence=presence_penalty,
+            penalty_repeat=repeat_penalty,
+            penalty_freq=frequency_penalty,
+            penalty_present=presence_penalty
         )
         if not penalize_nl:
             candidates.data[self._token_nl].logit = llama_cpp.c_float(nl_logit)
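
The merged binding also renames the keyword arguments: penalty becomes penalty_repeat, and alpha_frequency/alpha_presence become penalty_freq/penalty_present. Downstream code still using the old names could bridge the rename with a thin shim along these lines (purely illustrative, not part of the library):

    # Hypothetical compatibility shim for the merged sampler; only
    # llama_sample_repetition_penalties and its new keyword names come from
    # this commit, the wrapper itself is made up for illustration.
    import llama_cpp

    def sample_penalties_compat(ctx, candidates, last_tokens_data,
                                last_tokens_size, penalty, alpha_frequency,
                                alpha_presence):
        llama_cpp.llama_sample_repetition_penalties(
            ctx=ctx,
            candidates=candidates,
            last_tokens_data=last_tokens_data,
            last_tokens_size=last_tokens_size,
            penalty_repeat=penalty,          # was penalty
            penalty_freq=alpha_frequency,    # was alpha_frequency
            penalty_present=alpha_presence,  # was alpha_presence
        )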

llama_cpp/llama_chat_format.py

Lines changed: 32 additions & 32 deletions

@@ -4,7 +4,7 @@
 
 
 def _get_system_message(
-    messages: List[llama_types.ChatCompletionRequestMessage],
+        messages: List[llama_types.ChatCompletionRequestMessage],
 ) -> str:
     """Get the first system message."""
     for message in messages:
@@ -14,7 +14,7 @@ def _get_system_message(
 
 
 def _map_roles(
-    messages: List[llama_types.ChatCompletionRequestMessage], role_map: Dict[str, str]
+        messages: List[llama_types.ChatCompletionRequestMessage], role_map: Dict[str, str]
 ) -> List[Tuple[str, Optional[str]]]:
     """Map the message roles."""
     output: List[Tuple[str, Optional[str]]] = []
@@ -26,7 +26,7 @@ def _map_roles(
 
 
 def _format_llama2(
-    system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
+        system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
 ) -> str:
     """Format the prompt with the llama2 style."""
     ret = system_message + sep
@@ -39,7 +39,7 @@ def _format_llama2(
 
 
 def _format_add_colon_single(
-    system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
+        system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
 ) -> str:
     """Format the prompt with the add-colon-single style."""
     ret = system_message + sep
@@ -52,7 +52,7 @@ def _format_add_colon_single(
 
 
 def _format_add_colon_two(
-    system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str, sep2: str
+        system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str, sep2: str
 ) -> str:
     """Format the prompt with the add-colon-two style."""
     seps = [sep, sep2]
@@ -66,7 +66,7 @@ def _format_add_colon_two(
 
 
 def _format_no_colon_single(
-    system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
+        system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
 ) -> str:
     """Format the prompt with the no-colon-single style."""
     ret = system_message
@@ -79,7 +79,7 @@ def _format_no_colon_single(
 
 
 def _format_add_colon_space_single(
-    system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
+        system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
 ) -> str:
     """Format the prompt with the add-colon-space-single style."""
     ret = system_message + sep
@@ -92,7 +92,7 @@ def _format_add_colon_space_single(
 
 
 def _format_chatml(
-    system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
+        system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
 ) -> str:
     """Format the prompt with the chatml style."""
     ret = "" if system_message == "" else system_message + sep + "\n"
@@ -112,9 +112,9 @@ class ChatFormatterResponse:
 
 class ChatFormatter(Protocol):
     def __call__(
-        self,
-        messages: List[llama_types.ChatCompletionRequestMessage],
-        **kwargs: Any,
+            self,
+            messages: List[llama_types.ChatCompletionRequestMessage],
+            **kwargs: Any,
     ) -> ChatFormatterResponse:
         ...
 
@@ -141,8 +141,8 @@ def get_chat_format(name: str):
 
 @register_chat_format("llama-2")
 def format_llama2(
-    messages: List[llama_types.ChatCompletionRequestMessage],
-    **kwargs: Any,
+        messages: List[llama_types.ChatCompletionRequestMessage],
+        **kwargs: Any,
 ) -> ChatFormatterResponse:
     _system_template = "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n"
     _roles = dict(user="[INST]", assistant="[/INST]")
@@ -157,8 +157,8 @@ def format_llama2(
 
 @register_chat_format("alpaca")
 def format_alpaca(
-    messages: List[llama_types.ChatCompletionRequestMessage],
-    **kwargs: Any,
+        messages: List[llama_types.ChatCompletionRequestMessage],
+        **kwargs: Any,
 ) -> ChatFormatterResponse:
     _roles = dict(user="### Instruction", assistant="### Response")
     _sep = "\n\n"
@@ -171,8 +171,8 @@ def format_alpaca(
 
 @register_chat_format("vicuna")
 def format(
-    messages: List[llama_types.ChatCompletionRequestMessage],
-    **kwargs: Any,
+        messages: List[llama_types.ChatCompletionRequestMessage],
+        **kwargs: Any,
 ) -> ChatFormatterResponse:
     _system_message = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."
     _roles = dict(user="USER", assistant="ASSISTANT")
@@ -187,8 +187,8 @@ def format(
 
 @register_chat_format("oasst_llama")
 def format_oasst_llama(
-    messages: List[llama_types.ChatCompletionRequestMessage],
-    **kwargs: Any,
+        messages: List[llama_types.ChatCompletionRequestMessage],
+        **kwargs: Any,
 ) -> ChatFormatterResponse:
     _system_template = "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n"
     _roles = dict(user="<|prompter|>", assistant="<|assistant|>")
@@ -203,8 +203,8 @@ def format_oasst_llama(
 
 @register_chat_format("openbuddy")
 def format_openbuddy(
-    messages: List[llama_types.ChatCompletionRequestMessage],
-    **kwargs: Any,
+        messages: List[llama_types.ChatCompletionRequestMessage],
+        **kwargs: Any,
 ) -> ChatFormatterResponse:
     _system_message = """Consider a conversation between User (a human) and Assistant (named Buddy).
Buddy is an INTP-T, a friendly, intelligent and multilingual AI assistant, by OpenBuddy team. GitHub: https://github.com/OpenBuddy/OpenBuddy
@@ -228,8 +228,8 @@ def format_openbuddy(
 
 @register_chat_format("redpajama-incite")
 def format_redpajama_incite(
-    messages: List[llama_types.ChatCompletionRequestMessage],
-    **kwargs: Any,
+        messages: List[llama_types.ChatCompletionRequestMessage],
+        **kwargs: Any,
 ) -> ChatFormatterResponse:
     _system_message = _get_system_message(messages)
     _roles = dict(user="<human>", assistant="<bot>")
@@ -244,8 +244,8 @@ def format_redpajama_incite(
 
 @register_chat_format("snoozy")
 def format_snoozy(
-    messages: List[llama_types.ChatCompletionRequestMessage],
-    **kwargs: Any,
+        messages: List[llama_types.ChatCompletionRequestMessage],
+        **kwargs: Any,
 ) -> ChatFormatterResponse:
     system_template = "### Instruction:\n{system_message}"
    default_system_message = "The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response."
@@ -266,8 +266,8 @@ def format_snoozy(
 
 @register_chat_format("phind")
 def format_phind(
-    messages: List[llama_types.ChatCompletionRequestMessage],
-    **kwargs: Any,
+        messages: List[llama_types.ChatCompletionRequestMessage],
+        **kwargs: Any,
 ) -> ChatFormatterResponse:
     _roles = dict(user="### User Message", assistant="### Assistant")
     _sep = "\n\n"
@@ -280,8 +280,8 @@ def format_phind(
 
 @register_chat_format("open-orca")
 def format_open_orca(
-    messages: List[llama_types.ChatCompletionRequestMessage],
-    **kwargs: Any,
+        messages: List[llama_types.ChatCompletionRequestMessage],
+        **kwargs: Any,
 ) -> ChatFormatterResponse:
     system_template = "{system_message}"
     system_message = (
@@ -307,8 +307,8 @@ def format_open_orca(
 
 @register_chat_format("chatml")
 def format_chatml(
-    messages: List[llama_types.ChatCompletionRequestMessage],
-    **kwargs: Any,
+        messages: List[llama_types.ChatCompletionRequestMessage],
+        **kwargs: Any,
 ) -> ChatFormatterResponse:
     system_template = """<|im_start|>system
{system_message}"""
@@ -319,4 +319,4 @@ def format_chatml(
     _messages = _map_roles(messages, _roles)
     _messages.append((_roles["assistant"], None))
     _prompt = _format_chatml(system_message, _messages, _sep)
-    return ChatFormatterResponse(prompt=_prompt)
+    return ChatFormatterResponse(prompt=_prompt, stop=_sep)
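
With the chatml formatter now returning its separator as a stop sequence, callers can cut generation cleanly at the end-of-turn marker. A usage sketch (get_chat_format and ChatFormatterResponse come from the hunks above; the messages are made up for the example):

    from llama_cpp.llama_chat_format import get_chat_format

    formatter = get_chat_format("chatml")
    response = formatter(
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello!"},
        ]
    )
    print(response.prompt)  # "<|im_start|>system\n..." style prompt
    print(response.stop)    # the chatml separator (_sep), previously None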
