
Commit 690c563

Merge branch 'main' of github.com:abetlen/llama_cpp_python into main
2 parents: c0fc0a1 + 8e44a32

File tree: 6 files changed, +54 −17 lines


README.md

Lines changed: 4 additions & 2 deletions
@@ -207,7 +207,8 @@ The gguf-converted files for this model can be found here: [functionary-7b-v1](h
 messages = [
     {
         "role": "system",
-        "content": "A chat between a curious user and an artificial intelligence assitant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant callse functions with appropriate input when necessary"
+        "content": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant calls functions with appropriate input when necessary"
+
     },
     {
         "role": "user",
@@ -265,7 +266,8 @@ Then you'll need to use a custom chat handler to load the clip model and process
 >>> llm = Llama(
   model_path="./path/to/llava/llama-model.gguf",
   chat_handler=chat_handler,
-  n_ctx=2048 # n_ctx should be increased to accomodate the image embedding
+  n_ctx=2048, # n_ctx should be increased to accomodate the image embedding
+  logits_all=True,# needed to make llava work
 )
 >>> llm.create_chat_completion(
     messages = [
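
For context, a minimal sketch of how the touched README example is used after this change. The chat handler class and file paths follow the surrounding README section on llava and are placeholders here, not part of this commit:

from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

# Placeholder path for the LLaVA CLIP/projector file loaded by the chat handler.
chat_handler = Llava15ChatHandler(clip_model_path="./path/to/llava/mmproj.bin")
llm = Llama(
    model_path="./path/to/llava/llama-model.gguf",
    chat_handler=chat_handler,
    n_ctx=2048,       # larger context to accommodate the image embedding
    logits_all=True,  # the flag this commit adds to the example
)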

llama_cpp/_utils.py

Lines changed: 16 additions & 10 deletions
@@ -17,14 +17,18 @@ def __enter__(self):
         if self.disable:
             return self
 
+        # Check if sys.stdout and sys.stderr have fileno method
+        if not hasattr(self.sys.stdout, 'fileno') or not hasattr(self.sys.stderr, 'fileno'):
+            return self  # Return the instance without making changes
+
         self.outnull_file = self.open(self.os.devnull, "w")
         self.errnull_file = self.open(self.os.devnull, "w")
 
         self.old_stdout_fileno_undup = self.sys.stdout.fileno()
         self.old_stderr_fileno_undup = self.sys.stderr.fileno()
 
-        self.old_stdout_fileno = self.os.dup(self.sys.stdout.fileno())
-        self.old_stderr_fileno = self.os.dup(self.sys.stderr.fileno())
+        self.old_stdout_fileno = self.os.dup(self.old_stdout_fileno_undup)
+        self.old_stderr_fileno = self.os.dup(self.old_stderr_fileno_undup)
 
         self.old_stdout = self.sys.stdout
         self.old_stderr = self.sys.stderr
@@ -40,14 +44,16 @@ def __exit__(self, *_):
         if self.disable:
             return
 
-        self.sys.stdout = self.old_stdout
-        self.sys.stderr = self.old_stderr
+        # Check if sys.stdout and sys.stderr have fileno method
+        if hasattr(self.sys.stdout, 'fileno') and hasattr(self.sys.stderr, 'fileno'):
+            self.sys.stdout = self.old_stdout
+            self.sys.stderr = self.old_stderr
 
-        self.os.dup2(self.old_stdout_fileno, self.old_stdout_fileno_undup)
-        self.os.dup2(self.old_stderr_fileno, self.old_stderr_fileno_undup)
+            self.os.dup2(self.old_stdout_fileno, self.old_stdout_fileno_undup)
+            self.os.dup2(self.old_stderr_fileno, self.old_stderr_fileno_undup)
 
-        self.os.close(self.old_stdout_fileno)
-        self.os.close(self.old_stderr_fileno)
+            self.os.close(self.old_stdout_fileno)
+            self.os.close(self.old_stderr_fileno)
 
-        self.outnull_file.close()
-        self.errnull_file.close()
+            self.outnull_file.close()
+            self.errnull_file.close()
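
For context (not part of the diff): the added hasattr guard matters in environments such as Jupyter or pytest with output capturing, where sys.stdout and sys.stderr can be replaced by objects that expose write() but no fileno(). A self-contained sketch of the failure mode the guard avoids; the names below are hypothetical:

import os

class CapturedStream:
    """Hypothetical stand-in for a replaced sys.stdout (e.g. under pytest or
    Jupyter) that offers write()/flush() but no fileno()."""
    def write(self, text: str) -> int:
        return len(text)
    def flush(self) -> None:
        pass

stream = CapturedStream()

# Before this change __enter__ called stream.fileno() unconditionally, which
# raises AttributeError for objects like this one; the new guard returns early
# instead, leaving output unsuppressed rather than crashing.
if hasattr(stream, "fileno"):
    saved_fd = os.dup(stream.fileno())  # fd-level redirection is safe here
    os.close(saved_fd)
else:
    print("no real file descriptor behind stdout; skipping os.dup/os.dup2 redirection")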

llama_cpp/llama.py

Lines changed: 8 additions & 4 deletions
@@ -2280,10 +2280,14 @@ def token_nl(self) -> int:
         return self._model.token_nl()
 
     @staticmethod
-    def logits_to_logprobs(logits: List[float]) -> List[float]:
-        exps = [math.exp(float(x)) for x in logits]
-        sum_exps = sum(exps)
-        return [math.log(x / sum_exps) for x in exps]
+    def logits_to_logprobs(logits: npt.NDArray[np.single]) -> npt.NDArray[np.single]:
+        maximum = np.max(logits)
+        tmp = np.subtract(logits, maximum, dtype=np.single)
+        np.exp(tmp, out=tmp)
+        normalizer = 1.0 / np.sum(tmp)
+        np.multiply(normalizer, tmp, out=tmp)
+        np.log(tmp, out=tmp)
+        return tmp
 
     @staticmethod
     def longest_token_prefix(a: Sequence[int], b: Sequence[int]):
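
Not part of the diff, but worth spelling out: the rewritten method is a numerically stable log-softmax. Subtracting the maximum logit before exponentiating keeps np.exp from overflowing, which the old math.exp list version did for large logits. A standalone sketch mirroring the new code:

import numpy as np

def logits_to_logprobs(logits: np.ndarray) -> np.ndarray:
    """Stable log-softmax, mirroring the rewritten static method above."""
    tmp = np.subtract(logits, np.max(logits), dtype=np.single)  # shift so the max logit is 0
    np.exp(tmp, out=tmp)                                        # exp(x - max) never overflows
    np.multiply(1.0 / np.sum(tmp), tmp, out=tmp)                # normalize to probabilities
    np.log(tmp, out=tmp)                                        # back to log space
    return tmp

logits = np.array([1000.0, 1001.0, 1002.0], dtype=np.single)
# math.exp(1000.0) raises OverflowError, so the old list-based version failed here;
# the shifted version stays finite and its exp sums to 1.
logprobs = logits_to_logprobs(logits)
print(logprobs)                                   # approx [-2.408, -1.408, -0.408]
print(np.isclose(np.exp(logprobs).sum(), 1.0))    # True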

llama_cpp/llama_chat_format.py

Lines changed: 17 additions & 0 deletions
@@ -637,6 +637,23 @@ def format_zephyr(
     _prompt = _format_chatml(system_message, _messages, _sep)
     return ChatFormatterResponse(prompt=_prompt, stop=_sep)
 
+
+@register_chat_format("pygmalion")
+def format_pygmalion(
+    messages: List[llama_types.ChatCompletionRequestMessage],
+    **kwargs: Any,
+) -> ChatFormatterResponse:
+    system_template = """<|system|>{system_message}"""
+    system_message = _get_system_message(messages)
+    system_message = system_template.format(system_message=system_message)
+    _roles = dict(user="<|user|>", assistant="<|model|>")
+    _sep = "\n"
+    _messages = _map_roles(messages, _roles)
+    _messages.append((_roles["assistant"], None))
+    _prompt = _format_chatml(system_message, _messages, _sep)
+    return ChatFormatterResponse(prompt=_prompt, stop=_sep)
+
+
 @register_chat_format("chatml")
 def format_chatml(
     messages: List[llama_types.ChatCompletionRequestMessage],
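
A hedged usage sketch (not from the diff): once registered, the new formatter can be selected by name through the public Llama constructor. The model path below is a placeholder for any Pygmalion-style GGUF file:

from llama_cpp import Llama

# chat_format="pygmalion" picks up the formatter registered above.
llm = Llama(
    model_path="./models/pygmalion-2-7b.Q4_K_M.gguf",  # placeholder path
    chat_format="pygmalion",
)
response = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "Enter roleplay mode. You are Aria."},
        {"role": "user", "content": "Hello!"},
    ],
)
print(response["choices"][0]["message"]["content"])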

llama_cpp/server/__main__.py

Lines changed: 2 additions & 1 deletion
@@ -96,5 +96,6 @@ def parse_bool_arg(arg):
     app = create_app(settings=settings)
 
     uvicorn.run(
-        app, host=os.getenv("HOST", settings.host), port=int(os.getenv("PORT", settings.port))
+        app, host=os.getenv("HOST", settings.host), port=int(os.getenv("PORT", settings.port)),
+        ssl_keyfile=settings.ssl_keyfile, ssl_certfile=settings.ssl_certfile
     )
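
For reference (not part of the commit): ssl_keyfile and ssl_certfile are standard uvicorn.run keyword arguments; when both point at a valid PEM key/certificate pair, uvicorn serves HTTPS. A minimal standalone sketch with placeholder paths and a trivial ASGI app:

import uvicorn

async def app(scope, receive, send):
    # Trivial ASGI app used only to demonstrate the TLS arguments.
    assert scope["type"] == "http"
    await send({"type": "http.response.start", "status": 200,
                "headers": [(b"content-type", b"text/plain")]})
    await send({"type": "http.response.body", "body": b"ok"})

if __name__ == "__main__":
    uvicorn.run(
        app, host="0.0.0.0", port=8443,
        ssl_keyfile="./certs/server.key",   # placeholder PEM private key
        ssl_certfile="./certs/server.crt",  # placeholder PEM certificate
    )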

llama_cpp/server/app.py

Lines changed: 7 additions & 0 deletions
@@ -150,6 +150,13 @@ class Settings(BaseSettings):
     # Server Params
     host: str = Field(default="localhost", description="Listen address")
     port: int = Field(default=8000, description="Listen port")
+    # SSL Params
+    ssl_keyfile: Optional[str] = Field(
+        default=None, description="SSL key file for HTTPS"
+    )
+    ssl_certfile: Optional[str] = Field(
+        default=None, description="SSL certificate file for HTTPS"
+    )
     interrupt_requests: bool = Field(
         default=True,
         description="Whether to interrupt requests when a new request is received.",

0 commit comments
