
Commit 690c563

Merge branch 'main' of github.com:abetlen/llama_cpp_python into main
2 parents: c0fc0a1 + 8e44a32

File tree: 6 files changed, +54 −17 lines


README.md

Lines changed: 4 additions & 2 deletions
@@ -207,7 +207,8 @@ The gguf-converted files for this model can be found here: [functionary-7b-v1](h
 messages = [
     {
         "role": "system",
-        "content": "A chat between a curious user and an artificial intelligence assitant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant callse functions with appropriate input when necessary"
+        "content": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant calls functions with appropriate input when necessary"
+
     },
     {
         "role": "user",
@@ -265,7 +266,8 @@ Then you'll need to use a custom chat handler to load the clip model and process
 >>> llm = Llama(
   model_path="./path/to/llava/llama-model.gguf",
   chat_handler=chat_handler,
-  n_ctx=2048 # n_ctx should be increased to accomodate the image embedding
+  n_ctx=2048, # n_ctx should be increased to accomodate the image embedding
+  logits_all=True,# needed to make llava work
 )
 >>> llm.create_chat_completion(
     messages = [
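
For context, a minimal sketch of how the touched README example is used after this change. The chat handler class and file paths follow the surrounding README section on llava and are placeholders here, not part of this commit:

from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

# Placeholder path for the LLaVA CLIP/projector file loaded by the chat handler.
chat_handler = Llava15ChatHandler(clip_model_path="./path/to/llava/mmproj.bin")
llm = Llama(
    model_path="./path/to/llava/llama-model.gguf",
    chat_handler=chat_handler,
    n_ctx=2048,       # larger context to accommodate the image embedding
    logits_all=True,  # the flag this commit adds to the example
)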

llama_cpp/_utils.py

Lines changed: 16 additions & 10 deletions
@@ -17,14 +17,18 @@ def __enter__(self):
         if self.disable:
             return self
 
+        # Check if sys.stdout and sys.stderr have fileno method
+        if not hasattr(self.sys.stdout, 'fileno') or not hasattr(self.sys.stderr, 'fileno'):
+            return self  # Return the instance without making changes
+
         self.outnull_file = self.open(self.os.devnull, "w")
         self.errnull_file = self.open(self.os.devnull, "w")
 
         self.old_stdout_fileno_undup = self.sys.stdout.fileno()
         self.old_stderr_fileno_undup = self.sys.stderr.fileno()
 
-        self.old_stdout_fileno = self.os.dup(self.sys.stdout.fileno())
-        self.old_stderr_fileno = self.os.dup(self.sys.stderr.fileno())
+        self.old_stdout_fileno = self.os.dup(self.old_stdout_fileno_undup)
+        self.old_stderr_fileno = self.os.dup(self.old_stderr_fileno_undup)
 
         self.old_stdout = self.sys.stdout
         self.old_stderr = self.sys.stderr
@@ -40,14 +44,16 @@ def __exit__(self, *_):
         if self.disable:
             return
 
-        self.sys.stdout = self.old_stdout
-        self.sys.stderr = self.old_stderr
+        # Check if sys.stdout and sys.stderr have fileno method
+        if hasattr(self.sys.stdout, 'fileno') and hasattr(self.sys.stderr, 'fileno'):
+            self.sys.stdout = self.old_stdout
+            self.sys.stderr = self.old_stderr
 
-        self.os.dup2(self.old_stdout_fileno, self.old_stdout_fileno_undup)
-        self.os.dup2(self.old_stderr_fileno, self.old_stderr_fileno_undup)
+            self.os.dup2(self.old_stdout_fileno, self.old_stdout_fileno_undup)
+            self.os.dup2(self.old_stderr_fileno, self.old_stderr_fileno_undup)
 
-        self.os.close(self.old_stdout_fileno)
-        self.os.close(self.old_stderr_fileno)
+            self.os.close(self.old_stdout_fileno)
+            self.os.close(self.old_stderr_fileno)
 
-        self.outnull_file.close()
-        self.errnull_file.close()
+            self.outnull_file.close()
+            self.errnull_file.close()
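
For context (not part of the diff): the added hasattr guard matters in environments such as Jupyter or pytest with output capturing, where sys.stdout and sys.stderr can be replaced by objects that expose write() but no fileno(). A self-contained sketch of the failure mode the guard avoids; the names below are hypothetical:

import os

class CapturedStream:
    """Hypothetical stand-in for a replaced sys.stdout (e.g. under pytest or
    Jupyter) that offers write()/flush() but no fileno()."""
    def write(self, text: str) -> int:
        return len(text)
    def flush(self) -> None:
        pass

stream = CapturedStream()

# Before this change __enter__ called stream.fileno() unconditionally, which
# raises AttributeError for objects like this one; the new guard returns early
# instead, leaving output unsuppressed rather than crashing.
if hasattr(stream, "fileno"):
    saved_fd = os.dup(stream.fileno())  # fd-level redirection is safe here
    os.close(saved_fd)
else:
    print("no real file descriptor behind stdout; skipping os.dup/os.dup2 redirection")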

llama_cpp/llama.py

Lines changed: 8 additions & 4 deletions
@@ -2280,10 +2280,14 @@ def token_nl(self) -> int:
         return self._model.token_nl()
 
     @staticmethod
-    def logits_to_logprobs(logits: List[float]) -> List[float]:
-        exps = [math.exp(float(x)) for x in logits]
-        sum_exps = sum(exps)
-        return [math.log(x / sum_exps) for x in exps]
+    def logits_to_logprobs(logits: npt.NDArray[np.single]) -> npt.NDArray[np.single]:
+        maximum = np.max(logits)
+        tmp = np.subtract(logits, maximum, dtype=np.single)
+        np.exp(tmp, out=tmp)
+        normalizer = 1.0 / np.sum(tmp)
+        np.multiply(normalizer, tmp, out=tmp)
+        np.log(tmp, out=tmp)
+        return tmp
 
     @staticmethod
     def longest_token_prefix(a: Sequence[int], b: Sequence[int]):
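
Not part of the diff, but worth spelling out: the rewritten method is a numerically stable log-softmax. Subtracting the maximum logit before exponentiating keeps np.exp from overflowing, which the old math.exp list version did for large logits. A standalone sketch mirroring the new code:

import numpy as np

def logits_to_logprobs(logits: np.ndarray) -> np.ndarray:
    """Stable log-softmax, mirroring the rewritten static method above."""
    tmp = np.subtract(logits, np.max(logits), dtype=np.single)  # shift so the max logit is 0
    np.exp(tmp, out=tmp)                                        # exp(x - max) never overflows
    np.multiply(1.0 / np.sum(tmp), tmp, out=tmp)                # normalize to probabilities
    np.log(tmp, out=tmp)                                        # back to log space
    return tmp

logits = np.array([1000.0, 1001.0, 1002.0], dtype=np.single)
# math.exp(1000.0) raises OverflowError, so the old list-based version failed here;
# the shifted version stays finite and its exp sums to 1.
logprobs = logits_to_logprobs(logits)
print(logprobs)                                   # approx [-2.408, -1.408, -0.408]
print(np.isclose(np.exp(logprobs).sum(), 1.0))    # True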

llama_cpp/llama_chat_format.py

Lines changed: 17 additions & 0 deletions
@@ -637,6 +637,23 @@ def format_zephyr(
     _prompt = _format_chatml(system_message, _messages, _sep)
     return ChatFormatterResponse(prompt=_prompt, stop=_sep)
 
+
+@register_chat_format("pygmalion")
+def format_pygmalion(
+    messages: List[llama_types.ChatCompletionRequestMessage],
+    **kwargs: Any,
+) -> ChatFormatterResponse:
+    system_template = """<|system|>{system_message}"""
+    system_message = _get_system_message(messages)
+    system_message = system_template.format(system_message=system_message)
+    _roles = dict(user="<|user|>", assistant="<|model|>")
+    _sep = "\n"
+    _messages = _map_roles(messages, _roles)
+    _messages.append((_roles["assistant"], None))
+    _prompt = _format_chatml(system_message, _messages, _sep)
+    return ChatFormatterResponse(prompt=_prompt, stop=_sep)
+
+
 @register_chat_format("chatml")
 def format_chatml(
     messages: List[llama_types.ChatCompletionRequestMessage],
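
A hedged usage sketch (not from the diff): once registered, the new formatter can be selected by name through the public Llama constructor. The model path below is a placeholder for any Pygmalion-style GGUF file:

from llama_cpp import Llama

# chat_format="pygmalion" picks up the formatter registered above.
llm = Llama(
    model_path="./models/pygmalion-2-7b.Q4_K_M.gguf",  # placeholder path
    chat_format="pygmalion",
)
response = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "Enter roleplay mode. You are Aria."},
        {"role": "user", "content": "Hello!"},
    ],
)
print(response["choices"][0]["message"]["content"])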

llama_cpp/server/__main__.py

Lines changed: 2 additions & 1 deletion
@@ -96,5 +96,6 @@ def parse_bool_arg(arg):
     app = create_app(settings=settings)
 
     uvicorn.run(
-        app, host=os.getenv("HOST", settings.host), port=int(os.getenv("PORT", settings.port))
+        app, host=os.getenv("HOST", settings.host), port=int(os.getenv("PORT", settings.port)),
+        ssl_keyfile=settings.ssl_keyfile, ssl_certfile=settings.ssl_certfile
     )
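
For reference (not part of the commit): ssl_keyfile and ssl_certfile are standard uvicorn.run keyword arguments; when both point at a valid PEM key/certificate pair, uvicorn serves HTTPS. A minimal standalone sketch with placeholder paths and a trivial ASGI app:

import uvicorn

async def app(scope, receive, send):
    # Trivial ASGI app used only to demonstrate the TLS arguments.
    assert scope["type"] == "http"
    await send({"type": "http.response.start", "status": 200,
                "headers": [(b"content-type", b"text/plain")]})
    await send({"type": "http.response.body", "body": b"ok"})

if __name__ == "__main__":
    uvicorn.run(
        app, host="0.0.0.0", port=8443,
        ssl_keyfile="./certs/server.key",   # placeholder PEM private key
        ssl_certfile="./certs/server.crt",  # placeholder PEM certificate
    )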

llama_cpp/server/app.py

Lines changed: 7 additions & 0 deletions
@@ -150,6 +150,13 @@ class Settings(BaseSettings):
     # Server Params
     host: str = Field(default="localhost", description="Listen address")
     port: int = Field(default=8000, description="Listen port")
+    # SSL Params
+    ssl_keyfile: Optional[str] = Field(
+        default=None, description="SSL key file for HTTPS"
+    )
+    ssl_certfile: Optional[str] = Field(
+        default=None, description="SSL certificate file for HTTPS"
+    )
     interrupt_requests: bool = Field(
         default=True,
         description="Whether to interrupt requests when a new request is received.",

0 commit comments
