
Add functionary support #784


Merged: 9 commits, Nov 3, 2023
225 changes: 225 additions & 0 deletions examples/notebooks/Functions.ipynb
@@ -0,0 +1,225 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"chatcmpl-a6db1bbb-a128-4c28-88fe-30717ec806b2\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1698989577,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"The current weather in Boston is sunny with a temperature of 72 degrees\"\n",
" },\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 135,\n",
" \"completion_tokens\": 16,\n",
" \"total_tokens\": 151\n",
" }\n",
"}\n"
]
}
],
"source": [
"import openai\n",
"import json\n",
"\n",
"openai.api_key = \"sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\" # can be anything\n",
"openai.api_base = \"http://100.64.159.73:8000/v1\"\n",
"\n",
"# Example dummy function hard coded to return the same weather\n",
"# In production, this could be your backend API or an external API\n",
"def get_current_weather(location, unit=\"fahrenheit\"):\n",
" \"\"\"Get the current weather in a given location\"\"\"\n",
" weather_info = {\n",
" \"location\": location,\n",
" \"temperature\": \"72\",\n",
" \"unit\": unit,\n",
" \"forecast\": [\"sunny\", \"windy\"],\n",
" }\n",
" return json.dumps(weather_info)\n",
"\n",
"def run_conversation():\n",
" # Step 1: send the conversation and available functions to GPT\n",
" messages = [{\"role\": \"user\", \"content\": \"What's the weather like in Boston?\"}]\n",
" functions = [\n",
" {\n",
" \"name\": \"get_current_weather\",\n",
" \"description\": \"Get the current weather in a given location\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"location\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
" },\n",
" \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
" },\n",
" \"required\": [\"location\"],\n",
" },\n",
" }\n",
" ]\n",
" response = openai.ChatCompletion.create(\n",
" model=\"gpt-3.5-turbo-0613\",\n",
" messages=messages,\n",
" functions=functions,\n",
" function_call=\"auto\", # auto is default, but we'll be explicit\n",
" )\n",
" response_message = response[\"choices\"][0][\"message\"]\n",
"\n",
" # Step 2: check if GPT wanted to call a function\n",
" if response_message.get(\"function_call\"):\n",
" # Step 3: call the function\n",
" # Note: the JSON response may not always be valid; be sure to handle errors\n",
" available_functions = {\n",
" \"get_current_weather\": get_current_weather,\n",
" } # only one function in this example, but you can have multiple\n",
" function_name = response_message[\"function_call\"][\"name\"]\n",
" fuction_to_call = available_functions[function_name]\n",
" function_args = json.loads(response_message[\"function_call\"][\"arguments\"])\n",
" function_response = fuction_to_call(\n",
" location=function_args.get(\"location\"),\n",
" unit=function_args.get(\"unit\"),\n",
" )\n",
"\n",
" # Step 4: send the info on the function call and function response to GPT\n",
" messages.append(response_message) # extend conversation with assistant's reply\n",
" messages.append(\n",
" {\n",
" \"role\": \"function\",\n",
" \"name\": function_name,\n",
" \"content\": function_response,\n",
" }\n",
" ) # extend conversation with function response\n",
" second_response = openai.ChatCompletion.create(\n",
" model=\"gpt-3.5-turbo-0613\",\n",
" messages=messages,\n",
" ) # get a new response from GPT where it can see the function response\n",
" return second_response\n",
" else:\n",
" print(response)\n",
" print(\"No function\")\n",
"\n",
"print(run_conversation())"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"name='Jason' age=25\n"
]
}
],
"source": [
"from pydantic import BaseModel\n",
"from instructor import patch\n",
"\n",
"patch()\n",
"\n",
"class UserDetail(BaseModel):\n",
" name: str\n",
" age: int\n",
"\n",
"user: UserDetail = openai.ChatCompletion.create(\n",
" model=\"gpt-3.5-turbo\",\n",
" response_model=UserDetail,\n",
" messages=[\n",
" {\"role\": \"user\", \"content\": \"Extract Jason is 25 years old\"},\n",
" ]\n",
")\n",
"print(user)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"chatcmpl-59bcefad-9df5-4d6b-802c-5537b3e9044e\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1698989585,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"I don't have up-to-date information on the current weather conditions\"\n",
" },\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 62,\n",
" \"completion_tokens\": 16,\n",
" \"total_tokens\": 78\n",
" }\n",
"}\n"
]
}
],
"source": [
"response = openai.ChatCompletion.create(\n",
" model=\"gpt-3.5-turbo-0613\",\n",
" messages=[\n",
" {\"role\": \"user\", \"content\": \"What's the weather like in Boston?\"}\n",
" ]\n",
")\n",
"print(response)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "python-3.8.10",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5+"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
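
The Step 3 comment in the first cell warns that the model's JSON arguments may not always be valid. Below is a minimal sketch of a more defensive dispatch for that step, reusing the notebook's available_functions mapping and message shape; call_function_safely is a hypothetical helper for illustration, not part of this PR:

import json

def call_function_safely(response_message, available_functions):
    """Dispatch a model-requested function call, tolerating bad names and malformed JSON."""
    function_call = response_message.get("function_call")
    if function_call is None:
        return None  # the model answered directly; nothing to dispatch
    function_to_call = available_functions.get(function_call["name"])
    if function_to_call is None:
        return json.dumps({"error": f"unknown function: {function_call['name']}"})
    try:
        function_args = json.loads(function_call.get("arguments") or "{}")
    except json.JSONDecodeError as exc:
        # Return the parse error as the function result so the model can retry
        return json.dumps({"error": f"invalid JSON arguments: {exc}"})
    return function_to_call(**function_args)

Appending the error payload as the function-role message lets the model correct its arguments on the next turn instead of crashing the loop.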
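The second cell patches the openai module with instructor to get typed extraction. A hedged extension of that cell, assuming instructor's max_retries re-ask parameter is available (it is part of instructor's API, but the notebook does not exercise it): a pydantic constraint turns bad extractions into validation errors that instructor can send back to the model.

import openai
from pydantic import BaseModel, Field
from instructor import patch

patch()

class UserDetail(BaseModel):
    name: str
    age: int = Field(ge=0, le=130)  # out-of-range ages fail validation

user = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    response_model=UserDetail,
    max_retries=2,  # assumption: instructor re-sends validation errors up to twice
    messages=[{"role": "user", "content": "Extract Jason is 25 years old"}],
)
print(user)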
94 changes: 7 additions & 87 deletions llama_cpp/llama.py
@@ -24,7 +24,7 @@
from . import llama_cpp
from .llama_types import *
from .llama_grammar import LlamaGrammar
from . import llama_chat_format
import llama_cpp.llama_chat_format as llama_chat_format

import numpy as np
import numpy.typing as npt
@@ -428,7 +428,7 @@ def __init__(

if self.verbose:
print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr)

self.chat_format = chat_format

self._n_vocab = self.n_vocab()
@@ -1539,78 +1539,6 @@ def __call__(
grammar=grammar,
)

def _convert_text_completion_to_chat(
self, completion: Completion
) -> ChatCompletion:
return {
"id": "chat" + completion["id"],
"object": "chat.completion",
"created": completion["created"],
"model": completion["model"],
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": completion["choices"][0]["text"],
},
"finish_reason": completion["choices"][0]["finish_reason"],
}
],
"usage": completion["usage"],
}

def _convert_text_completion_chunks_to_chat(
self,
chunks: Iterator[CompletionChunk],
) -> Iterator[ChatCompletionChunk]:
for i, chunk in enumerate(chunks):
if i == 0:
yield {
"id": "chat" + chunk["id"],
"model": chunk["model"],
"created": chunk["created"],
"object": "chat.completion.chunk",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
},
"finish_reason": None,
}
],
}
yield {
"id": "chat" + chunk["id"],
"model": chunk["model"],
"created": chunk["created"],
"object": "chat.completion.chunk",
"choices": [
{
"index": 0,
"delta": {
"content": chunk["choices"][0]["text"],
}
if chunk["choices"][0]["finish_reason"] is None
else {},
"finish_reason": chunk["choices"][0]["finish_reason"],
}
],
}

def _convert_completion_to_chat(
self,
completion_or_chunks: Union[Completion, Iterator[CompletionChunk]],
stream: bool = False,
) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
if stream:
chunks: Iterator[CompletionChunk] = completion_or_chunks # type: ignore
return self._convert_text_completion_chunks_to_chat(chunks)
else:
completion: Completion = completion_or_chunks # type: ignore
return self._convert_text_completion_to_chat(completion)

def create_chat_completion(
self,
messages: List[ChatCompletionRequestMessage],
@@ -1648,19 +1576,12 @@ def create_chat_completion(
Returns:
Generated chat completion or a stream of chat completion chunks.
"""

format = llama_chat_format.get_chat_format(self.chat_format)
result = format(
handler = llama_chat_format.get_chat_completion_handler(self.chat_format)
return handler(
self,
messages=messages,
)
prompt = result.prompt
if result.stop is not None:
stop = [] if stop is None else [stop] if isinstance(stop, str) else stop
rstop = result.stop if isinstance(result.stop, list) else [result.stop]
stop = stop + rstop

completion_or_chunks = self.create_completion(
prompt=prompt,
functions=functions,
function_call=function_call,
temperature=temperature,
top_p=top_p,
top_k=top_k,
@@ -1678,7 +1599,6 @@
logits_processor=logits_processor,
grammar=grammar,
)
return self._convert_completion_to_chat(completion_or_chunks, stream=stream) # type: ignore

def _free_model(self, *, _lbatch_free=llama_cpp._lib.llama_batch_free, _lfree_model=llama_cpp._lib.llama_free_model, _free=llama_cpp._lib.llama_free):
batch = getattr(self, 'batch', None)
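
On the llama.py side, the inline prompt formatting and completion-to-chat conversion shown above are replaced by a single handler lookup. A sketch of what registering a custom handler might look like; the register_chat_completion_handler decorator comes from the companion llama_chat_format changes, which this diff does not show, so treat the exact name and signature as assumptions. The response shape mirrors the removed _convert_text_completion_to_chat helper:

import llama_cpp
import llama_cpp.llama_chat_format as llama_chat_format

@llama_chat_format.register_chat_completion_handler("my-format")  # assumed decorator name
def my_chat_handler(llama: llama_cpp.Llama, *, messages, **kwargs):
    # A handler owns the whole pipeline: build a prompt from the messages,
    # run the underlying completion, and shape the result as a ChatCompletion.
    prompt = "".join(f"{m['role']}: {m['content']}\n" for m in messages) + "assistant: "
    completion = llama.create_completion(prompt=prompt, stop=["\n"], max_tokens=256)
    return {
        "id": "chat" + completion["id"],
        "object": "chat.completion",
        "created": completion["created"],
        "model": completion["model"],
        "choices": [{
            "index": 0,
            "message": {"role": "assistant", "content": completion["choices"][0]["text"]},
            "finish_reason": completion["choices"][0]["finish_reason"],
        }],
        "usage": completion["usage"],
    }

llm = llama_cpp.Llama(model_path="model.gguf", chat_format="my-format")  # path is a placeholder

create_chat_completion then resolves the registered format through get_chat_completion_handler and delegates the entire call, which is why the conversion helpers could be deleted from llama.py.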