merge from upstream main #2

Merged: 26 commits merged on May 8, 2025

Commits (26), showing changes from all commits

86a8535  Bump environs from 11.2.1 to 14.1.1 (dependabot[bot], Mar 3, 2025)
c12a247  Bump urllib3 from 2.2.3 to 2.4.0 (dependabot[bot], Apr 14, 2025)
49988c8  Bump azure-monitor-opentelemetry from 1.6.3 to 1.6.8 (dependabot[bot], Apr 21, 2025)
0f71eb8  Bump astral-sh/setup-uv from 5 to 6 (dependabot[bot], Apr 28, 2025)
5f49c05  Bump vite from 6.2.4 to 6.3.4 in /src/frontend (dependabot[bot], Apr 30, 2025)
c7b862b  Bump cryptography from 44.0.1 to 44.0.3 (dependabot[bot], May 5, 2025)
02c5f12  Initial port to pydantic-ai (pamelafox, May 5, 2025)
c4d2a7f  Merge branch 'main' into pydanticai (pamelafox, May 5, 2025)
b1b8746  More Pydantic-AI usage (pamelafox, May 5, 2025)
202fa4b  More Pydantic AI changes (pamelafox, May 6, 2025)
c8b337c  Merge pull request #203 from Azure-Samples/dependabot/pip/cryptograph… (pamelafox, May 7, 2025)
0ec4031  Merge pull request #199 from Azure-Samples/dependabot/npm_and_yarn/sr… (pamelafox, May 7, 2025)
4819e90  Merge pull request #196 from Azure-Samples/dependabot/github_actions/… (pamelafox, May 7, 2025)
aa05e8e  Merge pull request #194 from Azure-Samples/dependabot/pip/azure-monit… (pamelafox, May 7, 2025)
0de3e62  Merge pull request #192 from Azure-Samples/dependabot/pip/urllib3-2.4.0 (pamelafox, May 7, 2025)
6ab9ea5  Merge pull request #178 from Azure-Samples/dependabot/pip/environs-14… (pamelafox, May 7, 2025)
020434e  Port over fewshots to Pydantic format (pamelafox, May 7, 2025)
076f367  Port over fewshots to Pydantic format (pamelafox, May 7, 2025)
62502b1  Finish refactoring of rag flows (pamelafox, May 7, 2025)
9871f97  Test updates (pamelafox, May 7, 2025)
33b5295  Tests better (pamelafox, May 7, 2025)
292517b  Refactor, please mypy (pamelafox, May 8, 2025)
7e744d2  Revert some changes (pamelafox, May 8, 2025)
09e317e  Merge branch 'main' into pydanticai (pamelafox, May 8, 2025)
01a75f5  Merge pull request #206 from Azure-Samples/pydanticai (pamelafox, May 8, 2025)
f55564d  Merge branch 'main' into mergeupstream (pamelafox, May 8, 2025)

2 changes: 1 addition & 1 deletion .github/workflows/app-tests.yaml
@@ -85,7 +85,7 @@ jobs:
architecture: x64

- name: Install uv
uses: astral-sh/setup-uv@v5
uses: astral-sh/setup-uv@v6
with:
enable-cache: true
version: "0.4.20"
2 changes: 1 addition & 1 deletion .github/workflows/evaluate.yaml
@@ -82,7 +82,7 @@ jobs:
python-version: '3.12'

- name: Install uv
uses: astral-sh/setup-uv@v5
uses: astral-sh/setup-uv@v6
with:
enable-cache: true
version: "0.4.20"
1 change: 1 addition & 0 deletions requirements-dev.txt
@@ -14,3 +14,4 @@ pytest-snapshot
locust
psycopg2
dotenv-azd
freezegun
9 changes: 8 additions & 1 deletion src/backend/fastapi_app/__init__.py
@@ -34,7 +34,13 @@ class State(TypedDict):
@asynccontextmanager
async def lifespan(app: fastapi.FastAPI) -> AsyncIterator[State]:
context = await common_parameters()
azure_credential = await get_azure_credential()
azure_credential = None
if (
os.getenv("OPENAI_CHAT_HOST") == "azure"
or os.getenv("OPENAI_EMBED_HOST") == "azure"
or os.getenv("POSTGRES_HOST", "").endswith(".database.azure.com")
):
azure_credential = await get_azure_credential()
engine = await create_postgres_engine_from_env(azure_credential)
sessionmaker = await create_async_sessionmaker(engine)
chat_client = await create_openai_chat_client(azure_credential)
@@ -53,6 +59,7 @@ def create_app(testing: bool = False):
if not testing:
load_dotenv(override=True)
logging.basicConfig(level=logging.INFO)

# Turn off particularly noisy INFO level logs from Azure Core SDK:
logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(logging.WARNING)
logging.getLogger("azure.identity").setLevel(logging.WARNING)
86 changes: 58 additions & 28 deletions src/backend/fastapi_app/api_models.py
@@ -1,8 +1,9 @@
from enum import Enum
from typing import Any, Optional
from typing import Any, Optional, Union

from openai.types.chat import ChatCompletionMessageParam
from pydantic import BaseModel
from pydantic import BaseModel, Field
from pydantic_ai.messages import ModelRequest, ModelResponse


class AIChatRoles(str, Enum):
@@ -40,6 +41,30 @@ class ChatRequest(BaseModel):
context: ChatRequestContext
sessionState: Optional[Any] = None


class ItemPublic(BaseModel):
id: int
name: str
location: str
cuisine: str
rating: int
price_level: int
review_count: int
hours: int
tags: str
description: str
menu_summary: str
top_reviews: str
vibe: str


class ItemWithDistance(ItemPublic):
distance: float

def __init__(self, **data):
super().__init__(**data)
self.distance = round(self.distance, 2)


class ThoughtStep(BaseModel):
title: str
@@ -48,7 +73,7 @@ class ThoughtStep(BaseModel):


class RAGContext(BaseModel):
data_points: dict[int, dict[str, Any]]
data_points: dict[int, ItemPublic]
thoughts: list[ThoughtStep]
followup_questions: Optional[list[str]] = None

@@ -69,34 +94,39 @@ class RetrievalResponseDelta(BaseModel):
sessionState: Optional[Any] = None


class ItemPublic(BaseModel):
id: int
name: str
location: str
cuisine: str
rating: int
price_level: int
review_count: int
hours: int
tags: str
description: str
menu_summary: str
top_reviews: str
vibe: str


class ItemWithDistance(ItemPublic):
distance: float

def __init__(self, **data):
super().__init__(**data)
self.distance = round(self.distance, 2)


class ChatParams(ChatRequestOverrides):
prompt_template: str
response_token_limit: int = 1024
enable_text_search: bool
enable_vector_search: bool
original_user_query: str
past_messages: list[ChatCompletionMessageParam]
past_messages: list[Union[ModelRequest, ModelResponse]]


class Filter(BaseModel):
column: str
comparison_operator: str
value: Any


class PriceLevelFilter(Filter):
column: str = Field(default="price_level", description="The column to filter on (always 'price_level' for this filter)")
comparison_operator: str = Field(description="The operator for price level comparison ('>', '<', '>=', '<=', '=')")
value: float = Field(description="Value to compare against, either 1, 2, 3, 4")


class RatingFilter(Filter):
column: str = Field(default="rating", description="The column to filter on (always 'rating' for this filter)")
comparison_operator: str = Field(description="The operator for rating comparison ('>', '<', '>=', '<=', '=')")
value: str = Field(description="Value to compare against, either 0 1 2 3 4")


class SearchResults(BaseModel):
query: str
"""The original search query"""

items: list[ItemPublic]
"""List of items that match the search query and filters"""

filters: list[Filter]
"""List of filters applied to the search results"""
14 changes: 9 additions & 5 deletions src/backend/fastapi_app/openai_clients.py
@@ -9,12 +9,12 @@


async def create_openai_chat_client(
azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential],
azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential, None],
) -> Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]:
openai_chat_client: Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]
OPENAI_CHAT_HOST = os.getenv("OPENAI_CHAT_HOST")
if OPENAI_CHAT_HOST == "azure":
api_version = os.environ["AZURE_OPENAI_VERSION"] or "2024-03-01-preview"
api_version = os.environ["AZURE_OPENAI_VERSION"] or "2024-10-21"
azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
azure_deployment = os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"]
if api_key := os.getenv("AZURE_OPENAI_KEY"):
@@ -29,7 +29,7 @@ async def create_openai_chat_client(
azure_deployment=azure_deployment,
api_key=api_key,
)
else:
elif azure_credential:
logger.info(
"Setting up Azure OpenAI client for chat completions using Azure Identity, endpoint %s, deployment %s",
azure_endpoint,
@@ -44,6 +44,8 @@ async def create_openai_chat_client(
azure_deployment=azure_deployment,
azure_ad_token_provider=token_provider,
)
else:
raise ValueError("Azure OpenAI client requires either an API key or Azure Identity credential.")
elif OPENAI_CHAT_HOST == "ollama":
logger.info("Setting up OpenAI client for chat completions using Ollama")
openai_chat_client = openai.AsyncOpenAI(
@@ -67,7 +69,7 @@


async def create_openai_embed_client(
azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential],
azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential, None],
) -> Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]:
openai_embed_client: Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]
OPENAI_EMBED_HOST = os.getenv("OPENAI_EMBED_HOST")
@@ -87,7 +89,7 @@ async def create_openai_embed_client(
azure_deployment=azure_deployment,
api_key=api_key,
)
else:
elif azure_credential:
logger.info(
"Setting up Azure OpenAI client for embeddings using Azure Identity, endpoint %s, deployment %s",
azure_endpoint,
@@ -102,6 +104,8 @@ async def create_openai_embed_client(
azure_deployment=azure_deployment,
azure_ad_token_provider=token_provider,
)
else:
raise ValueError("Azure OpenAI client requires either an API key or Azure Identity credential.")
elif OPENAI_EMBED_HOST == "ollama":
logger.info("Setting up OpenAI client for embeddings using Ollama")
openai_embed_client = openai.AsyncOpenAI(
16 changes: 10 additions & 6 deletions src/backend/fastapi_app/postgres_searcher.py
@@ -5,6 +5,7 @@
from sqlalchemy import Float, Integer, column, select, text
from sqlalchemy.ext.asyncio import AsyncSession

from fastapi_app.api_models import Filter
from fastapi_app.embeddings import compute_text_embedding
from fastapi_app.postgres_models import Item

@@ -26,21 +27,24 @@ def __init__(
self.embed_dimensions = embed_dimensions
self.embedding_column = embedding_column

def build_filter_clause(self, filters) -> tuple[str, str]:
def build_filter_clause(self, filters: Optional[list[Filter]]) -> tuple[str, str]:
if filters is None:
return "", ""
filter_clauses = []
for filter in filters:
if isinstance(filter["value"], str):
filter["value"] = f"'{filter['value']}'"
filter_clauses.append(f"{filter['column']} {filter['comparison_operator']} {filter['value']}")
filter_value = f"'{filter.value}'" if isinstance(filter.value, str) else filter.value
filter_clauses.append(f"{filter.column} {filter.comparison_operator} {filter_value}")
filter_clause = " AND ".join(filter_clauses)
if len(filter_clause) > 0:
return f"WHERE {filter_clause}", f"AND {filter_clause}"
return "", ""

async def search(
self, query_text: Optional[str], query_vector: list[float], top: int = 5, filters: Optional[list[dict]] = None
self,
query_text: Optional[str],
query_vector: list[float],
top: int = 5,
filters: Optional[list[Filter]] = None,
):
filter_clause_where, filter_clause_and = self.build_filter_clause(filters)
table_name = Item.__tablename__
@@ -106,7 +110,7 @@ async def search_and_embed(
top: int = 5,
enable_vector_search: bool = False,
enable_text_search: bool = False,
filters: Optional[list[dict]] = None,
filters: Optional[list[Filter]] = None,
) -> list[Item]:
"""
Search rows by query text. Optionally converts the query text to a vector if enable_vector_search is True.
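
A small check of the typed filter handling above; given the implementation in this diff, the inputs below should produce the clauses shown in the comments. build_filter_clause does not touch self, so it is called unbound here purely for illustration.

# Illustrative: what build_filter_clause should produce for typed Filter objects.
from fastapi_app.api_models import Filter
from fastapi_app.postgres_searcher import PostgresSearcher

filters = [
    Filter(column="price_level", comparison_operator="<", value=3),
    Filter(column="cuisine", comparison_operator="=", value="ethiopian"),
]

# String values are quoted, numeric values are not.
where_clause, and_clause = PostgresSearcher.build_filter_clause(None, filters)
print(where_clause)  # WHERE price_level < 3 AND cuisine = 'ethiopian'
print(and_clause)    # AND price_level < 3 AND cuisine = 'ethiopian'
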
11 changes: 5 additions & 6 deletions src/backend/fastapi_app/prompts/query.txt
@@ -1,6 +1,5 @@
Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching database rows.
You have access to an Azure PostgreSQL database with a restaurants table that has name, description, menu summary, vibe, ratings, etc.
Generate a search query based on the conversation and the new question.
If the question is not in English, translate the question to English before generating the search query.
If you cannot generate a search query, return the original user question.
DO NOT return anything besides the query.
Your job is to find search results based off the user's question and past messages.
You have access to only these tools:
1. **search_database**: This tool allows you to search a table for restaurants based on a query.
You can pass in a search query and optional filters.
Once you get the search results, you're done.
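
The rewritten prompt above assumes a single search_database tool exposed to the model. Below is a minimal pydantic-ai sketch of what that wiring could look like; the agent name, model id, dependency type, and tool signature are illustrative assumptions, not the repository's actual implementation.

# Hypothetical sketch of a search_database tool registered with pydantic-ai.
# Names and signatures here are assumptions for illustration only.
from dataclasses import dataclass
from typing import Any, Optional

from pydantic_ai import Agent, RunContext

from fastapi_app.api_models import PriceLevelFilter


@dataclass
class SearchDeps:
    searcher: Any  # assumed to be a PostgresSearcher injected at request time


search_agent = Agent(
    "openai:gpt-4o-mini",  # model id is a placeholder
    deps_type=SearchDeps,
    system_prompt="Find restaurants that match the user's question and past messages.",
)


@search_agent.tool
async def search_database(
    ctx: RunContext[SearchDeps],
    search_query: str,
    price_level_filter: Optional[dict] = None,
) -> str:
    """Search the restaurants table for rows matching the query and optional filters."""
    filters = [PriceLevelFilter(**price_level_filter)] if price_level_filter else None
    # Assumes search_and_embed takes the query text first; see postgres_searcher.py.
    items = await ctx.deps.searcher.search_and_embed(
        search_query, top=5, enable_text_search=True, filters=filters
    )
    return "\n".join(item.description for item in items)
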
106 changes: 74 additions & 32 deletions src/backend/fastapi_app/prompts/query_fewshots.json
@@ -1,34 +1,76 @@
[
{"role": "user", "content": "good options for ethiopian restaurants?"},
{"role": "assistant", "tool_calls": [
{
"id": "call_abc123",
"type": "function",
"function": {
"arguments": "{\"search_query\":\"ethiopian\"}",
"name": "search_database"
}
}
]},
{
"role": "tool",
"tool_call_id": "call_abc123",
"content": "Search results for ethiopian: ..."
},
{"role": "user", "content": "are there any inexpensive chinese restaurants?"},
{"role": "assistant", "tool_calls": [
{
"id": "call_abc456",
"type": "function",
"function": {
"arguments": "{\"search_query\":\"chinese\",\"price_level_filter\":{\"comparison_operator\":\"<\",\"value\":3}}",
"name": "search_database"
}
}
]},
{
"role": "tool",
"tool_call_id": "call_abc456",
"content": "Search results for chinese: ..."
}
{
"parts": [
{
"content": "good options for ethiopian restaurants?",
"timestamp": "2025-05-07T19:02:46.977501Z",
"part_kind": "user-prompt"
}
],
"instructions": null,
"kind": "request"
},
{
"parts": [
{
"tool_name": "search_database",
"args": "{\"search_query\":\"ethiopian\"}",
"tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",
"part_kind": "tool-call"
}
],
"model_name": "gpt-4o-mini-2024-07-18",
"timestamp": "2025-05-07T19:02:47Z",
"kind": "response"
},
{
"parts": [
{
"tool_name": "search_database",
"content": "Search results for ethiopian: ...",
"tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",
"timestamp": "2025-05-07T19:02:48.242408Z",
"part_kind": "tool-return"
}
],
"instructions": null,
"kind": "request"
},
{
"parts": [
{
"content": "are there any inexpensive chinese restaurants?",
"timestamp": "2025-05-07T19:02:46.977501Z",
"part_kind": "user-prompt"
}
],
"instructions": null,
"kind": "request"
},
{
"parts": [
{
"tool_name": "search_database",
"args": "{\"search_query\":\"chinese\",\"price_level_filter\":{\"comparison_operator\":\"<\",\"value\":3}}",
"tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",
"part_kind": "tool-call"
}
],
"model_name": "gpt-4o-mini-2024-07-18",
"timestamp": "2025-05-07T19:02:47Z",
"kind": "response"
},
{
"parts": [
{
"tool_name": "search_database",
"content": "Search results for chinese: ...",
"tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",
"timestamp": "2025-05-07T19:02:48.242408Z",
"part_kind": "tool-return"
}
],
"instructions": null,
"kind": "request"
}
]
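
Since the few-shot examples are now stored in pydantic-ai's message serialization format (request/response messages with parts), they can presumably be loaded back with pydantic-ai's message type adapter. The file path and the way the history is passed to an agent below are assumptions for illustration.

# Sketch: loading the few-shot messages above as pydantic-ai message history.
# File path and agent wiring are assumptions, not the repository's code.
from pathlib import Path

from pydantic_ai.messages import ModelMessagesTypeAdapter

raw = Path("src/backend/fastapi_app/prompts/query_fewshots.json").read_bytes()
few_shots = ModelMessagesTypeAdapter.validate_json(raw)

# few_shots is now a list of ModelRequest/ModelResponse objects, suitable for
# passing to Agent.run(..., message_history=few_shots + past_messages).
print(type(few_shots[0]).__name__)  # e.g. ModelRequest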