Skip to content

feat(client): add optional override default cache ttl capability for prompt caching #1249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions langfuse/_client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ class Langfuse:
blocked_instrumentation_scopes (Optional[List[str]]): List of instrumentation scope names to block from being exported to Langfuse. Spans from these scopes will be filtered out before being sent to the API. Useful for filtering out spans from specific libraries or frameworks. For exported spans, you can see the instrumentation scope name in the span metadata in Langfuse (`metadata.scope.name`)
additional_headers (Optional[Dict[str, str]]): Additional headers to include in all API requests and OTLPSpanExporter requests. These headers will be merged with default headers. Note: If httpx_client is provided, additional_headers must be set directly on your custom httpx_client as well.
tracer_provider(Optional[TracerProvider]): OpenTelemetry TracerProvider to use for Langfuse. This can be useful to set to have disconnected tracing between Langfuse and other OpenTelemetry-span emitting libraries. Note: To track active spans, the context is still shared between TracerProviders. This may lead to broken trace trees.
default_cache_ttl_seconds (Optional[int]): Override the default cache TTL (time-to-live) of 60 seconds with a custom value (in seconds) for prompts.

Example:
```python
Expand Down Expand Up @@ -169,6 +170,7 @@ def __init__(
blocked_instrumentation_scopes: Optional[List[str]] = None,
additional_headers: Optional[Dict[str, str]] = None,
tracer_provider: Optional[TracerProvider] = None,
default_cache_ttl_seconds: Optional[int] = None,
):
self._host = host or cast(
str, os.environ.get(LANGFUSE_HOST, "https://cloud.langfuse.com")
Expand Down Expand Up @@ -237,6 +239,7 @@ def __init__(
blocked_instrumentation_scopes=blocked_instrumentation_scopes,
additional_headers=additional_headers,
tracer_provider=tracer_provider,
default_cache_ttl_seconds=default_cache_ttl_seconds
)
self._mask = self._resources.mask

Expand Down
5 changes: 4 additions & 1 deletion langfuse/_client/resource_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def __new__(
blocked_instrumentation_scopes: Optional[List[str]] = None,
additional_headers: Optional[Dict[str, str]] = None,
tracer_provider: Optional[TracerProvider] = None,
default_cache_ttl_seconds: Optional[int] = None,
) -> "LangfuseResourceManager":
if public_key in cls._instances:
return cls._instances[public_key]
Expand Down Expand Up @@ -131,6 +132,7 @@ def __new__(
blocked_instrumentation_scopes=blocked_instrumentation_scopes,
additional_headers=additional_headers,
tracer_provider=tracer_provider,
default_cache_ttl_seconds=default_cache_ttl_seconds,
)

cls._instances[public_key] = instance
Expand All @@ -156,6 +158,7 @@ def _initialize_instance(
blocked_instrumentation_scopes: Optional[List[str]] = None,
additional_headers: Optional[Dict[str, str]] = None,
tracer_provider: Optional[TracerProvider] = None,
default_cache_ttl_seconds: Optional[int] = None,
) -> None:
self.public_key = public_key
self.secret_key = secret_key
Expand Down Expand Up @@ -255,7 +258,7 @@ def _initialize_instance(
self._media_upload_consumers.append(media_upload_consumer)

# Prompt cache
self.prompt_cache = PromptCache()
self.prompt_cache = PromptCache(default_cache_ttl_seconds=default_cache_ttl_seconds)

# Score ingestion
self._score_ingestion_queue: Queue[Any] = Queue(100_000)
Expand Down
9 changes: 6 additions & 3 deletions langfuse/_utils/prompt_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,12 @@ class PromptCache:
_log = logging.getLogger("langfuse")

def __init__(
    self,
    max_prompt_refresh_workers: int = DEFAULT_PROMPT_CACHE_REFRESH_WORKERS,
    *,
    default_cache_ttl_seconds: Optional[int] = None,
):
    """Initialize the prompt cache.

    Args:
        max_prompt_refresh_workers: Number of threads for the background
            prompt-refresh task manager.
        default_cache_ttl_seconds: Optional override for the TTL applied by
            `set()` when no explicit TTL is given. Keyword-only with a `None`
            default so existing callers — `PromptCache()` and
            `PromptCache(5)` / `PromptCache(max_prompt_refresh_workers=5)` —
            keep working unchanged (the PR's required leading positional
            parameter would have silently re-bound `PromptCache(5)`).
            `None` falls back to DEFAULT_PROMPT_CACHE_TTL_SECONDS.
    """
    self._cache = {}
    # Resolve the effective default once at construction time.
    self._default_cache_ttl_seconds = (
        default_cache_ttl_seconds
        if default_cache_ttl_seconds is not None
        else DEFAULT_PROMPT_CACHE_TTL_SECONDS
    )
    self._task_manager = PromptCacheTaskManager(threads=max_prompt_refresh_workers)
    self._log.debug("Prompt cache initialized.")

Expand All @@ -148,7 +151,7 @@ def get(self, key: str) -> Optional[PromptCacheItem]:

def set(self, key: str, value: PromptClient, ttl_seconds: Optional[int]) -> None:
    """Cache *value* under *key*.

    A `None` ttl_seconds falls back to the instance-level default TTL
    configured at construction.
    """
    effective_ttl = (
        self._default_cache_ttl_seconds if ttl_seconds is None else ttl_seconds
    )
    self._cache[key] = PromptCacheItem(value, effective_ttl)

Expand Down
80 changes: 79 additions & 1 deletion tests/test_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from langfuse.model import ChatPromptClient, TextPromptClient
from tests.utils import create_uuid, get_api

OVERRIDE_DEFAULT_PROMPT_CACHE_TTL_SECONDS = 120

def test_create_prompt():
langfuse = Langfuse()
Expand Down Expand Up @@ -679,11 +680,25 @@ def test_prompt_end_to_end():

@pytest.fixture
def langfuse():
    """Langfuse client pointed at a mock host, with its API layer mocked out."""
    instance = Langfuse(
        public_key="test-public-key",
        secret_key="test-secret-key",
        host="https://mock-host.com",
    )
    instance.api = Mock()
    return instance

@pytest.fixture
def langfuse_with_override_default_cache():
    """Langfuse client whose prompt-cache default TTL is overridden (API mocked)."""
    client = Langfuse(
        public_key="test-public-key",
        secret_key="test-secret-key",
        host="https://mock-host.com",
        default_cache_ttl_seconds=OVERRIDE_DEFAULT_PROMPT_CACHE_TTL_SECONDS,
    )
    client.api = Mock()
    return client

# Fetching a new prompt when nothing in cache
def test_get_fresh_prompt(langfuse):
Expand Down Expand Up @@ -1117,6 +1132,69 @@ def test_get_fresh_prompt_when_expired_cache_default_ttl(mock_time, langfuse: La
assert mock_server_call.call_count == 2 # New call
assert result_call_3 == prompt_client

# Should refetch and return a new prompt once the cached one expires
# according to the OVERRIDDEN default TTL (not the library default).
@patch.object(PromptCacheItem, "get_epoch_seconds")
def test_get_fresh_prompt_when_expired_cache_overridden_default_ttl(
    mock_time, langfuse_with_override_default_cache: Langfuse
):
    langfuse = langfuse_with_override_default_cache

    mock_time.return_value = 0

    prompt_name = "test_get_fresh_prompt_when_expired_cache_overridden_default_ttl"
    prompt = Prompt_Text(
        name=prompt_name,
        version=1,
        prompt="Make me laugh",
        labels=[],
        type="text",
        config={},
        tags=[],
    )
    prompt_client = TextPromptClient(prompt)

    mock_server_call = langfuse.api.prompts.get
    mock_server_call.return_value = prompt

    # First fetch populates the cache.
    result_call_1 = langfuse.get_prompt(prompt_name)
    assert mock_server_call.call_count == 1
    assert result_call_1 == prompt_client

    # Just BEFORE expiry under the library DEFAULT TTL: still cached.
    mock_time.return_value = DEFAULT_PROMPT_CACHE_TTL_SECONDS - 1
    result_call_2 = langfuse.get_prompt(prompt_name)
    assert mock_server_call.call_count == 1  # No new call - cache still valid
    assert result_call_2 == prompt_client

    # Just AFTER the DEFAULT TTL: must NOT expire — the overridden TTL is longer.
    mock_time.return_value = DEFAULT_PROMPT_CACHE_TTL_SECONDS + 1
    result_call_3 = langfuse.get_prompt(prompt_name)
    assert mock_server_call.call_count == 1  # Still no new call
    assert result_call_3 == prompt_client

    # Just BEFORE the OVERRIDDEN TTL: still cached.
    mock_time.return_value = OVERRIDE_DEFAULT_PROMPT_CACHE_TTL_SECONDS - 1
    result_call_4 = langfuse.get_prompt(prompt_name)
    assert mock_server_call.call_count == 1  # No new call
    assert result_call_4 == prompt_client

    # Just AFTER the OVERRIDDEN TTL: cache expires and a refetch is triggered.
    mock_time.return_value = OVERRIDE_DEFAULT_PROMPT_CACHE_TTL_SECONDS + 1
    result_call_5 = langfuse.get_prompt(prompt_name)

    # Wait for the background refresh task to drain — BOUNDED (~10s), so a
    # stuck task manager fails the test instead of hanging the suite forever.
    for _ in range(100):
        if langfuse._resources.prompt_cache._task_manager.active_tasks() == 0:
            break
        sleep(0.1)
    else:
        pytest.fail("prompt refresh task did not finish within the timeout")

    assert mock_server_call.call_count == 2  # New call - cache expired at overridden TTL
    assert result_call_5 == prompt_client

    # The override must actually be stored on the cache instance.
    assert (
        langfuse._resources.prompt_cache._default_cache_ttl_seconds
        == OVERRIDE_DEFAULT_PROMPT_CACHE_TTL_SECONDS
    )
    assert (
        langfuse._resources.prompt_cache._default_cache_ttl_seconds
        != DEFAULT_PROMPT_CACHE_TTL_SECONDS
    )

# Should return expired prompt if refetch fails
@patch.object(PromptCacheItem, "get_epoch_seconds")
Expand Down