
Commit 14da46f

Added cache size to settings object.
1 parent 0e94a70 commit 14da46f

1 file changed: 5 additions, 1 deletion

llama_cpp/server/app.py

Lines changed: 5 additions & 1 deletion
@@ -45,6 +45,10 @@ class Settings(BaseSettings):
         default=False,
         description="Use a cache to reduce processing times for evaluated prompts.",
     )
+    cache_size: int = Field(
+        default=2 << 30,
+        description="The size of the cache in bytes. Only used if cache is True.",
+    )
     vocab_only: bool = Field(
         default=False, description="Whether to only return the vocabulary."
     )
@@ -89,7 +93,7 @@ def create_app(settings: Optional[Settings] = None):
         verbose=settings.verbose,
     )
     if settings.cache:
-        cache = llama_cpp.LlamaCache()
+        cache = llama_cpp.LlamaCache(capacity_bytes=settings.cache_size)
         llama.set_cache(cache)
     return app
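
For reference, the new default 2 << 30 is 2 GiB written as a bit shift. Below is a minimal sketch, not taken from this commit, of how the setting could be overridden when building the server app; the model path is hypothetical, and constructing Settings directly (rather than letting it read environment variables) is an assumption about how the server is wired up, based on the create_app signature shown above.

# Minimal sketch, not part of this commit: overriding the new cache_size
# setting when creating the app. The model path is hypothetical; passing a
# Settings instance to create_app matches the optional-settings signature
# shown in the diff above.
from llama_cpp.server.app import Settings, create_app

default_bytes = 2 << 30                # 2 * 2**30 bytes = 2 GiB (the default cache_size)
assert default_bytes == 2 * 1024 ** 3

settings = Settings(
    model="./models/ggml-model.bin",   # hypothetical model path
    cache=True,                        # enable the prompt cache
    cache_size=4 << 30,                # raise the cache capacity to 4 GiB
)
app = create_app(settings)             # create_app forwards cache_size to LlamaCache

Expressing the default as a bit shift keeps it an exact power-of-two byte count; any integer number of bytes can be supplied instead.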
