Commit 8ae3bed

Update Llama to add params
1 parent 4525236 commit 8ae3bed


llama_cpp/llama.py

Lines changed: 7 additions & 0 deletions
@@ -13,12 +13,15 @@ class Llama:
     def __init__(
         self,
         model_path: str,
+        # NOTE: The following parameters are likely to change in the future.
         n_ctx: int = 512,
         n_parts: int = -1,
         seed: int = 1337,
         f16_kv: bool = False,
         logits_all: bool = False,
         vocab_only: bool = False,
+        use_mlock: bool = False,
+        embedding: bool = False,
         n_threads: Optional[int] = None,
     ) -> "Llama":
         """Load a llama.cpp model from `model_path`.
@@ -31,6 +34,8 @@ def __init__(
             f16_kv: Use half-precision for key/value cache.
             logits_all: Return logits for all tokens, not just the last token.
             vocab_only: Only load the vocabulary no weights.
+            use_mlock: Force the system to keep the model in RAM.
+            embedding: Embedding mode only.
             n_threads: Number of threads to use. If None, the number of threads is automatically determined.
 
         Raises:
@@ -51,6 +56,8 @@ def __init__(
         self.params.f16_kv = f16_kv
         self.params.logits_all = logits_all
         self.params.vocab_only = vocab_only
+        self.params.use_mlock = use_mlock
+        self.params.embedding = embedding
 
         self.n_threads = n_threads or multiprocessing.cpu_count()

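The new keyword arguments are passed straight through to llama.cpp's context parameters, as the assignment block at the end of the diff shows. A minimal usage sketch of the updated constructor; the model path is a placeholder, not a file shipped with the repo:

from llama_cpp import Llama

# Placeholder path: point this at a real GGML model file on disk.
llm = Llama(
    model_path="./models/7B/ggml-model.bin",
    use_mlock=True,   # ask the OS to keep the model weights resident in RAM
    embedding=True,   # load the model in embedding mode
)

Both parameters default to False, so existing callers are unaffected by this change.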
0 commit comments