|
@@ -6,6 +6,7 @@
 from typing import (
     Dict,
     List,
+    Tuple,
     Optional,
     Sequence,
 )
|
@@ -707,3 +708,136 @@ def accept(self, ctx_main: LlamaContext, id: int, apply_grammar: bool):
             ctx_main.grammar_accept_token(self.grammar, id)
         self.prev.append(id)

+
+from typing import List, Callable, Optional, Union
+import ctypes
+import llama_cpp
+
+class CustomSampler:
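+    """Wrap a Python sampling function as a llama.cpp sampler by implementing the apply callback via ctypes."""
+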
+    def __init__(self, apply_func: Callable[[llama_cpp.llama_token_data_array], None]):
+        self.apply_func = apply_func
+
+        def apply_wrapper(sampler: llama_cpp.llama_sampler_p, cur_p: llama_cpp.llama_token_data_array_p):
+            self.apply_func(cur_p)
+
+        def free_wrapper(sampler: llama_cpp.llama_sampler_p):
+            pass
+
+        sampler_i = llama_cpp.llama_sampler_i()
+        sampler_i.apply = llama_cpp.llama_sampler_i_apply(apply_wrapper)
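+        # Keep a reference to the ctypes callback so it is not garbage collected
+        # while llama.cpp still holds the function pointer.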
+        self._apply_wrapper_ref = apply_wrapper
+
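+        # The remaining interface callbacks are left unset (0 becomes a NULL function pointer)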
+        sampler_i.name = llama_cpp.llama_sampler_i_name(0)
+        sampler_i.accept = llama_cpp.llama_sampler_i_accept(0)
+        sampler_i.reset = llama_cpp.llama_sampler_i_reset(0)
+        sampler_i.clone = llama_cpp.llama_sampler_i_clone(0)
+        sampler_i.free = llama_cpp.llama_sampler_i_free(0)
+
+        self.sampler = llama_cpp.llama_sampler()
+        self.sampler.iface = ctypes.pointer(sampler_i)
+        self.sampler.ctx = None
+
+    def get_sampler(self) -> llama_cpp.llama_sampler_p:
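+        # The returned pointer borrows self.sampler; this CustomSampler must stay alive
+        # for as long as the chain uses it.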
+        return ctypes.pointer(self.sampler)
+
+class LlamaSampler:
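+    """Thin wrapper around a llama.cpp sampler chain (llama_sampler_chain_init / llama_sampler_chain_add)."""
+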
+    def __init__(self):
+        params = llama_cpp.llama_sampler_chain_params()
+        self.sampler = llama_cpp.llama_sampler_chain_init(params)
+        self.samplers: List[llama_cpp.llama_sampler_p] = []
+        self.custom_samplers: List[Tuple[int, CustomSampler]] = []
+
+    def add_greedy(self):
+        sampler = llama_cpp.llama_sampler_init_greedy()
+        self._add_sampler(sampler)
+
+    def add_dist(self, seed: int):
+        sampler = llama_cpp.llama_sampler_init_dist(seed)
+        self._add_sampler(sampler)
+
+    def add_softmax(self):
+        sampler = llama_cpp.llama_sampler_init_softmax()
+        self._add_sampler(sampler)
+
+    def add_top_k(self, k: int):
+        sampler = llama_cpp.llama_sampler_init_top_k(k)
+        self._add_sampler(sampler)
+
+    def add_top_p(self, p: float, min_keep: int):
+        sampler = llama_cpp.llama_sampler_init_top_p(p, min_keep)
+        self._add_sampler(sampler)
+
+    def add_min_p(self, p: float, min_keep: int):
+        sampler = llama_cpp.llama_sampler_init_min_p(p, min_keep)
+        self._add_sampler(sampler)
+
+    def add_tail_free(self, z: float, min_keep: int):
+        sampler = llama_cpp.llama_sampler_init_tail_free(z, min_keep)
+        self._add_sampler(sampler)
+
+    def add_typical(self, p: float, min_keep: int):
+        sampler = llama_cpp.llama_sampler_init_typical(p, min_keep)
+        self._add_sampler(sampler)
+
+    def add_temp(self, temp: float):
+        sampler = llama_cpp.llama_sampler_init_temp(temp)
+        self._add_sampler(sampler)
+
+    def add_temp_ext(self, t: float, delta: float, exponent: float):
+        sampler = llama_cpp.llama_sampler_init_temp_ext(t, delta, exponent)
+        self._add_sampler(sampler)
+
+    def add_mirostat(self, n_vocab: int, seed: int, tau: float, eta: float, m: int):
+        sampler = llama_cpp.llama_sampler_init_mirostat(
+            n_vocab, seed, tau, eta, m
+        )
+        self._add_sampler(sampler)
+
+    def add_mirostat_v2(self, seed: int, tau: float, eta: float):
+        sampler = llama_cpp.llama_sampler_init_mirostat_v2(seed, tau, eta)
+        self._add_sampler(sampler)
+
+    def add_grammar(self, model: LlamaModel, grammar: LlamaGrammar):
+        sampler = llama_cpp.llama_sampler_init_grammar(
+            model.model, grammar._grammar.encode("utf-8"), grammar._root.encode("utf-8")
+        )
+        self._add_sampler(sampler)
+
+    def add_penalties(self, n_vocab: int, special_eos_id: int, linefeed_id: int,
+                      penalty_last_n: int, penalty_repeat: float, penalty_freq: float,
+                      penalty_present: float, penalize_nl: bool, ignore_eos: bool):
+        sampler = llama_cpp.llama_sampler_init_penalties(
+            n_vocab, special_eos_id, linefeed_id, penalty_last_n, penalty_repeat,
+            penalty_freq, penalty_present, penalize_nl, ignore_eos)
+        self._add_sampler(sampler)
+
+    def init_logit_bias(self, n_vocab: int, n_logit_bias: int, logit_bias: llama_cpp.llama_logit_bias_p):
+        sampler = llama_cpp.llama_sampler_init_logit_bias(n_vocab, n_logit_bias, logit_bias)
+        self._add_sampler(sampler)
+
+    def add_custom(self, apply_func: Callable[[llama_cpp.llama_token_data_array], None]):
+        custom_sampler = CustomSampler(apply_func)
+        sampler = custom_sampler.get_sampler()
+        self._add_sampler(sampler)
+        # NOTE: Must remove custom samplers before free or llama.cpp will try to free them
+        self.custom_samplers.append(
+            (llama_cpp.llama_sampler_chain_n(self.sampler) - 1, custom_sampler)
+        )
+
+    def _add_sampler(self, sampler: llama_cpp.llama_sampler_p):
+        assert self.sampler is not None
+        llama_cpp.llama_sampler_chain_add(self.sampler, sampler)
+        self.samplers.append(sampler)
+
+    def get_seed(self) -> int:
+        assert self.sampler is not None
+        return llama_cpp.llama_sampler_get_seed(self.sampler)
+
+    def sample(self, ctx: LlamaContext, idx: int) -> int:
+        assert self.sampler is not None
+        return llama_cpp.llama_sampler_sample(self.sampler, ctx.ctx, idx)
+
+    def close(self):
+        if self.sampler:
+            # NOTE: Must remove custom samplers before free or llama.cpp will try to free them
+            for i, _ in reversed(self.custom_samplers):
+                llama_cpp.llama_sampler_chain_remove(self.sampler, i)
+            llama_cpp.llama_sampler_free(self.sampler)
+            self.sampler = None
+        self.samplers.clear()
+        self.custom_samplers.clear()
+
+    def __del__(self):
+        self.close()
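
For reference, a minimal usage sketch of the new sampler chain (parameter values are illustrative; it assumes a LlamaContext named ctx has already been created with the existing wrappers in this module):

    sampler = LlamaSampler()
    sampler.add_top_k(40)
    sampler.add_top_p(0.95, 1)
    sampler.add_temp(0.8)
    sampler.add_dist(seed=1234)      # terminal sampler: actually draws the token id
    token = sampler.sample(ctx, -1)  # idx -1 samples from the last evaluated token's logits
    sampler.close()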