Commit 328cd0d

Merge branch 'abetlen:main' into main

2 parents: 4e704d9 + 7c4aead

File tree

11 files changed: +232 additions, −245 deletions


.github/workflows/build-and-release.yaml

Lines changed: 5 additions & 3 deletions
@@ -92,7 +92,7 @@ jobs:
       - uses: actions/checkout@v4
         with:
           submodules: "recursive"
-
+
       - uses: actions/setup-python@v5
         with:
           python-version: "3.9"
@@ -103,6 +103,7 @@ jobs:
           python -m pip install --upgrade pip
           python -m pip install uv
           RUST_LOG=trace python -m uv pip install -e .[all] --verbose
+          python -m uv pip install build
         shell: bash

       - name: Install dependencies (Windows)
@@ -113,12 +114,13 @@ jobs:
           python -m pip install --upgrade pip
           python -m pip install uv
           python -m uv pip install -e .[all] --verbose
+          python -m uv pip install build
         shell: cmd
-
+
       - name: Build source distribution
         run: |
           python -m build --sdist
-
+
       - uses: actions/upload-artifact@v4
         with:
           name: sdist

.github/workflows/publish.yaml

Lines changed: 7 additions & 5 deletions
@@ -13,34 +13,36 @@ jobs:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"
-
+
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.9"
-
+
      - name: Install dependencies (Linux/MacOS)
        if: runner.os != 'Windows'
        run: |
          python -m pip install --upgrade pip
          python -m pip install uv
          RUST_LOG=trace python -m uv pip install -e .[all] --verbose
+          python -m uv pip install build
        shell: bash

      - name: Install dependencies (Windows)
        if: runner.os == 'Windows'
        env:
-          RUST_LOG: trace
+          RUST_LOG: trace
        run: |
          python -m pip install --upgrade pip
          python -m pip install uv
          python -m uv pip install -e .[all] --verbose
+          python -m uv pip install build
        shell: cmd
-
+
      - name: Build source distribution
        run: |
          python -m build --sdist
-
+
      - name: Publish distribution to PyPI
        # TODO: move to tag based releases
        # if: startsWith(github.ref, 'refs/tags')

CHANGELOG.md

Lines changed: 20 additions & 0 deletions
@@ -7,6 +7,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+## [0.3.1]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@c919d5db39c8a7fcb64737f008e4b105ee0acd20
+- feat: Expose libggml in internal APIs by @abetlen in #1761
+- fix: Fix speculative decoding by @abetlen in 9992c5084a3df2f533e265d10f81d4269b97a1e6 and e975dabf74b3ad85689c9a07719cbb181313139b
+- misc: Rename all_text to remaining_text by @xu-song in #1658
+
+## [0.3.0]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@ea9c32be71b91b42ecc538bd902e93cbb5fb36cb
+- feat: Enable detokenizing special tokens with special=True by @benniekiss in #1596
+- feat(ci): Speed up CI workflows using uv, add support for CUDA 12.5 wheels by @Smartappli in e529940f45d42ed8aa31334123b8d66bc67b0e78
+- feat: Add loading sharded GGUF files from HuggingFace with Llama.from_pretrained(additional_files=[...]) by @Gnurro in 84c092063e8f222758dd3d60bdb2d1d342ac292e
+- feat: Add option to configure n_ubatch by @abetlen in 6c44a3f36b089239cb6396bb408116aad262c702
+- feat: Update sampling API for llama.cpp. Sampling now uses sampler chain by @abetlen in f8fcb3ea3424bcfba3a5437626a994771a02324b
+- fix: Don't store scores internally unless logits_all=True. Reduces memory requirements for large context by @abetlen in 29afcfdff5e75d7df4c13bad0122c98661d251ab
+- fix: Fix memory allocation of ndarray by @xu-song in #1704
+- fix: Use system message in og qwen format by @abetlen in 98eb092d3c6e7c142c4ba2faaca6c091718abbb3
+
 ## [0.2.90]

 - feat: Update llama.cpp to ggerganov/llama.cpp@1d1ccce67613674c75c9c7e3fa4c1e24e428ba48
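
As a usage note for the sharded-GGUF entry above, here is a minimal sketch of `Llama.from_pretrained(additional_files=[...])`. The repo and file names are hypothetical placeholders; only the `additional_files` parameter itself comes from the 0.3.0 entry.

```python
from llama_cpp import Llama

# Hypothetical repo/file names for illustration; additional_files downloads
# the remaining shards alongside the first one before the model is loaded.
llm = Llama.from_pretrained(
    repo_id="some-org/some-model-GGUF",
    filename="model-00001-of-00003.gguf",
    additional_files=[
        "model-00002-of-00003.gguf",
        "model-00003-of-00003.gguf",
    ],
)
```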

llama_cpp/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *

-__version__ = "0.2.90"
+__version__ = "0.3.1"

llama_cpp/_ctypes_extensions.py

Lines changed: 131 additions & 0 deletions
@@ -0,0 +1,131 @@
+from __future__ import annotations
+
+import sys
+import os
+import ctypes
+import functools
+import pathlib
+
+from typing import (
+    Any,
+    Callable,
+    List,
+    Union,
+    Optional,
+    TYPE_CHECKING,
+    TypeVar,
+    Generic,
+)
+from typing_extensions import TypeAlias
+
+
+# Load the library
+def load_shared_library(lib_base_name: str, base_path: pathlib.Path):
+    """Platform independent shared library loader"""
+    # Searching for the library in the current directory under the name "libllama" (default name
+    # for llamacpp) and "llama" (default name for this repo)
+    lib_paths: List[pathlib.Path] = []
+    # Determine the file extension based on the platform
+    if sys.platform.startswith("linux") or sys.platform.startswith("freebsd"):
+        lib_paths += [
+            base_path / f"lib{lib_base_name}.so",
+        ]
+    elif sys.platform == "darwin":
+        lib_paths += [
+            base_path / f"lib{lib_base_name}.so",
+            base_path / f"lib{lib_base_name}.dylib",
+        ]
+    elif sys.platform == "win32":
+        lib_paths += [
+            base_path / f"{lib_base_name}.dll",
+            base_path / f"lib{lib_base_name}.dll",
+        ]
+    else:
+        raise RuntimeError("Unsupported platform")
+
+    cdll_args = dict()  # type: ignore
+
+    # Add the library directory to the DLL search path on Windows (if needed)
+    if sys.platform == "win32":
+        os.add_dll_directory(str(base_path))
+        os.environ["PATH"] = str(base_path) + os.pathsep + os.environ["PATH"]
+
+    if sys.platform == "win32" and sys.version_info >= (3, 8):
+        os.add_dll_directory(str(base_path))
+        if "CUDA_PATH" in os.environ:
+            os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "bin"))
+            os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "lib"))
+        if "HIP_PATH" in os.environ:
+            os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "bin"))
+            os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "lib"))
+        cdll_args["winmode"] = ctypes.RTLD_GLOBAL
+
+    # Try to load the shared library, handling potential errors
+    for lib_path in lib_paths:
+        if lib_path.exists():
+            try:
+                return ctypes.CDLL(str(lib_path), **cdll_args)  # type: ignore
+            except Exception as e:
+                raise RuntimeError(f"Failed to load shared library '{lib_path}': {e}")
+
+    raise FileNotFoundError(
+        f"Shared library with base name '{lib_base_name}' not found"
+    )
+
+
+# ctypes sane type hint helpers
+#
+# - Generic Pointer and Array types
+# - PointerOrRef type with a type hinted byref function
+#
+# NOTE: Only use these for static type checking not for runtime checks
+# no good will come of that
+
+if TYPE_CHECKING:
+    CtypesCData = TypeVar("CtypesCData", bound=ctypes._CData)  # type: ignore
+
+    CtypesArray: TypeAlias = ctypes.Array[CtypesCData]  # type: ignore
+
+    CtypesPointer: TypeAlias = ctypes._Pointer[CtypesCData]  # type: ignore
+
+    CtypesVoidPointer: TypeAlias = ctypes.c_void_p
+
+    class CtypesRef(Generic[CtypesCData]):
+        pass
+
+    CtypesPointerOrRef: TypeAlias = Union[
+        CtypesPointer[CtypesCData], CtypesRef[CtypesCData]
+    ]
+
+    CtypesFuncPointer: TypeAlias = ctypes._FuncPointer  # type: ignore
+
+F = TypeVar("F", bound=Callable[..., Any])
+
+
+def ctypes_function_for_shared_library(lib: ctypes.CDLL):
+    """Decorator for defining ctypes functions with type hints"""
+
+    def ctypes_function(
+        name: str, argtypes: List[Any], restype: Any, enabled: bool = True
+    ):
+        def decorator(f: F) -> F:
+            if enabled:
+                func = getattr(lib, name)
+                func.argtypes = argtypes
+                func.restype = restype
+                functools.wraps(f)(func)
+                return func
+            else:
+                return f
+
+        return decorator
+
+    return ctypes_function
+
+
+def _byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
+    """Type-annotated version of ctypes.byref"""
+    ...
+
+
+byref = _byref if TYPE_CHECKING else ctypes.byref
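
A sketch of how the decorator factory above is meant to be used. It assumes a `libllama` shared library built under the package's `lib/` directory and uses `llama_max_devices()`, a long-standing llama.cpp export taking no arguments and returning `size_t`; the stub body is replaced by the typed C function at import time.

```python
import ctypes
import pathlib

import llama_cpp._ctypes_extensions as ctypes_ext

# Assumes the shared library sits in <package>/lib, as _ggml.py below does.
_lib = ctypes_ext.load_shared_library(
    "llama", pathlib.Path(__file__).parent / "lib"
)
_ctypes_function = ctypes_ext.ctypes_function_for_shared_library(_lib)

# The decorated stub only carries the Python type hints; the decorator
# attaches argtypes/restype to the C symbol and returns it in place of f.
@_ctypes_function("llama_max_devices", [], ctypes.c_size_t)
def llama_max_devices() -> int:
    ...
```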

llama_cpp/_ggml.py

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+"""Internal module use at your own risk
+
+This module provides a minimal interface for working with ggml tensors from llama-cpp-python
+"""
+import os
+import pathlib
+
+import llama_cpp._ctypes_extensions as ctypes_ext
+
+libggml_base_path = pathlib.Path(os.path.abspath(os.path.dirname(__file__))) / "lib"
+libggml = ctypes_ext.load_shared_library("ggml", libggml_base_path)
+
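
Since the module only exposes a raw `ctypes.CDLL` handle, callers declare signatures themselves. A hedged sketch, assuming `ggml_time_us()` (a long-standing ggml export returning microseconds as `int64_t`) is present in the built library:

```python
import ctypes

from llama_cpp._ggml import libggml

# Declare the signature before calling through the raw handle.
libggml.ggml_time_us.argtypes = []
libggml.ggml_time_us.restype = ctypes.c_int64

print("ggml clock (us):", libggml.ggml_time_us())
```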

llama_cpp/_logger.py

Lines changed: 7 additions & 0 deletions
@@ -10,17 +10,20 @@
 # GGML_LOG_LEVEL_WARN  = 2,
 # GGML_LOG_LEVEL_ERROR = 3,
 # GGML_LOG_LEVEL_DEBUG = 4,
+# GGML_LOG_LEVEL_CONT  = 5, // continue previous log
 # };
 GGML_LOG_LEVEL_TO_LOGGING_LEVEL = {
     0: logging.CRITICAL,
     1: logging.INFO,
     2: logging.WARNING,
     3: logging.ERROR,
     4: logging.DEBUG,
+    5: logging.DEBUG,
 }

 logger = logging.getLogger("llama-cpp-python")

+_last_log_level = GGML_LOG_LEVEL_TO_LOGGING_LEVEL[0]

 # typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);
 @llama_cpp.llama_log_callback
@@ -29,8 +32,12 @@ def llama_log_callback(
     text: bytes,
     user_data: ctypes.c_void_p,
 ):
+    # TODO: Correctly implement continue previous log
+    global _last_log_level
+    log_level = GGML_LOG_LEVEL_TO_LOGGING_LEVEL[level] if level != 5 else _last_log_level
     if logger.level <= GGML_LOG_LEVEL_TO_LOGGING_LEVEL[level]:
         print(text.decode("utf-8"), end="", flush=True, file=sys.stderr)
+    _last_log_level = log_level


 llama_cpp.llama_log_set(llama_log_callback, ctypes.c_void_p(0))
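
Since the callback above filters on `logger.level`, downstream code can silence llama.cpp/ggml chatter by raising the threshold on the named logger shown in the diff:

```python
import logging

# Only ERROR (and CRITICAL) messages from llama.cpp/ggml reach stderr now.
logging.getLogger("llama-cpp-python").setLevel(logging.ERROR)
```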

llama_cpp/llama.py

Lines changed: 8 additions & 6 deletions
@@ -807,8 +807,10 @@ def sample(
             grammar=grammar,
         )

+        ridx = idx - self.n_tokens if idx is not None else -1
+
         assert self.ctx is not None
-        token = self._sampler.sample(self._ctx, -1)
+        token = self._sampler.sample(self._ctx, ridx)
         if tmp_sampler:
             self._sampler = None
         return token
@@ -928,7 +930,7 @@ def generate(

             sample_idx += 1
             if stopping_criteria is not None and stopping_criteria(
-                self._input_ids, self._scores[-1, :]
+                self._input_ids[: sample_idx], self._scores[sample_idx - self.n_tokens, :]
             ):
                 return
             tokens_or_none = yield token
@@ -1517,15 +1519,15 @@ def logit_bias_processor(

         if stream:
             remaining_tokens = completion_tokens[returned_tokens:]
-            all_text = self.detokenize(
+            remaining_text = self.detokenize(
                 remaining_tokens,
                 prev_tokens=prompt_tokens + completion_tokens[:returned_tokens],
             )
-            any_stop = [s for s in stop_sequences if s in all_text]
+            any_stop = [s for s in stop_sequences if s in remaining_text]
             if len(any_stop) > 0:
-                end = min(all_text.index(stop) for stop in any_stop)
+                end = min(remaining_text.index(stop) for stop in any_stop)
             else:
-                end = len(all_text)
+                end = len(remaining_text)

             token_end_position = 0
             for token in remaining_tokens:
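
For reference, the `stopping_criteria` callable touched above receives the token ids generated so far and the logits row for the current step. A minimal sketch of a custom criterion; the newline token id of 13 is an assumption and varies by model vocabulary:

```python
import numpy as np

import llama_cpp

NEWLINE_TOKEN_ID = 13  # assumed id for illustration; model-specific

def stop_on_newline(input_ids: np.ndarray, logits: np.ndarray) -> bool:
    # Stop generation once the most recently sampled token is a newline.
    return len(input_ids) > 0 and input_ids[-1] == NEWLINE_TOKEN_ID

# Passed to completion calls via:
#   llm("prompt", stopping_criteria=llama_cpp.StoppingCriteriaList([stop_on_newline]))
```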
