Skip to content

Commit e83c965

Browse files
committed
basic integration of PowerInfer
1 parent f2901d8 commit e83c965

File tree

6 files changed

+35
-26
lines changed

6 files changed

+35
-26
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,3 +176,5 @@ cython_debug/
176176

177177
# downloaded model .bin files
178178
docker/open_llama/*.bin
179+
180+
/.venv/**

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
[submodule "vendor/llama.cpp"]
22
path = vendor/llama.cpp
33
url = https://github.com/ggerganov/llama.cpp.git
4+
[submodule "vendor/PowerInfer"]
5+
path = vendor/PowerInfer
6+
url = https://github.com/SJTU-IPADS/PowerInfer.git

CMakeLists.txt

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ if (LLAMA_BUILD)
1616
set(LLAMA_FMA "Off" CACHE BOOL "llama: enable FMA" FORCE)
1717
set(LLAMA_F16C "Off" CACHE BOOL "llama: enable F16C" FORCE)
1818
endif()
19-
add_subdirectory(vendor/llama.cpp)
19+
add_subdirectory(vendor/PowerInfer)
2020
install(
2121
TARGETS llama
2222
LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
@@ -44,28 +44,28 @@ if (LLAMA_BUILD)
4444
DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
4545
)
4646

47-
# Building llava
48-
add_subdirectory(vendor/llama.cpp/examples/llava)
49-
set_target_properties(llava_shared PROPERTIES OUTPUT_NAME "llava")
50-
# Set CUDA_ARCHITECTURES to OFF on windows
51-
if (WIN32)
52-
set_target_properties(llava_shared PROPERTIES CUDA_ARCHITECTURES OFF)
53-
endif()
54-
install(
55-
TARGETS llava_shared
56-
LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
57-
RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
58-
ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
59-
FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
60-
RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
61-
)
62-
# Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
63-
install(
64-
TARGETS llava_shared
65-
LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
66-
RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
67-
ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
68-
FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
69-
RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
70-
)
47+
# # Building llava
48+
# add_subdirectory(vendor/llama.cpp/examples/llava)
49+
# set_target_properties(llava_shared PROPERTIES OUTPUT_NAME "llava")
50+
# # Set CUDA_ARCHITECTURES to OFF on windows
51+
# if (WIN32)
52+
# set_target_properties(llava_shared PROPERTIES CUDA_ARCHITECTURES OFF)
53+
# endif()
54+
# install(
55+
# TARGETS llava_shared
56+
# LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
57+
# RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
58+
# ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
59+
# FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
60+
# RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
61+
# )
62+
# # Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
63+
# install(
64+
# TARGETS llava_shared
65+
# LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
66+
# RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
67+
# ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
68+
# FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
69+
# RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
70+
# )
7171
endif()

examples/high_level_api/high_level_api_inference.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
output = llm(
1313
"Question: What are the names of the planets in the solar system? Answer: ",
14-
max_tokens=48,
14+
max_tokens=512,
1515
stop=["Q:", "\n"],
1616
echo=True,
1717
)

llama_cpp/llama_cpp.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,12 +305,15 @@ class llama_model_params(Structure):
305305
_fields_ = [
306306
("n_gpu_layers", c_int32),
307307
("main_gpu", c_int32),
308+
("vram_budget_gb", c_float),
308309
("tensor_split", c_float_p),
309310
("progress_callback", llama_progress_callback),
310311
("progress_callback_user_data", c_void_p),
311312
("vocab_only", c_bool),
312313
("use_mmap", c_bool),
313314
("use_mlock", c_bool),
315+
("reset_gpu_index", c_bool),
316+
("disable_gpu_index", c_bool),
314317
]
315318

316319

vendor/PowerInfer

Submodule PowerInfer added at 9d72668

0 commit comments

Comments (0)