@@ -82,6 +82,8 @@ def _load_shared_library(lib_base_name: str):
82
82
83
83
# Misc: shorthand aliases for the ctypes pointer types used in C prototypes.
c_float_p = POINTER(c_float)  # float *
# float ** -- POINTER() returns the same type object for the same target type,
# so this is the very same type as POINTER(POINTER(c_float)).
c_float_p_p = POINTER(c_float_p)
c_int_p = POINTER(c_int)  # int *
c_uint8_p = POINTER(c_uint8)  # uint8_t *
c_size_t_p = POINTER(c_size_t)  # size_t *
87
89
@@ -112,6 +114,11 @@ def _load_shared_library(lib_base_name: str):
112
114
# The structs below are never described field-by-field on the Python side;
# each one is only ever passed around as an untyped ``void *`` handle.

# struct llama_context;
llama_context_p = c_void_p

# struct clip_ctx;
clip_ctx_p = c_void_p

# struct llava_image_embed;
llava_image_embed_p = c_void_p

# typedef int32_t llama_pos;
llama_pos = c_int32
@@ -1923,3 +1930,63 @@ def llama_dump_timing_info_yaml(stream: ctypes.c_void_p, ctx: llama_context_p):
1923
1930
1924
1931
_lib .llama_dump_timing_info_yaml .argtypes = [ctypes .c_void_p , llama_context_p ]
1925
1932
_lib .llama_dump_timing_info_yaml .restype = None
1933
+
1934
+
1935
+ # LLAVA
1936
+
1937
+
1938
# LLAMA_API struct clip_ctx * clip_model_load(const char * fname, const int verbosity);
def clip_model_load(
    fname: Union[c_char_p, bytes], verbosity: Union[c_int, int] = 0
) -> clip_ctx_p:
    """Load an mmproj model.

    Args:
        fname: File name of the model, passed to C as ``const char *``.
        verbosity: Verbosity level forwarded to the native loader. A plain
            ``int`` is accepted (the default is ``0``); ctypes converts it
            to ``c_int`` through the argtypes declared below.

    Returns:
        The ``clip_ctx`` handle produced by the native ``clip_model_load``.
    """
    return _lib.clip_model_load(fname, verbosity)


_lib.clip_model_load.argtypes = [c_char_p, c_int]
_lib.clip_model_load.restype = clip_ctx_p
1944
+
1945
+
1946
# LLAMA_API void clip_free(struct clip_ctx * ctx);
def clip_free(ctx: clip_ctx_p):
    """Free an mmproj model.

    Args:
        ctx: ``clip_ctx`` handle handed to the native ``clip_free``.
    """
    _lib.clip_free(ctx)


# The C function returns void, so no result conversion is requested.
_lib.clip_free.restype = None
_lib.clip_free.argtypes = [clip_ctx_p]
1952
+
1953
+
1954
# LLAMA_API bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * ctx_clip);
def llava_validate_embed_size(ctx_llama: llama_context_p, ctx_clip: clip_ctx_p) -> c_bool:
    """Sanity check that the clip and llava embed sizes match.

    Args:
        ctx_llama: ``llama_context`` handle.
        ctx_clip: ``clip_ctx`` handle.

    Returns:
        The boolean result of the native ``llava_validate_embed_size``.
    """
    sizes_match = _lib.llava_validate_embed_size(ctx_llama, ctx_clip)
    return sizes_match


_lib.llava_validate_embed_size.restype = c_bool
_lib.llava_validate_embed_size.argtypes = [llama_context_p, clip_ctx_p]
1960
+
1961
+
1962
# LLAMA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length);
def llava_image_embed_make_with_bytes(
    ctx_clip: clip_ctx_p,
    n_threads: Union[int, c_int],
    image_bytes: c_uint8_p,
    image_bytes_length: Union[int, c_int, c_size_t],
) -> llava_image_embed_p:
    """Build an image embed from the raw contents of an image file.

    ``image_bytes`` is interpreted as the contents of an image file whose
    byte size is ``image_bytes_length``.
    Supported formats (autodetected): JPG, PNG, TGA, BMP, PSD, GIF, HDR, PIC
    (ref https://github.com/nothings/stb).

    Args:
        ctx_clip: ``clip_ctx`` handle.
        n_threads: Thread count forwarded to the native call.
        image_bytes: Pointer to the image file bytes.
        image_bytes_length: Number of bytes behind ``image_bytes``. The C
            prototype takes an ``int``; a plain ``int`` or any ctypes
            integer wrapper is accepted here.

    Returns:
        The ``llava_image_embed`` handle produced by the native call.
    """
    # The argument is declared as c_int below to match the C prototype.
    # Unwrap ctypes integers (this parameter was previously annotated as
    # c_size_t) so such values still convert instead of raising.
    length = getattr(image_bytes_length, "value", image_bytes_length)
    return _lib.llava_image_embed_make_with_bytes(ctx_clip, n_threads, image_bytes, length)


_lib.llava_image_embed_make_with_bytes.argtypes = [clip_ctx_p, c_int, c_uint8_p, c_int]
_lib.llava_image_embed_make_with_bytes.restype = llava_image_embed_p
1969
+
1970
+
1971
# LLAMA_API struct llava_image_embed * llava_image_embed_make_with_filename(struct clip_ctx * ctx_clip, int n_threads, const char * image_path);
def llava_image_embed_make_with_filename(
    ctx_clip: clip_ctx_p,
    n_threads: Union[c_int, int],
    filename: Union[c_char_p, bytes],
) -> llava_image_embed_p:
    """Build an image embed from a path to an image filename.

    Args:
        ctx_clip: ``clip_ctx`` handle.
        n_threads: Thread count forwarded to the native call.
        filename: Image path, passed to C as ``const char * image_path``.

    Returns:
        The ``llava_image_embed`` handle produced by the native call.
    """
    embed = _lib.llava_image_embed_make_with_filename(ctx_clip, n_threads, filename)
    return embed


_lib.llava_image_embed_make_with_filename.restype = llava_image_embed_p
_lib.llava_image_embed_make_with_filename.argtypes = [clip_ctx_p, c_int, c_char_p]
1977
+
1978
# LLAMA_API void llava_image_embed_free(struct llava_image_embed * embed);
def llava_image_embed_free(embed: llava_image_embed_p):
    """Free an embedding made with one of the llava_image_embed_make_ methods.

    Args:
        embed: ``llava_image_embed`` handle handed to the native free function.
    """
    _lib.llava_image_embed_free(embed)


# The C function returns void, so no result conversion is requested.
_lib.llava_image_embed_free.restype = None
_lib.llava_image_embed_free.argtypes = [llava_image_embed_p]
1984
+
1985
# LLAMA_API bool llava_eval_image_embed(struct llama_context * ctx_llama, const struct llava_image_embed * embed, int n_batch, int * n_past);
def llava_eval_image_embed(
    ctx: llama_context_p,
    image_embed: llava_image_embed_p,
    n_batch: c_int,
    n_past: c_int_p,
) -> c_bool:
    """Write the image represented by ``image_embed`` into the llama context.

    The embed is written with batch size ``n_batch``, starting at context
    position ``n_past``. On completion, ``n_past`` points to the next
    position in the context after the image embed.

    Args:
        ctx: ``llama_context`` handle.
        image_embed: ``llava_image_embed`` handle to evaluate.
        n_batch: Batch size forwarded to the native call.
        n_past: Pointer to the context position (``int *`` in C).

    Returns:
        The boolean result of the native ``llava_eval_image_embed``.
    """
    return _lib.llava_eval_image_embed(ctx, image_embed, n_batch, n_past)


_lib.llava_eval_image_embed.argtypes = [llama_context_p, llava_image_embed_p, c_int, c_int_p]
# The attribute must be spelled ``restype``. A misspelled name only creates an
# unrelated attribute, leaving ctypes to treat the C ``bool`` return as ``int``.
_lib.llava_eval_image_embed.restype = c_bool
1992
+
0 commit comments