backup

parent 1f80e0e428
commit d32a8f6142
ggml-sycl.cpp: 448 changes
File diff suppressed because it is too large
@@ -13,6 +13,8 @@
 extern "C" {
 #endif
 
+#define GGML_SYCL_MAX_DEVICES 48
+
 // backend API
 GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
 
@@ -30,13 +32,6 @@ GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
 GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
 GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
 GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
-GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
-
-// TODO: these are temporary
-// ref: https://github.com/ggerganov/llama.cpp/pull/6022#issuecomment-1992615670
-GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index);
-GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
-GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode();
 
 // SYCL doesn't support registering host memory, keep here for reference
 // GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
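Note: the header hunk above leaves a small public surface for device discovery and backend creation. The following standalone sketch (illustrative only, not part of this commit) shows how those remaining entry points fit together; it assumes they are reachable through the ggml-sycl and ggml-backend headers, and that the argument of ggml_backend_sycl_init is a zero-based device index.

// Illustrative sketch, not from this commit: enumerate SYCL devices and
// bring up one backend using the API declared in the hunk above.
#include <cstdio>

#include "ggml-backend.h"   // assumed to provide ggml_backend_t / ggml_backend_free
#include "ggml-sycl.h"      // assumed to provide the declarations shown above

int main() {
    const int n_devices = ggml_backend_sycl_get_device_count();
    for (int i = 0; i < n_devices; ++i) {
        char   desc[256];
        size_t free_mem  = 0;
        size_t total_mem = 0;
        ggml_sycl_get_device_description(i, desc, sizeof(desc));
        ggml_backend_sycl_get_device_memory(i, &free_mem, &total_mem);
        printf("SYCL%d: %s, %zu of %zu bytes free\n", i, desc, free_mem, total_mem);
    }

    // Assumption: a zero-based device index, as llama.cpp passes main_gpu below.
    ggml_backend_t backend = ggml_backend_sycl_init(0);
    if (backend == nullptr) {
        fprintf(stderr, "failed to initialize SYCL backend\n");
        return 1;
    }
    ggml_backend_free(backend);
    return 0;
}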
@@ -136,6 +136,7 @@ typedef sycl::float2 dfloat2;
 
 static const int8_t kvalues_iq4nl[16]={-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
 
+static int g_all_sycl_device_count = -1;
 static bool g_ggml_backend_sycl_buffer_type_initialized = false;
 
 static ggml_sycl_backend_gpu_mode g_ggml_sycl_backend_gpu_mode =
@ -13,7 +13,6 @@
|
|||||||
#ifndef GGML_SYCL_PRESETS_HPP
|
#ifndef GGML_SYCL_PRESETS_HPP
|
||||||
#define GGML_SYCL_PRESETS_HPP
|
#define GGML_SYCL_PRESETS_HPP
|
||||||
|
|
||||||
#define GGML_SYCL_MAX_DEVICES 48
|
|
||||||
#define GGML_SYCL_MAX_STREAMS 8
|
#define GGML_SYCL_MAX_STREAMS 8
|
||||||
#define GGML_SYCL_MAX_BUFFERS 256
|
#define GGML_SYCL_MAX_BUFFERS 256
|
||||||
#define GGML_SYCL_NAME "SYCL"
|
#define GGML_SYCL_NAME "SYCL"
|
||||||
|
llama.cpp: 13 changes
@@ -6518,16 +6518,6 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
         }
 #endif
 
-#ifdef GGML_USE_SYCL
-        if (params.split_mode == LLAMA_SPLIT_MODE_NONE) {
-            ggml_backend_sycl_set_single_device_mode(params.main_gpu);
-            //SYCL use device index (0, 1, 2) directly, uer input device id, then convert to device index.
-            params.main_gpu = ggml_backend_sycl_get_device_index(params.main_gpu);
-        } else {
-            ggml_backend_sycl_set_mul_device_mode();
-        }
-#endif
-
         if (!llm_load_tensors(
             ml, model, params.n_gpu_layers, params.split_mode, params.main_gpu, params.tensor_split, params.use_mlock,
             params.progress_callback, params.progress_callback_user_data
@@ -16376,8 +16366,7 @@ struct llama_context * llama_new_context_with_model(
         if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) {
             ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu);
             if (backend == nullptr) {
-                int main_gpu_id = ggml_backend_sycl_get_device_id(model->main_gpu);
-                LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, main_gpu_id, model->main_gpu);
+                LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d backend\n", __func__, model->main_gpu);
                 llama_free(ctx);
                 return nullptr;
             }
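Note: with the device-id/index conversion removed from llama_model_load, SYCL device selection follows the same split-mode path as the other backends. For completeness, a hedged sketch of the one discovery helper still visible in the header context above (ggml_sycl_get_gpu_list), bounded by the GGML_SYCL_MAX_DEVICES limit this commit relocates; how unused slots are reported is an assumption, since the contract is not spelled out in this diff.

// Illustrative sketch, not from this commit: query which GPU ids the SYCL
// backend exposes. Assumes GGML_SYCL_MAX_DEVICES and ggml_sycl_get_gpu_list
// are visible via the ggml-sycl header after this change.
#include <cstdio>

#include "ggml-sycl.h"

int main() {
    int id_list[GGML_SYCL_MAX_DEVICES];
    for (int i = 0; i < GGML_SYCL_MAX_DEVICES; ++i) {
        id_list[i] = -1;  // pre-fill so untouched slots read as "no device"
    }
    ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES);

    for (int i = 0; i < GGML_SYCL_MAX_DEVICES; ++i) {
        // Assumption: slots that do not map to a usable GPU stay negative.
        if (id_list[i] >= 0) {
            printf("usable SYCL GPU id: %d\n", id_list[i]);
        }
    }
    return 0;
}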