mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-10 12:30:50 +01:00
f578b86b21
* move BLAS to a separate backend * rename GGML_USE_OPENBLAS to GGML_USE_BLAS * alloc : reuse same buffer when the same buffer type if used multiple times * set number of threads automatically for openblas and blis * sched : print assignments when GGML_SCHED_DEBUG env variable is set * sched : allow ops with weights on an incompatible buffer type This will cause the weight to be copied to a backend that supports the op, which is very costly. The weight should have been stored in a buffer of a backend that can run the op, but llama.cpp cannot do this automatically at the moment. --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
24 lines
526 B
C
24 lines
526 B
C
#pragma once
|
|
|
|
#include "ggml.h"
|
|
#include "ggml-backend.h"
|
|
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
// backend API
|
|
GGML_API GGML_CALL ggml_backend_t ggml_backend_blas_init(void);
|
|
|
|
GGML_API GGML_CALL bool ggml_backend_is_blas(ggml_backend_t backend);
|
|
|
|
// number of threads used for conversion to float
|
|
// for openblas and blis, this will also set the number of threads used for blas operations
|
|
GGML_API GGML_CALL void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
|
|
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|