mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 05:17:21 +01:00
ggml : add ggml_cpu_has_avx_vnni() (#4589)
* feat: add avx_vnni based on intel documents * ggml: add avx vnni based on intel document * llama: add avx vnni information display * docs: add more details about using oneMKL and oneAPI for intel processors * docs: add more details about using oneMKL and oneAPI for intel processors * docs: add more details about using oneMKL and oneAPI for intel processors * docs: add more details about using oneMKL and oneAPI for intel processors * docs: add more details about using oneMKL and oneAPI for intel processors * Update ggml.c Fix indentation upgate Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
a20f3c7465
commit
24a447e20a
20
README.md
20
README.md
@ -385,17 +385,31 @@ Building the program with BLAS support may lead to some performance improvements
|
|||||||
|
|
||||||
Check [BLIS.md](docs/BLIS.md) for more information.
|
Check [BLIS.md](docs/BLIS.md) for more information.
|
||||||
|
|
||||||
- #### Intel MKL
|
- #### Intel oneMKL
|
||||||
|
- Using manual oneAPI installation:
|
||||||
|
By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you have already sourced the Intel environment script and pass `-DLLAMA_BLAS=ON` to cmake, the MKL version of BLAS will automatically be selected. Otherwise, please install oneAPI and follow the steps below:
|
||||||
|
```bash
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
source /opt/intel/oneapi/setvars.sh # You can skip this step if in oneapi-runtime docker image, only required for manual installation
|
||||||
|
cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON
|
||||||
|
cmake --build . --config Release
|
||||||
|
```
|
||||||
|
|
||||||
By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you have already sourced the Intel environment script and pass `-DLLAMA_BLAS=ON` to cmake, the MKL version of BLAS will automatically be selected. You may also specify it by:
|
- Using oneAPI docker image:
|
||||||
|
If you do not want to source the environment vars and install oneAPI manually, you can also build the code using the Intel docker container: [oneAPI-runtime](https://hub.docker.com/r/intel/oneapi-runtime)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON
|
||||||
cmake --build . --config Release
|
cmake --build . --config Release
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Building with the oneAPI compilers makes the avx_vnni instruction set available on Intel processors that do not support avx512 and avx512_vnni.
|
||||||
|
|
||||||
|
Check [Optimizing and Running LLaMA2 on Intel® CPU](https://www.intel.com/content/www/us/en/content-details/791610/optimizing-and-running-llama2-on-intel-cpu.html) for more information.
|
||||||
|
|
||||||
- #### cuBLAS
|
- #### cuBLAS
|
||||||
|
|
||||||
This provides BLAS acceleration using the CUDA cores of your Nvidia GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager (e.g. `apt install nvidia-cuda-toolkit`) or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads).
|
This provides BLAS acceleration using the CUDA cores of your Nvidia GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager (e.g. `apt install nvidia-cuda-toolkit`) or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads).
|
||||||
|
@ -1394,6 +1394,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
|
|||||||
fprintf(stream, "build_number: %d\n", LLAMA_BUILD_NUMBER);
|
fprintf(stream, "build_number: %d\n", LLAMA_BUILD_NUMBER);
|
||||||
fprintf(stream, "cpu_has_arm_fma: %s\n", ggml_cpu_has_arm_fma() ? "true" : "false");
|
fprintf(stream, "cpu_has_arm_fma: %s\n", ggml_cpu_has_arm_fma() ? "true" : "false");
|
||||||
fprintf(stream, "cpu_has_avx: %s\n", ggml_cpu_has_avx() ? "true" : "false");
|
fprintf(stream, "cpu_has_avx: %s\n", ggml_cpu_has_avx() ? "true" : "false");
|
||||||
|
fprintf(stream, "cpu_has_avx_vnni: %s\n", ggml_cpu_has_avx_vnni() ? "true" : "false");
|
||||||
fprintf(stream, "cpu_has_avx2: %s\n", ggml_cpu_has_avx2() ? "true" : "false");
|
fprintf(stream, "cpu_has_avx2: %s\n", ggml_cpu_has_avx2() ? "true" : "false");
|
||||||
fprintf(stream, "cpu_has_avx512: %s\n", ggml_cpu_has_avx512() ? "true" : "false");
|
fprintf(stream, "cpu_has_avx512: %s\n", ggml_cpu_has_avx512() ? "true" : "false");
|
||||||
fprintf(stream, "cpu_has_avx512_vbmi: %s\n", ggml_cpu_has_avx512_vbmi() ? "true" : "false");
|
fprintf(stream, "cpu_has_avx512_vbmi: %s\n", ggml_cpu_has_avx512_vbmi() ? "true" : "false");
|
||||||
|
8
ggml.c
8
ggml.c
@ -19638,6 +19638,14 @@ int ggml_cpu_has_avx(void) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether this build was compiled with AVX-VNNI enabled.
// The result is fixed at compile time: returns 1 when the compiler defines
// __AVXVNNI__, and 0 otherwise. No runtime CPU detection is performed here.
int ggml_cpu_has_avx_vnni(void) {
|
||||||
|
#if defined(__AVXVNNI__)
|
||||||
|
return 1;
|
||||||
|
#else
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
int ggml_cpu_has_avx2(void) {
|
int ggml_cpu_has_avx2(void) {
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
return 1;
|
return 1;
|
||||||
|
1
ggml.h
1
ggml.h
@ -2198,6 +2198,7 @@ extern "C" {
|
|||||||
//
|
//
|
||||||
|
|
||||||
GGML_API int ggml_cpu_has_avx (void);
|
GGML_API int ggml_cpu_has_avx (void);
|
||||||
|
GGML_API int ggml_cpu_has_avx_vnni (void);
|
||||||
GGML_API int ggml_cpu_has_avx2 (void);
|
GGML_API int ggml_cpu_has_avx2 (void);
|
||||||
GGML_API int ggml_cpu_has_avx512 (void);
|
GGML_API int ggml_cpu_has_avx512 (void);
|
||||||
GGML_API int ggml_cpu_has_avx512_vbmi(void);
|
GGML_API int ggml_cpu_has_avx512_vbmi(void);
|
||||||
|
@ -10780,6 +10780,7 @@ const char * llama_print_system_info(void) {
|
|||||||
|
|
||||||
s = "";
|
s = "";
|
||||||
s += "AVX = " + std::to_string(ggml_cpu_has_avx()) + " | ";
|
s += "AVX = " + std::to_string(ggml_cpu_has_avx()) + " | ";
|
||||||
|
s += "AVX_VNNI = " + std::to_string(ggml_cpu_has_avx_vnni()) + " | ";
|
||||||
s += "AVX2 = " + std::to_string(ggml_cpu_has_avx2()) + " | ";
|
s += "AVX2 = " + std::to_string(ggml_cpu_has_avx2()) + " | ";
|
||||||
s += "AVX512 = " + std::to_string(ggml_cpu_has_avx512()) + " | ";
|
s += "AVX512 = " + std::to_string(ggml_cpu_has_avx512()) + " | ";
|
||||||
s += "AVX512_VBMI = " + std::to_string(ggml_cpu_has_avx512_vbmi()) + " | ";
|
s += "AVX512_VBMI = " + std::to_string(ggml_cpu_has_avx512_vbmi()) + " | ";
|
||||||
|
Loading…
x
Reference in New Issue
Block a user