From 0478174d5959b66096ae6609fcb0df14cab66b51 Mon Sep 17 00:00:00 2001
From: Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
Date: Wed, 7 Aug 2024 11:25:36 +0100
Subject: [PATCH] [SYCL] Updated SYCL device filtering (#8901)

* Updated device filter to depend on default_selector (fixes non-Intel device issues)
* Small related update to the examples/sycl README
---
 examples/sycl/README.md            | 24 +++++++++---------------
 ggml/src/ggml-sycl/dpct/helper.hpp | 19 ++++++++++++++++---
 2 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/examples/sycl/README.md b/examples/sycl/README.md
index 0e3acd35b..8819d87f5 100644
--- a/examples/sycl/README.md
+++ b/examples/sycl/README.md
@@ -12,9 +12,9 @@ This example program provides the tools for llama.cpp for SYCL on Intel GPU.
 
 List all SYCL devices with ID, compute capability, max work group size, ect.
 
-1. Build the llama.cpp for SYCL for all targets.
+1. Build llama.cpp for SYCL for the specified target *(using GGML_SYCL_TARGET)*.
 
-2. Enable oneAPI running environment
+2. Enable oneAPI running environment *(if GGML_SYCL_TARGET is set to INTEL, which is the default)*
 
 ```
 source /opt/intel/oneapi/setvars.sh
@@ -29,19 +29,13 @@ source /opt/intel/oneapi/setvars.sh
 Check the ID in startup log, like:
 
 ```
-found 4 SYCL devices:
-  Device 0: Intel(R) Arc(TM) A770 Graphics,	compute capability 1.3,
-    max compute_units 512,	max work group size 1024,	max sub group size 32,	global mem size 16225243136
-  Device 1: Intel(R) FPGA Emulation Device,	compute capability 1.2,
-    max compute_units 24,	max work group size 67108864,	max sub group size 64,	global mem size 67065057280
-  Device 2: 13th Gen Intel(R) Core(TM) i7-13700K,	compute capability 3.0,
-    max compute_units 24,	max work group size 8192,	max sub group size 64,	global mem size 67065057280
-  Device 3: Intel(R) Arc(TM) A770 Graphics,	compute capability 3.0,
-    max compute_units 512,	max work group size 1024,	max sub group size 32,	global mem size 16225243136
+found 2 SYCL devices:
+|  |                   |                                       |       |Max    |        |Max  |Global |                     |
+|  |                   |                                       |       |compute|Max work|sub  |mem    |                     |
+|ID|        Device Type|                                   Name|Version|units  |group   |group|size   |       Driver version|
+|--|-------------------|---------------------------------------|-------|-------|--------|-----|-------|---------------------|
+| 0| [level_zero:gpu:0]|                Intel Arc A770 Graphics|    1.3|    512|    1024|   32| 16225M|            1.3.29138|
+| 1| [level_zero:gpu:1]|                 Intel UHD Graphics 750|    1.3|     32|     512|   32| 62631M|            1.3.29138|
 
 ```
 
-|Attribute|Note|
-|-|-|
-|compute capability 1.3|Level-zero running time, recommended |
-|compute capability 3.0|OpenCL running time, slower than level-zero in most cases|
diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml/src/ggml-sycl/dpct/helper.hpp
index ef4609e32..fe4a8f744 100644
--- a/ggml/src/ggml-sycl/dpct/helper.hpp
+++ b/ggml/src/ggml-sycl/dpct/helper.hpp
@@ -874,7 +874,7 @@ namespace dpct
         inline std::string get_preferred_gpu_platform_name() {
             std::string result;
 
-            std::string filter = "level-zero";
+            std::string filter = "";
             char* env = getenv("ONEAPI_DEVICE_SELECTOR");
             if (env) {
                 if (std::strstr(env, "level_zero")) {
@@ -892,11 +892,24 @@ namespace dpct
                 else {
                     throw std::runtime_error("invalid device filter: " + std::string(env));
                 }
+            } else {
+                auto default_device = sycl::device(sycl::default_selector_v);
+                auto default_platform_name = default_device.get_platform().get_info<sycl::info::platform::name>();
+
+                if (std::strstr(default_platform_name.c_str(), "Level-Zero") || default_device.is_cpu()) {
+                    filter = "level-zero";
+                }
+                else if (std::strstr(default_platform_name.c_str(), "CUDA")) {
+                    filter = "cuda";
+                }
+                else if (std::strstr(default_platform_name.c_str(), "HIP")) {
+                    filter = "hip";
+                }
             }
 
-            auto plaform_list = sycl::platform::get_platforms();
+            auto platform_list = sycl::platform::get_platforms();
 
-            for (const auto& platform : plaform_list) {
+            for (const auto& platform : platform_list) {
                 auto devices = platform.get_devices();
                 auto gpu_dev = std::find_if(devices.begin(), devices.end(), [](const sycl::device& d) {
                     return d.is_gpu();