diff --git a/llama.cpp b/llama.cpp
index 68e0a9457..f684ebe0b 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -5443,7 +5443,13 @@ static struct ggml_cgraph * llama_build_graph(
 
     // offload layers
     // TODO: this code will be obsoleted with backend v2
-    {
+#ifdef GGML_USE_CUBLAS
+    const bool do_offload = true;
+#else
+    const bool do_offload = false;
+#endif
+
+    if (do_offload) {
         const int n_layer      = model.hparams.n_layer;
         const int n_gpu_layers = model.n_gpu_layers;
 
@@ -5576,12 +5582,17 @@ static struct ggml_cgraph * llama_build_graph(
         for (int i = 0; i < result->n_nodes; ++i) {
             struct ggml_tensor * cur = result->nodes[i];
 
+            // view tensors are not offloaded
+            if (cur->view_src != nullptr) {
+                continue;
+            }
+
             const std::string name = cur->name;
 
             const auto it = k_offload_func.find(name);
             if (it == k_offload_func.end()) {
                 // if a tensor that is not a view hasn't been offloaded, we warn the user
-                if (worst_case && cur->view_src == nullptr) {
+                if (worst_case) {
                     LLAMA_LOG_WARN("%s: node %4d %32s: not offloaded (ref: %s)\n", __func__,
                             i, name.c_str(), "https://github.com/ggerganov/llama.cpp/pull/3837");
                 }
@@ -5600,7 +5611,7 @@ static struct ggml_cgraph * llama_build_graph(
             // apply offload function to the tensor
             f(cur);
 
-            if (worst_case && cur->view_src == nullptr) {
+            if (worst_case) {
                 LLAMA_LOG_INFO("%s: node %4d %32s: %s\n", __func__, i, name.c_str(), k_offload_func_name.at(f).c_str());
             }
         }
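
For context, here is a minimal standalone sketch of the control-flow change above. The `tensor` struct, the node list, and the name-to-label map are hypothetical stand-ins, not the actual llama.cpp API; the point is the shape of the loop: the `view_src` check is hoisted to the top so the later branches can drop their `cur->view_src == nullptr` tests, and the compile-time `do_offload` flag disables the whole pass in builds without cuBLAS.

// Sketch only: "tensor" stands in for ggml_tensor, and the offload table is
// reduced to a name -> label map. Assumptions, not the real llama.cpp code.
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

struct tensor {
    std::string name;
    tensor *    view_src = nullptr; // non-null marks a view tensor
};

int main() {
#ifdef GGML_USE_CUBLAS
    const bool do_offload = true;   // offloading is only meaningful with CUDA
#else
    const bool do_offload = false;  // CPU-only build: skip the pass entirely
#endif

    tensor base {"inp_embd"};
    tensor view {"kv_view", &base};
    tensor other{"unmapped_node"};
    std::vector<tensor *> nodes = {&base, &view, &other};

    const std::unordered_map<std::string, std::string> k_offload_func = {
        {"inp_embd", "GPU"},
    };

    if (do_offload) {
        for (tensor * cur : nodes) {
            // view tensors are never offloaded: skip them up front, so the
            // branches below no longer need to re-test view_src
            if (cur->view_src != nullptr) {
                continue;
            }

            const auto it = k_offload_func.find(cur->name);
            if (it == k_offload_func.end()) {
                std::printf("%s: not offloaded\n", cur->name.c_str());
                continue;
            }

            std::printf("%s: offloaded (%s)\n", cur->name.c_str(), it->second.c_str());
        }
    }
    return 0;
}

Centralizing the view check in one early `continue` keeps the condition in a single place and also stops view tensors from triggering the "not offloaded" warning, which is why both `worst_case` branches in the diff could be simplified.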