diff --git a/src/llama.cpp b/src/llama.cpp index 22b951ba2..25e3ae84d 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -17561,6 +17561,12 @@ static int llama_decode_internal( }; while (lctx.sbatch.n_tokens > 0) { + // If aborted, break out + if (lctx.abort_callback != nullptr && lctx.abort_callback(lctx.abort_callback_data)) { + LLAMA_LOG_ERROR("%s: token decode aborted\n", __func__); + return -1; + } + llama_ubatch ubatch; if (kv_self.recurrent) { if (embd_pooled) {