ggml-opencl, llama: using reserve() if count already known (#7272)
commit 213e90ed73
parent 65c58207ec
ggml-opencl.cpp
@@ -1,4 +1,4 @@
 #include "ggml.h"
 #include "ggml-opencl.h"
 #include "ggml-backend-impl.h"
 
@@ -1835,7 +1835,10 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
             CL_CHECK(clEnqueueNDRangeKernel(queue, *to_fp32_cl, 1, &offset, &global, local > 0 ? &local : NULL, events.size(), !events.empty() ? events.data() : NULL, NULL));
         }
 
-        for (int64_t i12 = i02 * r2, e12 = i12 + r2; i12 < e12; i12++) {
+        int64_t i12 = i02 * r2;
+        int64_t e12 = i12 + r2;
+        events.reserve(e12 - i12);
+        for (; i12 < e12; i12++) {
             if (mul_mat_vec) { // specialized dequantize_mul_mat_vec kernel
                 // copy src1 to device
                 events.emplace_back();
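For illustration only (not part of this commit): a minimal standalone sketch of the pattern used above, with a hypothetical Event struct standing in for cl_event and made-up loop parameters. Hoisting the loop bounds out of the for-initializer makes the element count available before the loop, so the vector can reserve() once and the subsequent appends never reallocate.

#include <cstdint>
#include <cstdio>
#include <vector>

struct Event { int id; };   // hypothetical stand-in for cl_event

int main() {
    const int64_t i02 = 3, r2 = 4;           // made-up loop parameters
    std::vector<Event> events;

    int64_t i12 = i02 * r2;                   // bounds hoisted out of the for-init...
    int64_t e12 = i12 + r2;
    events.reserve(e12 - i12);                // ...so the count is known before the loop
    for (; i12 < e12; i12++) {
        events.push_back({static_cast<int>(i12)});  // appends stay within reserved storage
    }

    printf("size=%zu capacity=%zu\n", events.size(), events.capacity());
    return 0;
}

Without the reserve(), any append past the current capacity reallocates and moves the already-stored elements; with it there is at most one allocation for the whole loop.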
llama.cpp
@@ -16162,6 +16162,7 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
     }
 
     // make tensors
+    cvec.tensors.reserve(model.hparams.n_layer);
     cvec.tensors.push_back(nullptr); // there's never a tensor for layer 0
     for (size_t il = 1; il < model.hparams.n_layer; il++) {
         struct ggml_context * ctx = ctx_map.at(model.buft_layer[il].buft);
@@ -16170,6 +16171,8 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
     }
 
     // allocate tensors / buffers and zero
+    cvec.ctxs.reserve(ctx_map.size());
+    cvec.bufs.reserve(ctx_map.size());
     for (auto it : ctx_map) {
         ggml_backend_buffer_type_t buft = it.first;
         ggml_context * ctx = it.second;
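Again purely as an illustrative sketch, using a hypothetical map rather than llama.cpp's ctx_map of ggml contexts: reserve() only raises capacity(), it does not change size(), so the push_back() calls that follow are still needed; reserving ctx_map.size() slots just guarantees they complete without reallocation.

#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
    // hypothetical stand-in for ctx_map: backend buffer type -> context
    std::map<std::string, std::string> ctx_map = {
        {"cpu_buft", "cpu_ctx"},
        {"gpu_buft", "gpu_ctx"},
    };

    std::vector<std::string> ctxs;
    ctxs.reserve(ctx_map.size());   // capacity grows, size() stays 0
    printf("after reserve: size=%zu capacity=%zu\n", ctxs.size(), ctxs.capacity());

    for (const auto & it : ctx_map) {
        ctxs.push_back(it.second);  // fills the reserved storage, no reallocation
    }
    printf("after fill:    size=%zu capacity=%zu\n", ctxs.size(), ctxs.capacity());
    return 0;
}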