mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-28 04:47:04 +01:00
imatrix : fix wname for mul_mat_id ops (#6271)
* imatrix : fix wname for mul_mat_id ops
* also filter tensor names in mul_mat_id ops

---------

Co-authored-by: slaren <slarengh@gmail.com>
This commit is contained in:
parent
7aed0ffe68
commit
a0e584defd
@ -50,17 +50,11 @@ private:
|
|||||||
void keep_imatrix(int ncall) const;
|
void keep_imatrix(int ncall) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) {
|
// remove any prefix and suffixes from the name
|
||||||
GGML_UNUSED(user_data);
|
// CUDA0#blk.0.attn_k.weight#0 => blk.0.attn_k.weight
|
||||||
|
static std::string filter_tensor_name(const char * name) {
|
||||||
const struct ggml_tensor * src0 = t->src[0];
|
|
||||||
const struct ggml_tensor * src1 = t->src[1];
|
|
||||||
|
|
||||||
std::string wname;
|
std::string wname;
|
||||||
{
|
const char * p = strchr(name, '#');
|
||||||
// remove any prefix and suffixes from the name
|
|
||||||
// CUDA0#blk.0.attn_k.weight#0 => blk.0.attn_k.weight
|
|
||||||
const char * p = strchr(src0->name, '#');
|
|
||||||
if (p != NULL) {
|
if (p != NULL) {
|
||||||
p = p + 1;
|
p = p + 1;
|
||||||
const char * q = strchr(p, '#');
|
const char * q = strchr(p, '#');
|
||||||
@ -70,9 +64,17 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
|
|||||||
wname = p;
|
wname = p;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
wname = src0->name;
|
wname = name;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
return wname;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) {
|
||||||
|
GGML_UNUSED(user_data);
|
||||||
|
|
||||||
|
const struct ggml_tensor * src0 = t->src[0];
|
||||||
|
const struct ggml_tensor * src1 = t->src[1];
|
||||||
|
std::string wname = filter_tensor_name(src0->name);
|
||||||
|
|
||||||
// when ask is true, the scheduler wants to know if we are interested in data from this tensor
|
// when ask is true, the scheduler wants to know if we are interested in data from this tensor
|
||||||
// if we return true, a follow-up call will be made with ask=false in which we can do the actual collection
|
// if we return true, a follow-up call will be made with ask=false in which we can do the actual collection
|
||||||
@ -112,6 +114,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
|
|||||||
// this is necessary to guarantee equal number of "ncall" for each tensor
|
// this is necessary to guarantee equal number of "ncall" for each tensor
|
||||||
for (int ex = 0; ex < n_as; ++ex) {
|
for (int ex = 0; ex < n_as; ++ex) {
|
||||||
src0 = t->src[2 + ex];
|
src0 = t->src[2 + ex];
|
||||||
|
wname = filter_tensor_name(src0->name);
|
||||||
auto& e = m_stats[wname];
|
auto& e = m_stats[wname];
|
||||||
if (e.values.empty()) {
|
if (e.values.empty()) {
|
||||||
e.values.resize(src1->ne[0], 0);
|
e.values.resize(src1->ne[0], 0);
|
||||||
|
Loading…
Reference in New Issue
Block a user