ggml-quants, llama : removed excess checks (#7274)

This commit is contained in:
Herman Semenov 2024-05-17 07:08:49 +00:00 committed by GitHub
parent e18bc6aaf3
commit 359cbe3f46
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 4 additions and 8 deletions

View File

@@ -2553,7 +2553,7 @@ void dump_string_yaml_multiline(FILE * stream, const char * prop_name, const cha
     size_t pos_start = 0;
     size_t pos_found = 0;
-    if (!data_str.empty() && (std::isspace(data_str[0]) || std::isspace(data_str.back()))) {
+    if (std::isspace(data_str[0]) || std::isspace(data_str.back())) {
         data_str = std::regex_replace(data_str, std::regex("\n"), "\\n");
         data_str = std::regex_replace(data_str, std::regex("\""), "\\\"");
         data_str = std::regex_replace(data_str, std::regex(R"(\\[^n"])"), R"(\$&)");

View File

@@ -1986,7 +1986,7 @@ static void quantize_row_q3_K_impl(const float * restrict x, block_q3_K * restri
     for (int j = 0; j < QK_K/16; ++j) {
         if (quant_weights) {
-            const float * qw = quant_weights ? quant_weights + QK_K * i + 16*j : NULL;
+            const float * qw = quant_weights + QK_K * i + 16*j;
             for (int l = 0; l < 16; ++l) weight[l] = qw[l] * sqrtf(sigma2 + x[16*j+l]*x[16*j+l]);
         } else {
             for (int l = 0; l < 16; ++l) weight[l] = x[16*j+l]*x[16*j+l];

View File

@@ -13904,9 +13904,7 @@ llama_token llama_sample_token_mirostat(struct llama_context * ctx, llama_token_
     // Sample the next word X using top-k sampling
     llama_sample_top_k(nullptr, candidates, int(k), 1);
-    if (ctx) {
-        ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
-    }
+    ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
     llama_token X = llama_sample_token(ctx, candidates);
     t_start_sample_us = ggml_time_us();
@@ -13920,9 +13918,7 @@ llama_token llama_sample_token_mirostat(struct llama_context * ctx, llama_token_
     // Update mu using the learning rate and error
     *mu = *mu - eta * e;
-    if (ctx) {
-        ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
-    }
+    ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
     return X;
 }