control-vectors : minor code style updates

Georgi Gerganov 2024-03-14 16:43:37 +02:00
parent 42abb46c1f
commit 0a9bc301ac
4 changed files with 113 additions and 99 deletions


@@ -573,30 +573,29 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             invalid_param = true;
             break;
         }
-        params.control_vectors.push_back(std::make_tuple(argv[i], 1.0f));
+        params.control_vectors.push_back({ 1.0f, argv[i], });
     } else if (arg == "--control-vector-scaled") {
         if (++i >= argc) {
             invalid_param = true;
             break;
         }
-        const char * control_vector = argv[i];
+        const char * fname = argv[i];
         if (++i >= argc) {
             invalid_param = true;
             break;
         }
-        params.control_vectors.push_back(std::make_tuple(control_vector, std::stof(argv[i])));
+        params.control_vectors.push_back({ std::stof(argv[i]), fname, });
     } else if (arg == "--control-vector-layer-range") {
         if (++i >= argc) {
             invalid_param = true;
             break;
         }
-        int32_t start = std::stoi(argv[i]);
+        params.control_vector_layer_start = std::stoi(argv[i]);
         if (++i >= argc) {
             invalid_param = true;
             break;
         }
-        int32_t end = std::stoi(argv[i]);
-        params.control_vector_layer_range = std::make_tuple(start, end);
+        params.control_vector_layer_end = std::stoi(argv[i]);
     } else if (arg == "--mmproj") {
         if (++i >= argc) {
             invalid_param = true;
@@ -1396,27 +1395,22 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
     }

     if (!params.control_vectors.empty()) {
-        int32_t layer_start, layer_end;
-        std::tie(layer_start, layer_end) = params.control_vector_layer_range;
-        if (layer_start == 0) layer_start = 1;
-        if (layer_end == 0) layer_end = 31;
-        std::vector<float> control_vector;
-        int n_embd;
-        std::tie(control_vector, n_embd) = llama_control_vector_load(params.control_vectors);
-        if (n_embd == -1) {
+        if (params.control_vector_layer_start <= 0) params.control_vector_layer_start = 1;
+        if (params.control_vector_layer_end   <= 0) params.control_vector_layer_end   = llama_n_layer(model);
+        const auto cvec = llama_control_vector_load(params.control_vectors);
+        if (cvec.n_embd == -1) {
             llama_free(lctx);
             llama_free_model(model);
             return std::make_tuple(nullptr, nullptr);
         }
         int err = llama_control_vector_apply(lctx,
-                control_vector.data(),
-                control_vector.size(),
-                n_embd,
-                layer_start,
-                layer_end);
+                cvec.data.data(),
+                cvec.data.size(),
+                cvec.n_embd,
+                params.control_vector_layer_start,
+                params.control_vector_layer_end);
         if (err) {
             llama_free(lctx);
             llama_free_model(model);
@@ -1959,11 +1953,14 @@ float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n)
 // Control vector utils
 //

-static std::tuple<std::vector<float>, int> llama_control_vector_load_one(const std::string & path, float strength) {
-    int n_tensors;
+static llama_control_vector_data llama_control_vector_load_one(const llama_control_vector_load_info & load_info) {
+    int32_t n_tensors;
     size_t n_bytes = 0;
     uint32_t max_direction_layer = 0;
-    int n_embd = -1;
+    llama_control_vector_data result = { -1, {} };

     // calculate size of ctx needed for tensors, ensure tensors are f32, and find max layer
     {
@@ -1977,11 +1974,11 @@ static std::tuple<std::vector<float>, int> llama_control_vector_load_one(const s
            /* .no_alloc = */ true,
            /* .ctx      = */ &meta_ctx,
        };
-       struct gguf_context * meta_ctx_gguf = gguf_init_from_file(path.c_str(), meta_gguf_params);
+       struct gguf_context * meta_ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
        if (!meta_ctx_gguf) {
-           fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, path.c_str());
+           fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, load_info.fname.c_str());
            ggml_free(meta_ctx);
-           return std::make_tuple(std::vector<float>(), -1);
+           return result;
        }

        n_tensors = gguf_get_n_tensors(meta_ctx_gguf);
@@ -1994,36 +1991,36 @@ static std::tuple<std::vector<float>, int> llama_control_vector_load_one(const s
                try {
                    uint32_t layer = std::stoi(name.substr(dotpos + 1));
                    if (layer == 0) {
-                       fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, path.c_str());
+                       fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
                        ggml_free(meta_ctx);
                        gguf_free(meta_ctx_gguf);
-                       return std::make_tuple(std::vector<float>(), -1);
+                       return result;
                    }
                    if (layer > max_direction_layer) {
                        max_direction_layer = layer;
                    }
                } catch (...) {
-                   fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, path.c_str());
+                   fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
                    ggml_free(meta_ctx);
                    gguf_free(meta_ctx_gguf);
-                   return std::make_tuple(std::vector<float>(), -1);
+                   return result;
                }
            }

            struct ggml_tensor * tensor_meta = ggml_get_tensor(meta_ctx, name.c_str());
            if (tensor_meta->type != GGML_TYPE_F32 || ggml_n_dims(tensor_meta) != 1) {
-               fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, path.c_str());
+               fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
                ggml_free(meta_ctx);
                gguf_free(meta_ctx_gguf);
-               return std::make_tuple(std::vector<float>(), -1);
+               return result;
            }
-           if (n_embd == -1) {
-               n_embd = ggml_nelements(tensor_meta);
-           } else if (ggml_nelements(tensor_meta) != n_embd) {
-               fprintf(stderr, "%s: direction tensor sizes mismatched in %s\n", __func__, path.c_str());
+           if (result.n_embd == -1) {
+               result.n_embd = ggml_nelements(tensor_meta);
+           } else if (ggml_nelements(tensor_meta) != result.n_embd) {
+               fprintf(stderr, "%s: direction tensor sizes mismatched in %s\n", __func__, load_info.fname.c_str());
                ggml_free(meta_ctx);
                gguf_free(meta_ctx_gguf);
-               return std::make_tuple(std::vector<float>(), -1);
+               return result;
            }
            n_bytes += ggml_nbytes(tensor_meta);
        }
@@ -2032,8 +2029,8 @@ static std::tuple<std::vector<float>, int> llama_control_vector_load_one(const s
    }

    if (n_tensors == 0) {
-       fprintf(stderr, "%s: no direction tensors found in %s\n", __func__, path.c_str());
-       return std::make_tuple(std::vector<float>(), -1);
+       fprintf(stderr, "%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str());
+       return result;
    }

    // load and scale tensors into final control vector context
@@ -2048,63 +2045,63 @@ static std::tuple<std::vector<float>, int> llama_control_vector_load_one(const s
        /*.no_alloc = */ false,
        /*.ctx      = */ &ctx,
    };
-   struct gguf_context * ctx_gguf = gguf_init_from_file(path.c_str(), params);
+   struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), params);
    if (!ctx_gguf) {
-       fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, path.c_str());
+       fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, load_info.fname.c_str());
        ggml_free(ctx);
-       return std::make_tuple(std::vector<float>(), -1);
+       return result;
    }

-   std::vector<float> vector;
-   for (uint32_t i = 1; i < max_direction_layer; i++) {
-       std::string name = "direction." + std::to_string(i);
-       ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
+   // do not store data for layer 0 (it's not used)
+   result.data.resize(result.n_embd * max_direction_layer);
+
+   for (uint32_t il = 1; il <= max_direction_layer; il++) {
+       const std::string name = "direction." + std::to_string(il);
+       const ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
+
+       float * dst = result.data.data() + result.n_embd * (il - 1);
+
        if (tensor) {
-           const float * data = (const float *) tensor->data;
-           for (int i = 0; i < n_embd; i++) {
-               vector.push_back(data[i] * strength);
+           const float * src = (const float *) tensor->data;
+           for (int j = 0; j < result.n_embd; j++) {
+               dst[j] = src[j] * load_info.strength;
            }
        } else {
-           vector.insert(vector.end(), n_embd, 0.); // as a filler
+           for (int j = 0; j < result.n_embd; j++) {
+               dst[j] = 0.0f;
+           }
        }
    }

-   return std::make_tuple(vector, n_embd);
+   return result;
}

-std::tuple<std::vector<float>, int> llama_control_vector_load(const std::vector<std::tuple<std::string, float>> & vectors) {
-   std::vector<float> vector;
-   int n_embd = -1;
+llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos) {
+   llama_control_vector_data result = { -1, {} };

-   for (const auto& pair : vectors) {
-       std::string path;
-       float strength;
-       std::tie(path, strength) = pair;
+   for (const auto & info : load_infos) {
+       auto cur = llama_control_vector_load_one(info);

-       std::vector<float> v;
-       int v_n_embd;
-       std::tie(v, v_n_embd) = llama_control_vector_load_one(path, strength);
-       if (v_n_embd == -1) {
-           return std::make_tuple(std::vector<float>(), -1);
+       if (cur.n_embd == -1) {
+           return result;
        }
-       if (n_embd != -1 && (n_embd != v_n_embd || v.size() != vector.size())) {
-           fprintf(stderr, "%s: control vector in %s does not match previous vector dimensions\n", __func__, path.c_str());
-           return std::make_tuple(std::vector<float>(), -1);
+       if (result.n_embd != -1 && (result.n_embd != cur.n_embd || result.data.size() != cur.data.size())) {
+           fprintf(stderr, "%s: control vector in %s does not match previous vector dimensions\n", __func__, info.fname.c_str());
+           return result;
        }

-       if (n_embd == -1) {
-           vector = std::move(v);
-           n_embd = v_n_embd;
+       if (result.n_embd == -1) {
+           result = std::move(cur);
        } else {
-           for (size_t i = 0; i < vector.size(); i++) {
-               vector[i] += v[i];
+           for (size_t i = 0; i < cur.data.size(); i++) {
+               result.data[i] += cur.data[i];
            }
        }
    }

-   if (n_embd == -1) {
+   if (result.n_embd == -1) {
        fprintf(stderr, "%s: no vectors passed\n", __func__);
    }

-   return std::make_tuple(vector, n_embd);
+   return result;
}
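For orientation (not part of the commit): with the plain fields that replace the tuples above, the three control-vector flags map directly onto gpt_params members. A minimal sketch, using hypothetical file names and values:

    // equivalent of parsing:
    //   --control-vector ctrl.gguf
    //   --control-vector-scaled ctrl.gguf 0.8
    //   --control-vector-layer-range 10 20
    gpt_params params;
    params.control_vectors.push_back({ 1.0f, "ctrl.gguf" }); // --control-vector (strength defaults to 1.0)
    params.control_vectors.push_back({ 0.8f, "ctrl.gguf" }); // --control-vector-scaled
    params.control_vector_layer_start = 10;                  // --control-vector-layer-range
    params.control_vector_layer_end   = 20;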


@@ -37,10 +37,13 @@ extern char const *LLAMA_COMMIT;
 extern char const *LLAMA_COMPILER;
 extern char const *LLAMA_BUILD_TARGET;

+struct llama_control_vector_load_info;
+
+int32_t get_num_physical_cores();
+
 //
 // CLI argument parsing
 //

-int32_t get_num_physical_cores();
-
 struct gpt_params {
     uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
@@ -103,8 +106,10 @@ struct gpt_params {
     std::vector<std::tuple<std::string, float>> lora_adapter; // lora adapter path with user defined scale
     std::string lora_base = "";                                // base model path for the lora adapter

-    std::vector<std::tuple<std::string, float>> control_vectors; // control vector with user defined scale
-    std::tuple<int32_t, int32_t> control_vector_layer_range;     // layer range for control vector
+    std::vector<llama_control_vector_load_info> control_vectors; // control vector with user defined scale
+
+    int32_t control_vector_layer_start = -1; // layer range for control vector
+    int32_t control_vector_layer_end   = -1; // layer range for control vector

     int ppl_stride = 0;      // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
     int ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line
@@ -277,8 +282,19 @@ float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n)
 // Control vector utils
 //

-// Load control vectors from a tuple of {path, strength}, scale each by strength, and add them together.
-// Returns a tuple of {concatenated vector data (n_embd x n_layer), n_embd}
-// On error, returns a tuple of {empty, -1}
-std::tuple<std::vector<float>, int> llama_control_vector_load(
-        const std::vector<std::tuple<std::string, float>> & vectors);
+struct llama_control_vector_data {
+    int n_embd;
+
+    // stores data for layers [1, n_layer] where n_layer = data.size() / n_embd
+    std::vector<float> data;
+};
+
+struct llama_control_vector_load_info {
+    float strength;
+
+    std::string fname;
+};
+
+// Load control vectors, scale each by strength, and add them together.
+// On error, returns {-1, empty}
+llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos);
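For context, a minimal usage sketch of the API declared above, mirroring the flow in llama_init_from_gpt_params. The file name and strength are hypothetical, and an already-loaded `model`/`lctx` pair is assumed:

    std::vector<llama_control_vector_load_info> infos = {
        { 0.8f, "control_vector.gguf" }, // { strength, fname } -- hypothetical values
    };

    const llama_control_vector_data cvec = llama_control_vector_load(infos);
    if (cvec.n_embd != -1) {
        // apply to layers [1, n_layer], the same defaults llama_init_from_gpt_params falls back to
        const int32_t err = llama_control_vector_apply(lctx,
                cvec.data.data(),
                cvec.data.size(),
                cvec.n_embd,
                /* il_start = */ 1,
                /* il_end   = */ llama_n_layer(model));
        if (err) {
            fprintf(stderr, "failed to apply control vector\n");
        }
    }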


@@ -1877,7 +1877,7 @@ struct llama_control_vector {
     std::vector<ggml_backend_buffer_t> bufs;

     int32_t layer_start = 0;
     int32_t layer_end   = 0;

     ggml_tensor * tensor_for(int il) const {
         if (il < 0 || il < layer_start || il > layer_end || (size_t) il >= tensors.size()) {
@@ -13183,6 +13183,10 @@ int32_t llama_n_embd(const struct llama_model * model) {
     return model->hparams.n_embd;
 }

+int32_t llama_n_layer(const struct llama_model * model) {
+    return model->hparams.n_layer;
+}
+
 float llama_rope_freq_scale_train(const struct llama_model * model) {
     return model->hparams.rope_freq_scale_train;
 }
@@ -13335,7 +13339,7 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
     return true;
 }

-int32_t llama_control_vector_apply(struct llama_context * lctx, float * data, size_t len, int n_embd, int32_t il_start, int32_t il_end) {
+int32_t llama_control_vector_apply(struct llama_context * lctx, const float * data, size_t len, int32_t n_embd, int32_t il_start, int32_t il_end) {
     const llama_model & model = lctx->model;
     llama_control_vector & cvec = lctx->cvec;
@@ -13351,18 +13355,14 @@ int32_t llama_control_vector_apply(struct llama_context * lctx, float * data, si
     }

     cvec.layer_start = il_start;
     cvec.layer_end   = il_end;

     for (size_t il = 1; il < model.hparams.n_layer; il++) {
-        if (il >= cvec.tensors.size() || cvec.tensors[il] == nullptr) {
-            continue;
-        }
-        size_t off = n_embd * (il - 1); // buffer doesn't have data for layer 0, since it's never present
+        assert(cvec.tensors[il] != nullptr);
+
+        const size_t off = n_embd * (il - 1); // buffer doesn't have data for layer 0, since it's never present
         if (off + n_embd <= len) {
-            ggml_backend_tensor_set(cvec.tensors[il],
-                    data + off,
-                    0,
-                    n_embd * ggml_element_size(cvec.tensors[il]));
+            ggml_backend_tensor_set(cvec.tensors[il], data + off, 0, n_embd * ggml_element_size(cvec.tensors[il]));
         }
     }
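A note on the offset above (illustration only, not from the commit): the buffer passed as data holds n_embd floats per layer starting at layer 1, since layer 0 is never present, so the row for layer il begins at n_embd * (il - 1). A tiny sketch with hypothetical sizes:

    // layout of the control vector buffer for n_embd = 4 and 3 layers:
    //   data[0..3]  -> direction.1 (layer 1)
    //   data[4..7]  -> direction.2 (layer 2)
    //   data[8..11] -> direction.3 (layer 3)
    const int32_t n_embd = 4;                 // hypothetical embedding size
    const size_t  il     = 2;                 // hypothetical layer index
    const size_t  off    = n_embd * (il - 1); // == 4, start of layer 2's row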

llama.h

@@ -387,6 +387,7 @@ extern "C" {
     LLAMA_API int32_t llama_n_vocab    (const struct llama_model * model);
     LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
     LLAMA_API int32_t llama_n_embd     (const struct llama_model * model);
+    LLAMA_API int32_t llama_n_layer    (const struct llama_model * model);

     // Get the model's RoPE frequency scaling factor
     LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
@@ -434,10 +435,10 @@ extern "C" {
     // Returns 0 on success
     LLAMA_API int32_t llama_model_apply_lora_from_file(
             const struct llama_model * model,
             const char * path_lora,
             float scale,
             const char * path_base_model,
             int32_t n_threads);

     // Apply a loaded control vector to a llama_context, or if data is NULL, clear
     // the currently loaded vector.
@@ -447,9 +448,9 @@ extern "C" {
     // See llama_control_vector_load in common to load a control vector.
     LLAMA_API int32_t llama_control_vector_apply(
             struct llama_context * lctx,
-            float * data,
+            const float * data,
             size_t len,
-            int n_embd,
+            int32_t n_embd,
             int32_t il_start,
             int32_t il_end);
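Per the header comment above, passing NULL data clears the currently loaded vector. A minimal sketch, assuming an existing llama_context * lctx and llama_model * model:

    // disable any previously applied control vector
    const int32_t err = llama_control_vector_apply(lctx,
            /* data     */ NULL,
            /* len      */ 0,
            /* n_embd   */ llama_n_embd(model),
            /* il_start */ 0,
            /* il_end   */ 0);
    if (err) {
        fprintf(stderr, "failed to clear control vector\n");
    }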