mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-03 17:51:09 +01:00
16bc66d947
* llama.cpp : split llama_context_params into model and context params (ggml-ci)
* fix metal build
* fix freq_base/scale default to model value
* llama-bench : keep the same model between tests when possible
* move n_threads to llama_context_params, add n_threads_batch
* fix mpi build
* remove kv_size(), cuda scratch fixes
* remove low-vram option
* add n_threads_batch to system info, refactor to get_system_info()
* add documentation about --threads-batch to the READMEs
* llama-bench fix
* main : fix rope freq/scale warning
* llama.cpp : add llama_get_model; common : add llama_tokenize from model
* remove duplicated ctx/model functions (ggml-ci)
* cuda : print total VRAM used
36 lines
969 B
C++
36 lines
969 B
C++
#include "embd-input.h"
|
|
#include <stdlib.h>
|
|
#include <random>
|
|
#include <string.h>
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
auto mymodel = create_mymodel(argc, argv);
|
|
int N = 10;
|
|
int max_tgt_len = 500;
|
|
int n_embd = llama_n_embd(llama_get_model(mymodel->ctx));
|
|
|
|
// add random float embd to test evaluation
|
|
float * data = new float[N*n_embd];
|
|
std::default_random_engine e;
|
|
std::uniform_real_distribution<float> u(0,1);
|
|
for (int i=0;i<N*n_embd;i++) {
|
|
data[i] = u(e);
|
|
}
|
|
|
|
eval_string(mymodel, "user: what is the color of the flag of UN?");
|
|
eval_float(mymodel, data, N);
|
|
eval_string(mymodel, "assistant:");
|
|
eval_string(mymodel, mymodel->params.prompt.c_str());
|
|
const char* tmp;
|
|
for (int i=0; i<max_tgt_len; i++) {
|
|
tmp = sampling(mymodel);
|
|
if (strcmp(tmp, "</s>")==0) break;
|
|
printf("%s", tmp);
|
|
fflush(stdout);
|
|
}
|
|
printf("\n");
|
|
free_mymodel(mymodel);
|
|
return 0;
|
|
}
|