Final touches

This commit is contained in:
Georgi Gerganov 2023-03-10 21:50:46 +02:00
parent 775328064e
commit 319cdb3e1f
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
5 changed files with 32 additions and 32 deletions

View File

@ -114,6 +114,5 @@ python3 convert-pth-to-ggml.py models/7B/ 1
In general, it seems to work, but I think it fails for unicode character support. Hopefully, someone can help with that In general, it seems to work, but I think it fails for unicode character support. Hopefully, someone can help with that
- I don't know yet how much the quantization affects the quality of the generated text - I don't know yet how much the quantization affects the quality of the generated text
- Probably the token sampling can be improved - Probably the token sampling can be improved
- No Windows support
- x86 quantization support [not yet ready](https://github.com/ggerganov/ggml/pull/27). Basically, you want to run this on Apple Silicon - x86 quantization support [not yet ready](https://github.com/ggerganov/ggml/pull/27). Basically, you want to run this on Apple Silicon

View File

@ -728,6 +728,7 @@ int main(int argc, char ** argv) {
// end of text token // end of text token
if (embd.back() == 2) { if (embd.back() == 2) {
printf(" [end of text]\n");
break; break;
} }
} }

0
models/.gitignore vendored Normal file
View File

View File

@ -231,39 +231,39 @@ std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::stri
} }
std::vector<gpt_vocab::id> llama_tokenize(const gpt_vocab & vocab, const std::string & text, bool bos) { std::vector<gpt_vocab::id> llama_tokenize(const gpt_vocab & vocab, const std::string & text, bool bos) {
auto res = gpt_tokenize(vocab, text); //auto res = gpt_tokenize(vocab, text);
if (bos) {
res.insert(res.begin(), 1); // TODO: replace with vocab.bos
}
//std::vector<gpt_vocab::id> res;
//if (bos) { //if (bos) {
// res.push_back(1); // TODO: replace with vocab.bos // res.insert(res.begin(), 1); // TODO: replace with vocab.bos
//} //}
// find the longest token that matches the text std::vector<gpt_vocab::id> res;
//int pos = 0;
//while (true) {
// int l = 0;
// int t = 0;
// for (const auto & kv : vocab.id_to_token) {
// if (kv.second.size() < l) continue;
// if (kv.second.size() > text.size() - pos) continue;
// if (text.substr(pos, kv.second.size()) == kv.second) {
// l = kv.second.size();
// t = kv.first;
// }
// }
// if (l == 0 && t != 13) { if (bos) {
// break; res.push_back(1); // TODO: replace with vocab.bos
// } }
// res.push_back(t); //find the longest token that matches the text
// pos += l; int pos = 0;
//} while (true) {
int l = 0;
int t = 0;
for (const auto & kv : vocab.id_to_token) {
if (kv.second.size() < l) continue;
if (kv.second.size() > text.size() - pos) continue;
if (text.substr(pos, kv.second.size()) == kv.second) {
l = kv.second.size();
t = kv.first;
}
}
if (l == 0 && t != 13) {
break;
}
res.push_back(t);
pos += l;
}
return res; return res;
} }

View File

@ -15,12 +15,12 @@
struct gpt_params { struct gpt_params {
int32_t seed = -1; // RNG seed int32_t seed = -1; // RNG seed
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
int32_t n_predict = 200; // new tokens to predict int32_t n_predict = 128; // new tokens to predict
// sampling parameters // sampling parameters
int32_t top_k = 100; int32_t top_k = 40;
float top_p = 0.95f; float top_p = 0.95f;
float temp = 0.8f; float temp = 0.80f;
int32_t n_batch = 8; // batch size for prompt processing int32_t n_batch = 8; // batch size for prompt processing