parallel : add disabled experimental batch chunking in powers of two

This commit is contained in:
Georgi Gerganov 2023-09-20 20:14:05 +03:00
parent ded9b43cad
commit b2debf65f2
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

View File

@@ -253,6 +253,13 @@ int main(int argc, char ** argv) {
int32_t n_batch = params.n_batch;
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch) {
// experiment: process in powers of 2
//if (i + n_batch > (int32_t) batch.n_tokens && n_batch > 32) {
// n_batch /= 2;
// i -= n_batch;
// continue;
//}
const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));
llama_batch batch_view = {