parallel : add disabled experimental batch chunking in powers of two
commit b2debf65f2
parent ded9b43cad
@@ -253,6 +253,13 @@ int main(int argc, char ** argv) {
     int32_t n_batch = params.n_batch;
 
     for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch) {
+        // experiment: process in powers of 2
+        //if (i + n_batch > (int32_t) batch.n_tokens && n_batch > 32) {
+        //    n_batch /= 2;
+        //    i -= n_batch;
+        //    continue;
+        //}
+
         const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));
 
         llama_batch batch_view = {
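The chunking branch is committed disabled. As a rough illustration of what it would do once uncommented, here is a minimal, self-contained sketch (plain C++, not the llama.cpp API; the token count and initial batch size below are made-up values): when the remaining tokens no longer fill a whole batch, the batch size is halved (down to a floor of 32) and the same position is retried, so the tail is consumed in decreasing power-of-two chunks.

// Standalone sketch of the disabled power-of-two chunking experiment.
// Assumes n_batch starts as a power of two; llama_decode is replaced by printf.
#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
    int32_t n_tokens_total = 1000; // hypothetical total token count
    int32_t n_batch        = 512;  // hypothetical initial batch size (power of two)

    for (int32_t i = 0; i < n_tokens_total; i += n_batch) {
        // the experiment: if the remainder does not fill a whole batch,
        // halve the batch and retry at the same position (floor: 32)
        if (i + n_batch > n_tokens_total && n_batch > 32) {
            n_batch /= 2;
            i -= n_batch; // cancels the upcoming i += n_batch, so i stays put
            continue;
        }

        const int32_t n_tokens = std::min(n_batch, n_tokens_total - i);
        std::printf("chunk at %4d: %d tokens\n", i, n_tokens);
    }

    return 0;
}

With these example values the loop emits chunks of 512, 256, 128, 64, 32, and a final 8 tokens, i.e. the tail is split along power-of-two boundaries instead of being issued as one odd-sized batch.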