diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp index 9c7cfd0dc..abf3991a1 100644 --- a/examples/parallel/parallel.cpp +++ b/examples/parallel/parallel.cpp @@ -127,7 +127,9 @@ int main(int argc, char ** argv) { llama_seq_id g_seq_id = 0; - llama_batch batch = llama_batch_init(params.n_batch, 0); + // the max batch size is as large as the context to handle cases where we get very long input prompts from multiple + // users. regardless of the size, the main loop will chunk the batch into a maximum of params.n_batch tokens at a time + llama_batch batch = llama_batch_init(params.n_ctx, 0); int32_t n_total_prompt = 0; int32_t n_total_gen = 0;