From ded9b43cad687d229c28c707c60d1dbac5c9c1ea Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Wed, 20 Sep 2023 19:09:25 +0300
Subject: [PATCH] parallel : fix cases where the input prompts can overflow the
 batch

---
 examples/parallel/parallel.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp
index 9c7cfd0dc..abf3991a1 100644
--- a/examples/parallel/parallel.cpp
+++ b/examples/parallel/parallel.cpp
@@ -127,7 +127,9 @@ int main(int argc, char ** argv) {
 
     llama_seq_id g_seq_id = 0;
 
-    llama_batch batch = llama_batch_init(params.n_batch, 0);
+    // the max batch size is as large as the context to handle cases where we get very long input prompts from multiple
+    // users. regardless of the size, the main loop will chunk the batch into a maximum of params.n_batch tokens at a time
+    llama_batch batch = llama_batch_init(params.n_ctx, 0);
 
     int32_t n_total_prompt = 0;
     int32_t n_total_gen    = 0;