From 9fdfd5ecb0d6472d7cefaeb87d09395c650f0587 Mon Sep 17 00:00:00 2001 From: Yiximail Date: Fri, 23 Aug 2024 09:46:17 +0800 Subject: [PATCH] Add user_input in impersonate reply for consistency --- extensions/openai/completions.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py index 76d1cc7e..fb0e8aa6 100644 --- a/extensions/openai/completions.py +++ b/extensions/openai/completions.py @@ -245,7 +245,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p continue_ = body['continue_'] impersonate = body['impersonate'] if impersonate: - continue_ = False + continue_ = False # When impersonating, continue_ must be False; see impersonate_wrapper in chat.py # Instruction template if body['instruction_template_str']: @@ -342,7 +342,12 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p seen_content = '' for a in generator: - answer = a if impersonate else a['internal'][-1][1] + if impersonate: + # generate_chat_reply returns the entire message, but generate_reply yields only the newly generated text, + # so prepend user_input to keep the output consistent. + answer = user_input + a + else: + answer = a['internal'][-1][1] if stream: len_seen = len(seen_content) new_content = answer[len_seen:]