assistant: space fix, system: prompt fix (#2219)

2024-11-22 08:07:56 +01:00 · 2023-05-20 22:32:34 -04:00 · 2023-05-20 22:32:34 -04:00 · 78b2478d9c
commit 78b2478d9c
parent 05593a7834
1 changed file with 21 additions and 5 deletions
--- a/extensions/openai/script.py
+++ b/extensions/openai/script.py
@@ -243,9 +243,9 @@ class Handler(BaseHTTPRequestHandler):

                messages = body['messages']

-                system_msg = ''  # You are ChatGPT, a large language model trained by OpenAI. Answer as concisely as possible. Knowledge cutoff: {knowledge_cutoff} Current date: {current_date}
+                system_msgs = []
                if 'prompt' in body:  # Maybe they sent both? This is not documented in the API, but some clients seem to do this.
-                    system_msg = body['prompt']
+                    system_msgs = [ body['prompt'] ]

                chat_msgs = []

@@ -254,10 +254,15 @@ class Handler(BaseHTTPRequestHandler):
                    content = m['content']
                    # name = m.get('name', 'user')
                    if role == 'system':
-                        system_msg += content
+                        system_msgs.extend([content.strip()])
                    else:
                        chat_msgs.extend([f"\n{role}: {content.strip()}"])  # Strip content? linefeed?

+                # You are ChatGPT, a large language model trained by OpenAI. Answer as concisely as possible. Knowledge cutoff: {knowledge_cutoff} Current date: {current_date}
+                system_msg = 'You are assistant, a large language model. Answer as concisely as possible.'
+                if system_msgs:
+                    system_msg = '\n'.join(system_msgs)
+
                system_token_count = len(encode(system_msg)[0])
                remaining_tokens = req_params['truncation_length'] - req_params['max_new_tokens'] - system_token_count
                chat_msg = ''
@@ -277,9 +282,9 @@ class Handler(BaseHTTPRequestHandler):
                    print(f"truncating chat messages, dropping {len(chat_msgs)} messages.")

                if system_msg:
-                    prompt = 'system: ' + system_msg + '\n' + chat_msg + '\nassistant: '
+                    prompt = 'system: ' + system_msg + '\n' + chat_msg + '\nassistant:'
                else:
-                    prompt = chat_msg + '\nassistant: '
+                    prompt = chat_msg + '\nassistant:'

                token_count = len(encode(prompt)[0])

@@ -396,6 +401,11 @@ class Handler(BaseHTTPRequestHandler):
                            "finish_reason": None,
                        }],
                    }
+
+                    # strip extra leading space off new generated content
+                    if len_seen == 0 and new_content[0] == ' ':
+                        new_content = new_content[1:]
+
                    if stream_object_type == 'text_completion.chunk':
                        chunk[resp_list][0]['text'] = new_content
                    else:
@@ -432,9 +442,15 @@ class Handler(BaseHTTPRequestHandler):
                self.wfile.write(response.encode('utf-8'))
                # Finished if streaming.
                if debug:
+                    if answer and answer[0] == ' ':
+                        answer = answer[1:]
                    print({'response': answer})
                return

+            # strip extra leading space off new generated content
+            if answer and answer[0] == ' ':
+                answer = answer[1:]
+
            if debug:
                print({'response': answer})