extensions/openai: cross_origin + chunked_response (updated fix) (#2423)

2024-11-22 16:17:57 +01:00 · 2023-05-30 20:54:24 -04:00 · 2023-05-30 20:54:24 -04:00 · ebcadc0042
commit ebcadc0042
parent df50f077db
2 changed files with 42 additions and 4 deletions
--- a/extensions/openai/README.md
+++ b/extensions/openai/README.md
@ -126,6 +126,7 @@ Everything needs OPENAI_API_KEY=dummy set.
 | ✅❌ | openai-python | https://github.com/openai/openai-python | only the endpoints from above are working. OPENAI_API_BASE=http://127.0.0.1:5001/v1 |
 | ✅❌ | openai-node | https://github.com/openai/openai-node | only the endpoints from above are working. environment variables don't work by default, but can be configured (see above) |
 | ✅❌ | chatgpt-api | https://github.com/transitive-bullshit/chatgpt-api | only the endpoints from above are working. environment variables don't work by default, but can be configured (see above) |
 | ✅ | anse | https://github.com/anse-app/anse | API Key & URL configurable in UI |
 | ✅ | shell_gpt | https://github.com/TheR1D/shell_gpt | OPENAI_API_HOST=http://127.0.0.1:5001 |
 | ✅ | gpt-shell | https://github.com/jla/gpt-shell | OPENAI_API_BASE=http://127.0.0.1:5001/v1 |
 | ✅ | gpt-discord-bot | https://github.com/openai/gpt-discord-bot | OPENAI_API_BASE=http://127.0.0.1:5001/v1 |
--- a/extensions/openai/script.py
+++ b/extensions/openai/script.py
@ -126,10 +126,32 @@ def float_list_to_base64(float_list):
 class Handler(BaseHTTPRequestHandler):
    def send_access_control_headers(self):
        """Queue the CORS (Access-Control-*) headers on the current response.

        Emits four headers via ``send_header``, in a fixed order: allowed
        origin, credentials flag, allowed methods and allowed request headers.
        The handlers in this file call it after ``send_response`` and before
        ``end_headers``.
        """
        # NOTE(review): a wildcard origin together with Allow-Credentials
        # "true" is presumably not honoured by browsers for credentialed
        # requests -- confirm against the CORS documentation.
        allowed_request_headers = (
            "Origin, Accept, X-Requested-With, Content-Type, "
            "Access-Control-Request-Method, Access-Control-Request-Headers, "
            "Authorization"
        )
        cors_headers = (
            ("Access-Control-Allow-Origin", "*"),
            ("Access-Control-Allow-Credentials", "true"),
            ("Access-Control-Allow-Methods", "GET,HEAD,OPTIONS,POST,PUT"),
            ("Access-Control-Allow-Headers", allowed_request_headers),
        )
        for header_name, header_value in cors_headers:
            self.send_header(header_name, header_value)
    def do_OPTIONS(self):
        self.send_response(200)
        self.send_access_control_headers()
        self.send_header('Content-Type', 'application/json')
        self.end_headers()
        self.wfile.write("OK".encode('utf-8'))
    def do_GET(self):
        if self.path.startswith('/v1/models'):
            self.send_response(200)
            self.send_access_control_headers()
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
@ -181,6 +203,7 @@ class Handler(BaseHTTPRequestHandler):
        elif '/billing/usage' in self.path:
            # Ex. /v1/dashboard/billing/usage?start_date=2023-05-01&end_date=2023-05-31
            self.send_response(200)
            self.send_access_control_headers()
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
@ -243,6 +266,7 @@ class Handler(BaseHTTPRequestHandler):
            req_params['add_bos_token'] = shared.settings.get('add_bos_token', default_req_params['add_bos_token'])
            self.send_response(200)
            self.send_access_control_headers()
            if req_params['stream']:
                self.send_header('Content-Type', 'text/event-stream')
                self.send_header('Cache-Control', 'no-cache')
@ -396,7 +420,9 @@ class Handler(BaseHTTPRequestHandler):
                    chunk[resp_list][0]["message"] = {'role': 'assistant', 'content': ''}
                    chunk[resp_list][0]["delta"] = {'role': 'assistant', 'content': ''}
-                response = 'data: ' + json.dumps(chunk) + '\n'
+                data_chunk = 'data: ' + json.dumps(chunk) + '\r\n\r\n'
                chunk_size = hex(len(data_chunk))[2:] + '\r\n'
                response = chunk_size + data_chunk
                self.wfile.write(response.encode('utf-8'))
            # generate reply #######################################
@ -469,7 +495,9 @@ class Handler(BaseHTTPRequestHandler):
                        # So yeah... do both methods? delta and messages.
                        chunk[resp_list][0]['message'] = {'content': new_content}
                        chunk[resp_list][0]['delta'] = {'content': new_content}
-                    response = 'data: ' + json.dumps(chunk) + '\n'
+                    data_chunk = 'data: ' + json.dumps(chunk) + '\r\n\r\n'
                    chunk_size = hex(len(data_chunk))[2:] + '\r\n'
                    response = chunk_size + data_chunk
                    self.wfile.write(response.encode('utf-8'))
                    completion_token_count += len(encode(new_content)[0])
@ -494,8 +522,12 @@ class Handler(BaseHTTPRequestHandler):
                else:
                    # So yeah... do both methods? delta and messages.
                    chunk[resp_list][0]['message'] = {'content': ''}
-                    chunk[resp_list][0]['delta'] = {}
+                    chunk[resp_list][0]['delta'] = {'content': ''}
-                response = 'data: ' + json.dumps(chunk) + '\ndata: [DONE]\n'
+
                data_chunk = 'data: ' + json.dumps(chunk) + '\r\n\r\n'
                chunk_size = hex(len(data_chunk))[2:] + '\r\n'
                done = 'data: [DONE]\r\n\r\n'
                response = chunk_size + data_chunk + done
                self.wfile.write(response.encode('utf-8'))
                # Finished if streaming.
                if debug:
@ -541,6 +573,7 @@ class Handler(BaseHTTPRequestHandler):
            self.wfile.write(response.encode('utf-8'))
        elif '/edits' in self.path:
            self.send_response(200)
            self.send_access_control_headers()
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
@ -613,6 +646,7 @@ class Handler(BaseHTTPRequestHandler):
            # url return types will require file management and a web serving files... Perhaps later!
            self.send_response(200)
            self.send_access_control_headers()
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
@ -647,6 +681,7 @@ class Handler(BaseHTTPRequestHandler):
            self.wfile.write(response.encode('utf-8'))
        elif '/embeddings' in self.path and embedding_model is not None:
            self.send_response(200)
            self.send_access_control_headers()
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
@ -680,6 +715,7 @@ class Handler(BaseHTTPRequestHandler):
        elif '/moderations' in self.path:
            # for now do nothing, just don't error.
            self.send_response(200)
            self.send_access_control_headers()
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
@ -713,6 +749,7 @@ class Handler(BaseHTTPRequestHandler):
        elif self.path == '/api/v1/token-count':
            # NOT STANDARD. lifted from the api extension, but it's still very useful to calculate tokenized length client side.
            self.send_response(200)
            self.send_access_control_headers()
            self.send_header('Content-Type', 'application/json')
            self.end_headers()