From ebcadc004227e5fce2fe368f39863302f980ae1f Mon Sep 17 00:00:00 2001 From: matatonic <73265741+matatonic@users.noreply.github.com> Date: Tue, 30 May 2023 20:54:24 -0400 Subject: [PATCH] extensions/openai: cross_origin + chunked_response (updated fix) (#2423) --- extensions/openai/README.md | 1 + extensions/openai/script.py | 45 +++++++++++++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/extensions/openai/README.md b/extensions/openai/README.md index b20eba33..0a9ed20a 100644 --- a/extensions/openai/README.md +++ b/extensions/openai/README.md @@ -126,6 +126,7 @@ Everything needs OPENAI_API_KEY=dummy set. | ✅❌ | openai-python | https://github.com/openai/openai-python | only the endpoints from above are working. OPENAI_API_BASE=http://127.0.0.1:5001/v1 | | ✅❌ | openai-node | https://github.com/openai/openai-node | only the endpoints from above are working. environment variables don't work by default, but can be configured (see above) | | ✅❌ | chatgpt-api | https://github.com/transitive-bullshit/chatgpt-api | only the endpoints from above are working. environment variables don't work by default, but can be configured (see above) | +| ✅ | anse | https://github.com/anse-app/anse | API Key & URL configurable in UI | | ✅ | shell_gpt | https://github.com/TheR1D/shell_gpt | OPENAI_API_HOST=http://127.0.0.1:5001 | | ✅ | gpt-shell | https://github.com/jla/gpt-shell | OPENAI_API_BASE=http://127.0.0.1:5001/v1 | | ✅ | gpt-discord-bot | https://github.com/openai/gpt-discord-bot | OPENAI_API_BASE=http://127.0.0.1:5001/v1 | diff --git a/extensions/openai/script.py b/extensions/openai/script.py index cdd37376..1914dcf4 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -126,10 +126,32 @@ def float_list_to_base64(float_list): class Handler(BaseHTTPRequestHandler): + def send_access_control_headers(self): + self.send_header("Access-Control-Allow-Origin", "*") + self.send_header("Access-Control-Allow-Credentials", "true") + self.send_header( + "Access-Control-Allow-Methods", + "GET,HEAD,OPTIONS,POST,PUT" + ) + self.send_header( + "Access-Control-Allow-Headers", + "Origin, Accept, X-Requested-With, Content-Type, " + "Access-Control-Request-Method, Access-Control-Request-Headers, " + "Authorization" + ) + + def do_OPTIONS(self): + self.send_response(200) + self.send_access_control_headers() + self.send_header('Content-Type', 'application/json') + self.end_headers() + self.wfile.write("OK".encode('utf-8')) + def do_GET(self): if self.path.startswith('/v1/models'): self.send_response(200) + self.send_access_control_headers() self.send_header('Content-Type', 'application/json') self.end_headers() @@ -181,6 +203,7 @@ class Handler(BaseHTTPRequestHandler): elif '/billing/usage' in self.path: # Ex. /v1/dashboard/billing/usage?start_date=2023-05-01&end_date=2023-05-31 self.send_response(200) + self.send_access_control_headers() self.send_header('Content-Type', 'application/json') self.end_headers() @@ -243,6 +266,7 @@ class Handler(BaseHTTPRequestHandler): req_params['add_bos_token'] = shared.settings.get('add_bos_token', default_req_params['add_bos_token']) self.send_response(200) + self.send_access_control_headers() if req_params['stream']: self.send_header('Content-Type', 'text/event-stream') self.send_header('Cache-Control', 'no-cache') @@ -396,7 +420,9 @@ class Handler(BaseHTTPRequestHandler): chunk[resp_list][0]["message"] = {'role': 'assistant', 'content': ''} chunk[resp_list][0]["delta"] = {'role': 'assistant', 'content': ''} - response = 'data: ' + json.dumps(chunk) + '\n' + data_chunk = 'data: ' + json.dumps(chunk) + '\r\n\r\n' + chunk_size = hex(len(data_chunk))[2:] + '\r\n' + response = chunk_size + data_chunk self.wfile.write(response.encode('utf-8')) # generate reply ####################################### @@ -469,7 +495,9 @@ class Handler(BaseHTTPRequestHandler): # So yeah... do both methods? delta and messages. chunk[resp_list][0]['message'] = {'content': new_content} chunk[resp_list][0]['delta'] = {'content': new_content} - response = 'data: ' + json.dumps(chunk) + '\n' + data_chunk = 'data: ' + json.dumps(chunk) + '\r\n\r\n' + chunk_size = hex(len(data_chunk))[2:] + '\r\n' + response = chunk_size + data_chunk self.wfile.write(response.encode('utf-8')) completion_token_count += len(encode(new_content)[0]) @@ -494,8 +522,12 @@ class Handler(BaseHTTPRequestHandler): else: # So yeah... do both methods? delta and messages. chunk[resp_list][0]['message'] = {'content': ''} - chunk[resp_list][0]['delta'] = {} - response = 'data: ' + json.dumps(chunk) + '\ndata: [DONE]\n' + chunk[resp_list][0]['delta'] = {'content': ''} + + data_chunk = 'data: ' + json.dumps(chunk) + '\r\n\r\n' + chunk_size = hex(len(data_chunk))[2:] + '\r\n' + done = 'data: [DONE]\r\n\r\n' + response = chunk_size + data_chunk + done self.wfile.write(response.encode('utf-8')) # Finished if streaming. if debug: @@ -541,6 +573,7 @@ class Handler(BaseHTTPRequestHandler): self.wfile.write(response.encode('utf-8')) elif '/edits' in self.path: self.send_response(200) + self.send_access_control_headers() self.send_header('Content-Type', 'application/json') self.end_headers() @@ -613,6 +646,7 @@ class Handler(BaseHTTPRequestHandler): # url return types will require file management and a web serving files... Perhaps later! self.send_response(200) + self.send_access_control_headers() self.send_header('Content-Type', 'application/json') self.end_headers() @@ -647,6 +681,7 @@ class Handler(BaseHTTPRequestHandler): self.wfile.write(response.encode('utf-8')) elif '/embeddings' in self.path and embedding_model is not None: self.send_response(200) + self.send_access_control_headers() self.send_header('Content-Type', 'application/json') self.end_headers() @@ -680,6 +715,7 @@ class Handler(BaseHTTPRequestHandler): elif '/moderations' in self.path: # for now do nothing, just don't error. self.send_response(200) + self.send_access_control_headers() self.send_header('Content-Type', 'application/json') self.end_headers() @@ -713,6 +749,7 @@ class Handler(BaseHTTPRequestHandler): elif self.path == '/api/v1/token-count': # NOT STANDARD. lifted from the api extension, but it's still very useful to calculate tokenized length client side. self.send_response(200) + self.send_access_control_headers() self.send_header('Content-Type', 'application/json') self.end_headers()