text-generation-webui/extensions/api/blocking_api.py

import json
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from threading import Thread

from extensions.api.util import build_parameters, build_parameters_train, try_start_cloudflared
from modules import shared
from modules.chat import generate_chat_reply
from modules.text_generation import encode, generate_reply
from modules.training import do_train

class Handler(BaseHTTPRequestHandler):
    """HTTP request handler for the blocking (non-streaming) API.

    GET routes:
        /api/v1/model        -> {'result': shared.model_name}

    POST routes (request body must be a JSON object):
        /api/v1/generate     -> {'results': [{'text': <last value yielded by generate_reply>}]}
        /api/v1/chat         -> {'results': [{'history': <last value yielded by generate_chat_reply>}]}
        /api/v1/finetune     -> runs do_train to exhaustion, printing every yielded value
        /api/v1/token-count  -> {'results': [{'tokens': len(encode(prompt)[0])}]}

    Any other path is answered with 404. Malformed POST requests are answered
    with 411 (no Content-Length) or 400 (bad Content-Length, invalid JSON,
    non-object body, missing required field) instead of raising.
    """

    # Fields that each POST route reads unconditionally from the JSON body.
    # They are checked *before* the 200 status line is sent, so a missing
    # field produces a proper 400 rather than a KeyError after a success
    # status is already on the wire.
    _REQUIRED_FIELDS = {
        '/api/v1/generate': ('prompt',),
        '/api/v1/chat': ('user_input', 'history'),
        '/api/v1/finetune': (),
        '/api/v1/token-count': ('prompt',),
    }

    def _send_json_headers(self):
        """Send a 200 status line followed by the JSON content-type header."""
        self.send_response(200)
        self.send_header('Content-Type', 'application/json')
        self.end_headers()

    def _write_json(self, payload):
        """Serialize ``payload`` to JSON and write it as the response body."""
        self.wfile.write(json.dumps(payload).encode('utf-8'))

    def _read_json_body(self):
        """Read and parse the request body as a JSON object.

        Returns the parsed dict, or ``None`` after an error response (411 or
        400) has already been sent to the client.
        """
        raw_length = self.headers.get('Content-Length')
        if raw_length is None:
            self.send_error(411)
            return None

        try:
            length = int(raw_length)
        except ValueError:
            length = -1
        if length < 0:
            # A negative size would make rfile.read() wait for EOF.
            self.send_error(400, 'Invalid Content-Length header')
            return None

        try:
            body = json.loads(self.rfile.read(length).decode('utf-8'))
        except ValueError:
            # Covers both UnicodeDecodeError and json.JSONDecodeError.
            self.send_error(400, 'Request body is not valid JSON')
            return None

        if not isinstance(body, dict):
            self.send_error(400, 'Request body must be a JSON object')
            return None

        return body

    def do_GET(self):
        """Serve GET requests; only /api/v1/model is supported."""
        if self.path != '/api/v1/model':
            self.send_error(404)
            return

        self._send_json_headers()
        self._write_json({
            'result': shared.model_name
        })

    def do_POST(self):
        """Serve POST requests for the generate/chat/finetune/token-count routes."""
        body = self._read_json_body()
        if body is None:
            return

        required = self._REQUIRED_FIELDS.get(self.path)
        if required is None:
            self.send_error(404)
            return

        missing = [field for field in required if field not in body]
        if missing:
            self.send_error(400, 'Missing required field(s): ' + ', '.join(missing))
            return

        # Headers go out before the (potentially long-running) work starts,
        # exactly as before; only the validation above was moved ahead of them.
        self._send_json_headers()

        if self.path == '/api/v1/generate':
            prompt = body['prompt']
            generate_params = build_parameters(body)
            stopping_strings = generate_params.pop('stopping_strings')
            generate_params['stream'] = False

            generator = generate_reply(
                prompt, generate_params, stopping_strings=stopping_strings, is_chat=False)

            # Only the last yielded value is returned to the client.
            answer = ''
            for a in generator:
                answer = a

            self._write_json({
                'results': [{
                    'text': answer
                }]
            })

        elif self.path == '/api/v1/chat':
            user_input = body['user_input']
            history = body['history']
            regenerate = body.get('regenerate', False)
            _continue = body.get('_continue', False)

            generate_params = build_parameters(body, chat=True)
            generate_params['stream'] = False

            generator = generate_chat_reply(
                user_input, history, generate_params, regenerate=regenerate, _continue=_continue, loading_message=False)

            # Falls back to the submitted history if nothing is yielded.
            answer = history
            for a in generator:
                answer = a

            self._write_json({
                'results': [{
                    'history': answer
                }]
            })

        elif self.path == '/api/v1/finetune':
            generate_params = build_parameters_train(body)

            # Iterate ONE generator to exhaustion. Calling do_train() again on
            # every loop pass (as next(do_train(...)) did) creates a fresh
            # generator each time and therefore never reaches StopIteration.
            for status in do_train(**generate_params):
                print(status)

            # NOTE(review): no response body is written for this route, so
            # clients receive the 200 headers only - confirm this is intended.

        elif self.path == '/api/v1/token-count':
            tokens = encode(body['prompt'])[0]
            self._write_json({
                'results': [{
                    'tokens': len(tokens)
                }]
            })


def _run_server(port: int, share: bool = False):
    """Create the blocking API HTTP server and serve requests forever.

    The server binds to '0.0.0.0' when ``shared.args.listen`` is truthy and
    to '127.0.0.1' otherwise.

    Args:
        port: TCP port to bind the server to.
        share: When true, try to expose the port through cloudflared via
            ``try_start_cloudflared``; the public URL is announced by the
            ``on_start`` callback. When false, the local URL is printed.

    This function does not return: it blocks in ``serve_forever()``.
    """
    address = '0.0.0.0' if shared.args.listen else '127.0.0.1'

    server = ThreadingHTTPServer((address, port), Handler)

    def on_start(public_url: str):
        print(f'Starting non-streaming server at public url {public_url}/api')

    if share:
        try:
            try_start_cloudflared(port, max_attempts=3, on_start=on_start)
        except Exception as exc:
            # Sharing is best-effort: keep serving locally, but say so instead
            # of failing silently with no URL printed at all.
            print(f'Failed to start the public API tunnel: {exc!r}')
            print(
                f'Starting API at http://{address}:{port}/api')
    else:
        print(
            f'Starting API at http://{address}:{port}/api')

    server.serve_forever()


def start_server(port: int, share: bool = False):
    """Launch ``_run_server(port, share)`` on a background daemon thread.

    Returns immediately; being a daemon, the thread does not keep the
    process alive on its own.
    """
    worker = Thread(
        target=_run_server,
        args=(port, share),
        daemon=True,
    )
    worker.start()
New universal API with streaming/blocking endpoints (#990) Previous title: Add api_streaming extension and update api-example-stream to use it * Merge with latest main * Add parameter capturing encoder_repetition_penalty * Change some defaults, minor fixes * Add --api, --public-api flags * remove unneeded/broken comment from blocking API startup. The comment is already correctly emitted in try_start_cloudflared by calling the lambda we pass in. * Update on_start message for blocking_api, it should say 'non-streaming' and not 'streaming' * Update the API examples * Change a comment * Update README * Remove the gradio API * Remove unused import * Minor change * Remove unused import --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> 2023-04-23 20:52:43 +02:00			`import json`
			`from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer`
			`from threading import Thread`

add api endpoint for finetuning 2023-05-25 23:42:32 +02:00			`from extensions.api.util import build_parameters, build_parameters_train, try_start_cloudflared`
New universal API with streaming/blocking endpoints (#990) Previous title: Add api_streaming extension and update api-example-stream to use it * Merge with latest main * Add parameter capturing encoder_repetition_penalty * Change some defaults, minor fixes * Add --api, --public-api flags * remove unneeded/broken comment from blocking API startup. The comment is already correctly emitted in try_start_cloudflared by calling the lambda we pass in. * Update on_start message for blocking_api, it should say 'non-streaming' and not 'streaming' * Update the API examples * Change a comment * Update README * Remove the gradio API * Remove unused import * Minor change * Remove unused import --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> 2023-04-23 20:52:43 +02:00			`from modules import shared`
Add chat API (#2233) 2023-05-20 23:42:17 +02:00			`from modules.chat import generate_chat_reply`
New universal API with streaming/blocking endpoints (#990) Previous title: Add api_streaming extension and update api-example-stream to use it * Merge with latest main * Add parameter capturing encoder_repetition_penalty * Change some defaults, minor fixes * Add --api, --public-api flags * remove unneeded/broken comment from blocking API startup. The comment is already correctly emitted in try_start_cloudflared by calling the lambda we pass in. * Update on_start message for blocking_api, it should say 'non-streaming' and not 'streaming' * Update the API examples * Change a comment * Update README * Remove the gradio API * Remove unused import * Minor change * Remove unused import --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> 2023-04-23 20:52:43 +02:00			`from modules.text_generation import encode, generate_reply`
add api endpoint for finetuning 2023-05-25 23:42:32 +02:00			`from modules.training import do_train`
New universal API with streaming/blocking endpoints (#990) Previous title: Add api_streaming extension and update api-example-stream to use it * Merge with latest main * Add parameter capturing encoder_repetition_penalty * Change some defaults, minor fixes * Add --api, --public-api flags * remove unneeded/broken comment from blocking API startup. The comment is already correctly emitted in try_start_cloudflared by calling the lambda we pass in. * Update on_start message for blocking_api, it should say 'non-streaming' and not 'streaming' * Update the API examples * Change a comment * Update README * Remove the gradio API * Remove unused import * Minor change * Remove unused import --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> 2023-04-23 20:52:43 +02:00
			`class Handler(BaseHTTPRequestHandler):`
			`def do_GET(self):`
			`if self.path == '/api/v1/model':`
			`self.send_response(200)`
			`self.end_headers()`
			`response = json.dumps({`
			`'result': shared.model_name`
			`})`

			`self.wfile.write(response.encode('utf-8'))`
			`else:`
			`self.send_error(404)`

			`def do_POST(self):`
			`content_length = int(self.headers['Content-Length'])`
			`body = json.loads(self.rfile.read(content_length).decode('utf-8'))`

			`if self.path == '/api/v1/generate':`
			`self.send_response(200)`
			`self.send_header('Content-Type', 'application/json')`
			`self.end_headers()`

			`prompt = body['prompt']`
			`generate_params = build_parameters(body)`
			`stopping_strings = generate_params.pop('stopping_strings')`
Refactor text_generation.py, add support for custom generation functions (#1817) 2023-05-05 23:53:03 +02:00			`generate_params['stream'] = False`
New universal API with streaming/blocking endpoints (#990) Previous title: Add api_streaming extension and update api-example-stream to use it * Merge with latest main * Add parameter capturing encoder_repetition_penalty * Change some defaults, minor fixes * Add --api, --public-api flags * remove unneeded/broken comment from blocking API startup. The comment is already correctly emitted in try_start_cloudflared by calling the lambda we pass in. * Update on_start message for blocking_api, it should say 'non-streaming' and not 'streaming' * Update the API examples * Change a comment * Update README * Remove the gradio API * Remove unused import * Minor change * Remove unused import --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> 2023-04-23 20:52:43 +02:00
			`generator = generate_reply(`
Refactor chat functions (#2003) 2023-05-11 20:37:04 +02:00			`prompt, generate_params, stopping_strings=stopping_strings, is_chat=False)`
New universal API with streaming/blocking endpoints (#990) Previous title: Add api_streaming extension and update api-example-stream to use it * Merge with latest main * Add parameter capturing encoder_repetition_penalty * Change some defaults, minor fixes * Add --api, --public-api flags * remove unneeded/broken comment from blocking API startup. The comment is already correctly emitted in try_start_cloudflared by calling the lambda we pass in. * Update on_start message for blocking_api, it should say 'non-streaming' and not 'streaming' * Update the API examples * Change a comment * Update README * Remove the gradio API * Remove unused import * Minor change * Remove unused import --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> 2023-04-23 20:52:43 +02:00
			`answer = ''`
			`for a in generator:`
Refactor chat functions (#2003) 2023-05-11 20:37:04 +02:00			`answer = a`
New universal API with streaming/blocking endpoints (#990) Previous title: Add api_streaming extension and update api-example-stream to use it * Merge with latest main * Add parameter capturing encoder_repetition_penalty * Change some defaults, minor fixes * Add --api, --public-api flags * remove unneeded/broken comment from blocking API startup. The comment is already correctly emitted in try_start_cloudflared by calling the lambda we pass in. * Update on_start message for blocking_api, it should say 'non-streaming' and not 'streaming' * Update the API examples * Change a comment * Update README * Remove the gradio API * Remove unused import * Minor change * Remove unused import --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> 2023-04-23 20:52:43 +02:00
			`response = json.dumps({`
			`'results': [{`
Always return only the new tokens in generation functions 2023-05-11 22:07:20 +02:00			`'text': answer`
New universal API with streaming/blocking endpoints (#990) Previous title: Add api_streaming extension and update api-example-stream to use it * Merge with latest main * Add parameter capturing encoder_repetition_penalty * Change some defaults, minor fixes * Add --api, --public-api flags * remove unneeded/broken comment from blocking API startup. The comment is already correctly emitted in try_start_cloudflared by calling the lambda we pass in. * Update on_start message for blocking_api, it should say 'non-streaming' and not 'streaming' * Update the API examples * Change a comment * Update README * Remove the gradio API * Remove unused import * Minor change * Remove unused import --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> 2023-04-23 20:52:43 +02:00			`}]`
			`})`
Add chat API (#2233) 2023-05-20 23:42:17 +02:00
New universal API with streaming/blocking endpoints (#990) Previous title: Add api_streaming extension and update api-example-stream to use it * Merge with latest main * Add parameter capturing encoder_repetition_penalty * Change some defaults, minor fixes * Add --api, --public-api flags * remove unneeded/broken comment from blocking API startup. The comment is already correctly emitted in try_start_cloudflared by calling the lambda we pass in. * Update on_start message for blocking_api, it should say 'non-streaming' and not 'streaming' * Update the API examples * Change a comment * Update README * Remove the gradio API * Remove unused import * Minor change * Remove unused import --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> 2023-04-23 20:52:43 +02:00			`self.wfile.write(response.encode('utf-8'))`
Add chat API (#2233) 2023-05-20 23:42:17 +02:00
			`elif self.path == '/api/v1/chat':`
			`self.send_response(200)`
			`self.send_header('Content-Type', 'application/json')`
			`self.end_headers()`

			`user_input = body['user_input']`
			`history = body['history']`
			`regenerate = body.get('regenerate', False)`
			`_continue = body.get('_continue', False)`

			`generate_params = build_parameters(body, chat=True)`
			`generate_params['stream'] = False`

			`generator = generate_chat_reply(`
			`user_input, history, generate_params, regenerate=regenerate, _continue=_continue, loading_message=False)`

			`answer = history`
			`for a in generator:`
			`answer = a`

			`response = json.dumps({`
			`'results': [{`
			`'history': answer`
			`}]`
			`})`

			`self.wfile.write(response.encode('utf-8'))`

add api endpoint for finetuning 2023-05-25 23:42:32 +02:00			`elif self.path == '/api/v1/finetune':`
			`self.send_response(200)`
			`self.send_header('Content-Type', 'application/json')`
			`self.end_headers()`

			`generate_params = build_parameters_train(body)`
			`while True:`
			`try:`
			`print(next(do_train(**generate_params)))`
			`except StopIteration:`
			`break`

New universal API with streaming/blocking endpoints (#990) Previous title: Add api_streaming extension and update api-example-stream to use it * Merge with latest main * Add parameter capturing encoder_repetition_penalty * Change some defaults, minor fixes * Add --api, --public-api flags * remove unneeded/broken comment from blocking API startup. The comment is already correctly emitted in try_start_cloudflared by calling the lambda we pass in. * Update on_start message for blocking_api, it should say 'non-streaming' and not 'streaming' * Update the API examples * Change a comment * Update README * Remove the gradio API * Remove unused import * Minor change * Remove unused import --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> 2023-04-23 20:52:43 +02:00			`elif self.path == '/api/v1/token-count':`
			`self.send_response(200)`
			`self.send_header('Content-Type', 'application/json')`
			`self.end_headers()`

			`tokens = encode(body['prompt'])[0]`
			`response = json.dumps({`
			`'results': [{`
			`'tokens': len(tokens)`
			`}]`
			`})`
Add chat API (#2233) 2023-05-20 23:42:17 +02:00
New universal API with streaming/blocking endpoints (#990) Previous title: Add api_streaming extension and update api-example-stream to use it * Merge with latest main * Add parameter capturing encoder_repetition_penalty * Change some defaults, minor fixes * Add --api, --public-api flags * remove unneeded/broken comment from blocking API startup. The comment is already correctly emitted in try_start_cloudflared by calling the lambda we pass in. * Update on_start message for blocking_api, it should say 'non-streaming' and not 'streaming' * Update the API examples * Change a comment * Update README * Remove the gradio API * Remove unused import * Minor change * Remove unused import --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> 2023-04-23 20:52:43 +02:00			`self.wfile.write(response.encode('utf-8'))`
			`else:`
			`self.send_error(404)`


Refactor text_generation.py, add support for custom generation functions (#1817) 2023-05-05 23:53:03 +02:00			`def _run_server(port: int, share: bool = False):`
New universal API with streaming/blocking endpoints (#990) Previous title: Add api_streaming extension and update api-example-stream to use it * Merge with latest main * Add parameter capturing encoder_repetition_penalty * Change some defaults, minor fixes * Add --api, --public-api flags * remove unneeded/broken comment from blocking API startup. The comment is already correctly emitted in try_start_cloudflared by calling the lambda we pass in. * Update on_start message for blocking_api, it should say 'non-streaming' and not 'streaming' * Update the API examples * Change a comment * Update README * Remove the gradio API * Remove unused import * Minor change * Remove unused import --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> 2023-04-23 20:52:43 +02:00			`address = '0.0.0.0' if shared.args.listen else '127.0.0.1'`

			`server = ThreadingHTTPServer((address, port), Handler)`

			`def on_start(public_url: str):`
			`print(f'Starting non-streaming server at public url {public_url}/api')`

			`if share:`
			`try:`
			`try_start_cloudflared(port, max_attempts=3, on_start=on_start)`
			`except Exception:`
			`pass`
			`else:`
			`print(`
			`f'Starting API at http://{address}:{port}/api')`

			`server.serve_forever()`


			`def start_server(port: int, share: bool = False):`
			`Thread(target=_run_server, args=[port, share], daemon=True).start()`