diff --git a/server.py b/server.py
index e7a38ac2..73aedf31 100644
--- a/server.py
+++ b/server.py
@@ -44,6 +44,7 @@ def load_model(model_name):
     print(f"Loaded the model in {(time.time()-t0):.2f} seconds.")
     return model, tokenizer
 
+# Removes empty replies from gpt4chan outputs
 def fix_gpt4chan(s):
     for i in range(10):
         s = re.sub("--- [0-9]*\n>>[0-9]*\n---", "---", s)
@@ -52,7 +53,7 @@ def fix_gpt4chan(s):
     return s
 
 
-def fn(question, temperature, max_length, inference_settings, selected_model):
+def generate_reply(question, temperature, max_length, inference_settings, selected_model):
     global model, tokenizer, model_name, loaded_preset, preset
 
     if selected_model != model_name:
@@ -70,7 +71,6 @@ def fn(question, temperature, max_length, inference_settings, selected_model):
         input_text = question
 
     input_ids = tokenizer.encode(str(input_text), return_tensors='pt').cuda()
-
     output = eval(f"model.generate(input_ids, {preset}).cuda()")
     reply = tokenizer.decode(output[0], skip_special_tokens=True)
 
@@ -86,7 +86,7 @@ else:
     default_text = "Common sense questions and answers\n\nQuestion: \nFactual answer:"
 
 interface = gr.Interface(
-    fn,
+    generate_reply,
     inputs=[
         gr.Textbox(value=default_text, lines=15),
         gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Temperature', value=0.7),
@@ -98,7 +98,7 @@ interface = gr.Interface(
         gr.Textbox(placeholder="", lines=15),
     ],
     title="Text generation lab",
-    description=f"Generate text using Large Language Models. Currently working with {model_name}",
+    description=f"Generate text using Large Language Models.",
 )
 
 interface.launch(share=False, server_name="0.0.0.0")