mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-11-26 17:50:22 +01:00
parent
4e4cd67223
commit
a28f0d8bd7
@ -242,7 +242,7 @@ def generate_reply(question, tokens, inference_settings, selected_model, eos_tok
|
|||||||
output = eval(f"model.generate(input_ids, {','.join(generate_params)}, {preset}){cuda}")
|
output = eval(f"model.generate(input_ids, {','.join(generate_params)}, {preset}){cuda}")
|
||||||
reply = decode(output[0])
|
reply = decode(output[0])
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
print(f"Output generated in {(t1-t0):.2f} seconds ({(len(output[0])-len(input_ids[0]))/(t1-t0):.2f} it/s)")
|
print(f"Output generated in {(t1-t0):.2f} seconds ({(len(output[0])-len(input_ids[0]))/(t1-t0)/8:.2f} it/s, {len(output[0])-len(input_ids[0])} tokens)")
|
||||||
if not (args.chat or args.cai_chat):
|
if not (args.chat or args.cai_chat):
|
||||||
reply = original_question + apply_extensions(reply[len(question):], "output")
|
reply = original_question + apply_extensions(reply[len(question):], "output")
|
||||||
yield formatted_outputs(reply, model_name)
|
yield formatted_outputs(reply, model_name)
|
||||||
|
Loading…
Reference in New Issue
Block a user