From c90310e40e544905cad98d7c5457e55d1343aded Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 19 Jan 2023 00:41:57 -0300
Subject: [PATCH] Small simplification

---
 server.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/server.py b/server.py
index a18120ee..3adf3478 100644
--- a/server.py
+++ b/server.py
@@ -140,10 +140,10 @@ def generate_reply(question, tokens, inference_settings, selected_model, eos_tok
             preset = infile.read()
         loaded_preset = inference_settings

+    cuda = "" if args.cpu else ".cuda()"
     if not args.no_stream:
         input_ids = encode(question, 1)
         preset = preset.replace('max_new_tokens=tokens', 'max_new_tokens=1')
-        cuda = "" if args.cpu else ".cuda()"
         for i in range(tokens):
             output = eval(f"model.generate(input_ids, {preset}){cuda}")
             reply = tokenizer.decode(output[0], skip_special_tokens=True)
@@ -162,7 +162,6 @@ def generate_reply(question, tokens, inference_settings, selected_model, eos_tok
             input_ids = output
     else:
         input_ids = encode(question, tokens)
-        cuda = "" if args.cpu else ".cuda()"
         if eos_token is None:
             output = eval(f"model.generate(input_ids, {preset}){cuda}")
         else:
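
Note on the change (not part of the patch): the assignment cuda = "" if args.cpu else ".cuda()" was previously duplicated inside both the streaming (if not args.no_stream:) and non-streaming (else:) branches. Since both branches build the identical suffix for the eval'd model.generate(...) call, the patch hoists the assignment above the branch so it is computed once. A minimal sketch of the resulting flow in generate_reply, with the surrounding code elided:

    cuda = "" if args.cpu else ".cuda()"      # computed once for both paths
    if not args.no_stream:
        ...                                   # streaming: one token per generate() call
        output = eval(f"model.generate(input_ids, {preset}){cuda}")
    else:
        ...                                   # non-streaming: a single generate() call
        output = eval(f"model.generate(input_ids, {preset}){cuda}")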