diff --git a/modules/text_generation.py b/modules/text_generation.py
index 436afbeb..a8157a76 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -201,12 +201,8 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi
                     reply = original_question + apply_extensions(reply[len(question):], "output")
                 yield formatted_outputs(reply, shared.model_name)
 
-                if not shared.args.flexgen:
-                    if output[-1] == n:
-                        break
-                else:
-                    if np.count_nonzero(input_ids[0] == n) < np.count_nonzero(output == n):
-                        break
+                if output[-1] == n:
+                    break
 
     # Stream the output naively for FlexGen since it doesn't support 'stopping_criteria'
     else:
@@ -223,14 +219,9 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi
                 reply = original_question + apply_extensions(reply[len(question):], "output")
             yield formatted_outputs(reply, shared.model_name)
 
-            if not shared.args.flexgen:
-                if output[-1] == n:
-                    break
-                input_ids = torch.reshape(output, (1, output.shape[0]))
-            else:
-                if np.count_nonzero(input_ids[0] == n) < np.count_nonzero(output == n):
-                    break
-                input_ids = np.reshape(output, (1, output.shape[0]))
+            if np.count_nonzero(input_ids[0] == n) < np.count_nonzero(output == n):
+                break
+            input_ids = np.reshape(output, (1, output.shape[0]))
 
             if shared.soft_prompt:
                 inputs_embeds, filler_input_ids = generate_softprompt_input_tensors(input_ids)
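
For reference, a minimal sketch (not part of the patch) of the stopping check kept in the FlexGen branch: since FlexGen doesn't support 'stopping_criteria' and works with plain arrays, the loop compares how many times the stop token `n` appears in the prompt versus in the current output, and breaks as soon as a new one shows up; otherwise it feeds the output back as the next `input_ids`. The token id values below are hypothetical, only the names `n`, `input_ids`, and `output` come from the diff.

```python
import numpy as np

n = 13                                    # hypothetical stop-token id (e.g. newline)
input_ids = np.array([[5, 13, 7, 9]])     # prompt already contains one `n`
output = np.array([5, 13, 7, 9, 22, 13])  # generation appended a second `n`

# Stop once the output holds more stop tokens than the prompt did
if np.count_nonzero(input_ids[0] == n) < np.count_nonzero(output == n):
    print("stop: a new stop token was generated")
else:
    # Otherwise reshape the output and continue generating from it
    input_ids = np.reshape(output, (1, output.shape[0]))
```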