From e735806c510887710d8da9d78db542a60e41b234 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 1 Mar 2023 12:16:11 -0300
Subject: [PATCH] Add a generate() function for RWKV

---
 modules/RWKV.py            | 13 +++++++++++--
 modules/text_generation.py | 10 ++--------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/modules/RWKV.py b/modules/RWKV.py
index c4481043..9f348ad7 100644
--- a/modules/RWKV.py
+++ b/modules/RWKV.py
@@ -31,5 +31,14 @@ class RWKVModel:
         result.model = pipeline
         return result
 
-    def generate(self, context, **kwargs):
-        return self.model.generate(context, **kwargs)
+    def generate(self, context, token_count=20, temperature=1, top_p=1, alpha_frequency=0.25, alpha_presence=0.25, token_ban=None, token_stop=None, callback=None):  # every sampling parameter is forwarded to PIPELINE_ARGS below
+        args = PIPELINE_ARGS(
+            temperature = temperature,
+            top_p = top_p,
+            alpha_frequency = alpha_frequency, # Frequency Penalty (as in GPT-3)
+            alpha_presence = alpha_presence, # Presence Penalty (as in GPT-3)
+            token_ban = token_ban if token_ban is not None else [0], # ban the generation of some tokens; None sentinel avoids a shared mutable default
+            token_stop = token_stop if token_stop is not None else [] # stop generation whenever you see any token here
+        )
+
+        return self.model.generate(context, token_count=token_count, args=args, callback=callback)
diff --git a/modules/text_generation.py b/modules/text_generation.py
index ba4b7d79..1bd84cd2 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -85,19 +85,13 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi
         torch.cuda.empty_cache()
 
     if shared.is_RWKV:
-        args = PIPELINE_ARGS(temperature = temperature, top_p = top_p,
-                     alpha_frequency = 0.25, # Frequency Penalty (as in GPT-3)
-                     alpha_presence = 0.25, # Presence Penalty (as in GPT-3)
-                     token_ban = [0], # ban the generation of some tokens
-                     token_stop = []) # stop generation whenever you see any token here
-
         if shared.args.no_stream:
-            reply = question + shared.model.generate(question, token_count=max_new_tokens, args=args, callback=None)
+            reply = question + shared.model.generate(question, token_count=max_new_tokens, temperature=temperature, top_p=top_p)
             yield formatted_outputs(reply, None)
             return formatted_outputs(reply, None)
         else:
             for i in range(max_new_tokens//8):
-                reply = question + shared.model.generate(question, token_count=8, args=args, callback=None)
+                reply = question + shared.model.generate(question, token_count=8, temperature=temperature, top_p=top_p)
                 yield formatted_outputs(reply, None)
                 question = reply
     return formatted_outputs(reply, None)