Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2024-11-25 17:29:22 +01:00)
Proper way to free the cuda cache
commit fa58fd5559
parent 03d25c1c61
@@ -1,3 +1,4 @@
+import gc
 import re
 import time
 
@@ -73,7 +74,9 @@ def formatted_outputs(reply, model_name):
         return reply
 
 def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=None, stopping_string=None):
-    torch.cuda.empty_cache()
+    gc.collect()
+    if not shared.args.cpu:
+        torch.cuda.empty_cache()
 
     original_question = question
     if not (shared.args.chat or shared.args.cai_chat):
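For reference outside the diff, the pattern the commit adopts (collect Python garbage first, then release the CUDA caching allocator only when a GPU is actually in use) can be sketched as a standalone helper. This is a minimal illustration, not the project's code: free_cuda_cache and its cpu_only flag are hypothetical names standing in for the webui's shared.args.cpu check.

import gc

import torch

def free_cuda_cache(cpu_only=False):
    # Drop Python-level references first so unreferenced tensors become
    # eligible for collection; only collected tensors free their blocks
    # back to PyTorch's caching allocator.
    gc.collect()
    # Then ask the caching allocator to return unused blocks to the driver,
    # but only when a GPU is in use (skipped in CPU-only mode).
    if not cpu_only and torch.cuda.is_available():
        torch.cuda.empty_cache()

Calling gc.collect() before torch.cuda.empty_cache() matters because empty_cache() can only release cached blocks that no live tensor still references, which is why the commit adds the collection step rather than calling empty_cache() alone.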