Proper way to free the cuda cache

oobabooga 2023-02-25 15:50:29 -03:00
parent 03d25c1c61
commit fa58fd5559

@@ -1,3 +1,4 @@
+import gc
 import re
 import time
@@ -73,7 +74,9 @@ def formatted_outputs(reply, model_name):
         return reply
 
 def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=None, stopping_string=None):
-    torch.cuda.empty_cache()
+    gc.collect()
+    if not shared.args.cpu:
+        torch.cuda.empty_cache()
 
     original_question = question
     if not (shared.args.chat or shared.args.cai_chat):
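In short, the change runs Python's garbage collector before releasing PyTorch's CUDA allocator cache, and skips the CUDA call entirely in CPU mode. Below is a minimal standalone sketch of the same pattern; the use_cpu parameter is a hypothetical stand-in for the repo's shared.args.cpu flag.

import gc

import torch

def free_cuda_cache(use_cpu: bool = False) -> None:
    # Collect unreachable Python objects first, so tensors that were only
    # kept alive by reference cycles are freed and their GPU memory is
    # returned to PyTorch's caching allocator.
    gc.collect()
    # Then ask the caching allocator to release unused cached blocks back
    # to the driver. Skipped in CPU mode, mirroring the shared.args.cpu
    # guard in the diff above.
    if not use_cpu:
        torch.cuda.empty_cache()

Calling gc.collect() first matters: empty_cache() can only release memory that the allocator no longer considers in use, so cycles holding tensor references must be collected before the cache is emptied.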