From 119be5639052fe49cafb78986dbde09340fcc5af Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 1 Feb 2023 10:01:44 -0300
Subject: [PATCH 1/2] Add back low_cpu_mem_usage=True

Removing it didn't help with anything, so I am adding it back on a
purely superstitious basis.
---
 server.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server.py b/server.py
index 2b49c572..ea05deca 100644
--- a/server.py
+++ b/server.py
@@ -88,7 +88,7 @@ def load_model(model_name):
     # Custom
     else:
         command = "AutoModelForCausalLM.from_pretrained"
-        settings = []
+        settings = ["low_cpu_mem_usage=True"]
 
         if args.cpu:
             settings.append("low_cpu_mem_usage=True")

From 6b13816c47626fe7fe70ca5c49c8cbdeca602a8f Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 1 Feb 2023 10:43:28 -0300
Subject: [PATCH 2/2] Change default --disk behavior

---
 server.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server.py b/server.py
index ea05deca..06e65292 100644
--- a/server.py
+++ b/server.py
@@ -99,7 +99,7 @@ def load_model(model_name):
 
         if args.gpu_memory:
             settings.append(f"max_memory={{0: '{args.gpu_memory or '99'}GiB', 'cpu': '{args.cpu_memory or '99'}GiB'}}")
-        elif not args.load_in_8bit:
+        elif (args.gpu_memory or args.cpu_memory) and not args.load_in_8bit:
             total_mem = (torch.cuda.get_device_properties(0).total_memory/(1024*1024))
             suggestion = round((total_mem-1000)/1000)*1000
             if total_mem-suggestion < 800:
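
For context, the strings collected in "settings" are presumably joined into
keyword arguments for the AutoModelForCausalLM.from_pretrained call named by
"command". A minimal sketch of the transformers call these two patches would
produce, assuming a hypothetical model path, illustrative memory limits, and
device_map="auto" (the device map is an assumption, not part of the hunks
shown):

    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        "models/example-model",   # hypothetical path for illustration
        low_cpu_mem_usage=True,   # always present after PATCH 1/2
        device_map="auto",        # assumed; max_memory only applies with a device map
        # appended only when --gpu-memory (or, after PATCH 2/2, --cpu-memory)
        # is passed; the figures here are illustrative:
        max_memory={0: "10GiB", "cpu": "30GiB"},
    )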