From b55486fa00fe65015cf857f4a5b2f45c91a2ef37 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 15 Jan 2023 23:01:51 -0300
Subject: [PATCH] Reorganize things

---
 server.py | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/server.py b/server.py
index b1f3d3a2..e5c0e19d 100644
--- a/server.py
+++ b/server.py
@@ -74,28 +74,21 @@ def load_model(model_name):
     # Custom
     else:
         settings = ["low_cpu_mem_usage=True"]
-        cuda = ""
         command = "AutoModelForCausalLM.from_pretrained"
 
         if args.cpu:
             settings.append("torch_dtype=torch.float32")
         else:
+            settings.append("device_map='auto'")
             if args.max_gpu_memory is not None:
                 settings.append(f"max_memory={{0: '{args.max_gpu_memory}GiB', 'cpu': '99GiB'}}")
-                settings.append("device_map='auto'")
-                settings.append("torch_dtype=torch.float16")
-            elif args.load_in_8bit:
-                settings.append("device_map='auto'")
+            if args.load_in_8bit:
                 settings.append("load_in_8bit=True")
             else:
                 settings.append("torch_dtype=torch.float16")
-                if args.auto_devices:
-                    settings.append("device_map='auto'")
-                else:
-                    cuda = ".cuda()"
 
         settings = ', '.join(list(set(settings)))
-        command = f"{command}(Path(f'models/{model_name}'), {settings}){cuda}"
+        command = f"{command}(Path(f'models/{model_name}'), {settings})"
         model = eval(command)
 
     # Loading the tokenizer
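
A note for reviewers: the sketch below shows the logic this patch leaves behind,
rewritten as a direct from_pretrained() call instead of the string-building plus
eval() that server.py actually uses. The function name load_custom_model and the
explicit args parameter are illustrative, not part of the patch.

from pathlib import Path

import torch
from transformers import AutoModelForCausalLM

def load_custom_model(model_name, args):
    # Equivalent of the post-patch branch: device_map='auto' is set once
    # for every non-CPU path, and 8-bit vs. float16 becomes one follow-up
    # choice instead of a nest of elif branches.
    settings = {"low_cpu_mem_usage": True}
    if args.cpu:
        settings["torch_dtype"] = torch.float32
    else:
        settings["device_map"] = "auto"
        if args.max_gpu_memory is not None:
            settings["max_memory"] = {0: f"{args.max_gpu_memory}GiB", "cpu": "99GiB"}
        if args.load_in_8bit:
            settings["load_in_8bit"] = True
        else:
            settings["torch_dtype"] = torch.float16
    return AutoModelForCausalLM.from_pretrained(Path(f"models/{model_name}"), **settings)

Note that the patch also removes the .cuda() fallback tied to args.auto_devices:
after this change, every GPU load relies on accelerate's device_map='auto' dispatch.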