mirror of https://github.com/oobabooga/text-generation-webui.git, synced 2024-11-23 00:18:20 +01:00
Handle the no-GPU / multi-GPU cases
parent 10e939c9b4
commit 13789fd200

server.py (52 lines changed)
@@ -8,7 +8,6 @@ import json
 import math
 import os
 import re
-import sys
 import time
 import traceback
 import zipfile
@@ -209,16 +208,26 @@ def download_model_wrapper(repo_id):
 
 
 def list_model_parameters():
-    return ['gpu_memory', 'cpu_memory', 'auto_devices', 'disk', 'cpu', 'bf16', 'load_in_8bit', 'wbits', 'groupsize', 'model_type', 'pre_layer']
+    parameters = ['cpu_memory', 'auto_devices', 'disk', 'cpu', 'bf16', 'load_in_8bit', 'wbits', 'groupsize', 'model_type', 'pre_layer']
+    for i in range(torch.cuda.device_count()):
+        parameters.append(f'gpu_memory_{i}')
+
+    return parameters
 
 
 # Update the command-line arguments based on the interface values
 def update_model_parameters(*args):
-    args = list(args)
-    elements = list_model_parameters()
+    args = list(args) # the values of the parameters
+    elements = list_model_parameters() # the names of the parameters
 
+    gpu_memories = []
     for i, element in enumerate(elements):
-        if element in ['gpu_memory', 'cpu_memory'] and args[i] == 0:
+        if element.startswith('gpu_memory'):
+            gpu_memories.append(args[i])
+            continue
+
+        if element == 'cpu_memory' and args[i] == 0:
             args[i] = None
         if element == 'wbits' and args[i] == 'None':
             args[i] = 0
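A minimal standalone sketch (not part of the commit) of the enumeration pattern used above: torch.cuda.device_count() returns 0 on a CPU-only machine, so the loop adds no gpu_memory_<i> entries and only the base parameters remain; with two GPUs it adds gpu_memory_0 and gpu_memory_1.

import torch

# Base parameters are always present; the per-device names depend on what
# PyTorch can see at startup.
base = ['cpu_memory', 'auto_devices', 'disk', 'cpu', 'bf16', 'load_in_8bit',
        'wbits', 'groupsize', 'model_type', 'pre_layer']
gpu_params = [f'gpu_memory_{i}' for i in range(torch.cuda.device_count())]

print(base + gpu_params)  # CPU-only box: just the base list; 2-GPU box: base + ['gpu_memory_0', 'gpu_memory_1']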
@@ -228,25 +237,41 @@ def update_model_parameters(*args):
             args[i] = None
         if element in ['wbits', 'groupsize', 'pre_layer']:
             args[i] = int(args[i])
-        if element == 'gpu_memory' and args[i] is not None:
-            args[i] = [f"{args[i]}MiB"]
         elif element == 'cpu_memory' and args[i] is not None:
             args[i] = f"{args[i]}MiB"
 
         #print(element, repr(eval(f"shared.args.{element}")), repr(args[i]))
         #print(f"shared.args.{element} = args[i]")
         exec(f"shared.args.{element} = args[i]")
-        #print()
+
+    found_positive = False
+    for i in gpu_memories:
+        if i > 0:
+            found_positive = True
+            break
+
+    if found_positive:
+        shared.args.gpu_memory = [f"{i}MiB" for i in gpu_memories]
+    else:
+        shared.args.gpu_memory = None
 
 
 def create_model_menus():
 
     # Finding the default values for the GPU and CPU memories
-    total_mem = math.floor(torch.cuda.get_device_properties(0).total_memory / (1024*1024))
-    total_cpu_mem = math.floor(psutil.virtual_memory().total / (1024*1024))
+    total_mem = []
+    for i in range(torch.cuda.device_count()):
+        total_mem.append(math.floor(torch.cuda.get_device_properties(i).total_memory / (1024*1024)))
+
+    default_gpu_mem = []
     if shared.args.gpu_memory is not None and len(shared.args.gpu_memory) > 0:
-        default_gpu_mem = re.sub('[a-zA-Z ]', '', shared.args.gpu_memory[0])
-    else:
-        default_gpu_mem = 0
+        for i in shared.args.gpu_memory:
+            if 'mib' in i.lower():
+                default_gpu_mem.append(int(re.sub('[a-zA-Z ]', '', i)))
+            else:
+                default_gpu_mem.append(int(re.sub('[a-zA-Z ]', '', i))*1000)
+    while len(default_gpu_mem) < len(total_mem):
+        default_gpu_mem.append(0)
+
+    total_cpu_mem = math.floor(psutil.virtual_memory().total / (1024*1024))
     if shared.args.cpu_memory is not None:
         default_cpu_mem = re.sub('[a-zA-Z ]', '', shared.args.cpu_memory)
     else:
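Two small sketches of the logic above, written standalone for illustration (the helper names collapse_gpu_memories and to_mib are not in the commit). The first mirrors the found_positive loop: shared.args.gpu_memory is only populated when at least one per-GPU value is positive, and an all-zero list, which is what the no-GPU case produces, becomes None. The second mirrors the default parsing: a --gpu-memory value containing 'MiB' is taken as-is, anything else is treated as GiB and multiplied by 1000, as in the commit.

import re

def collapse_gpu_memories(gpu_memories):
    # Equivalent to the found_positive loop: keep the limits only if any is > 0.
    if any(v > 0 for v in gpu_memories):
        return [f"{v}MiB" for v in gpu_memories]
    return None

def to_mib(value):
    # '3500MiB' -> 3500, '4GiB' -> 4000, matching the *1000 factor above.
    number = int(re.sub('[a-zA-Z ]', '', value))
    return number if 'mib' in value.lower() else number * 1000

print(collapse_gpu_memories([0, 0]))      # None  (untouched sliders / no GPUs)
print(collapse_gpu_memories([3500, 0]))   # ['3500MiB', '0MiB']
print(to_mib('3500MiB'), to_mib('4GiB'))  # 3500 4000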
@@ -275,7 +300,8 @@ def create_model_menus():
     with gr.Box():
         with gr.Row():
             with gr.Column():
-                components['gpu_memory'] = gr.Slider(label="gpu-memory in MiB", maximum=total_mem, value=default_gpu_mem)
+                for i in range(len(total_mem)):
+                    components[f'gpu_memory_{i}'] = gr.Slider(label="gpu-memory in MiB", maximum=total_mem[i], value=default_gpu_mem[i])
                 components['cpu_memory'] = gr.Slider(label="cpu-memory in MiB", maximum=total_cpu_mem, value=default_cpu_mem)
 
             with gr.Column():
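For context, a self-contained sketch of the same pattern outside the webui: build one slider per visible CUDA device in a loop and hand them all to a single callback in creation order. This is an illustration under assumed wiring (a plain Apply button), not the webui's actual event hookup.

import math

import gradio as gr
import torch

# One maximum per detected device; an empty list on a CPU-only machine.
total_mem = [math.floor(torch.cuda.get_device_properties(i).total_memory / (1024 * 1024))
             for i in range(torch.cuda.device_count())]

def apply_limits(*values):
    # values arrive in slider-creation order: gpu_memory_0, gpu_memory_1, ...
    limits = [f"{int(v)}MiB" for v in values] if any(values) else None
    return f"gpu_memory = {limits}"

with gr.Blocks() as demo:
    sliders = [gr.Slider(label=f"gpu-memory {i} in MiB", maximum=m, value=0)
               for i, m in enumerate(total_mem)]
    out = gr.Textbox()
    gr.Button("Apply").click(apply_limits, inputs=sliders, outputs=out)

# demo.launch()  # with no GPUs the page simply shows no memory sliders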