Minor changes

oobabooga 2023-01-08 14:37:43 -03:00
parent 730c5562cc
commit b801e0d50d
2 changed files with 4 additions and 5 deletions


@@ -68,9 +68,9 @@ You also need to put GPT-J-6B's config.json file in the same folder: [config.jso
 The script `convert-to-torch.py` allows you to convert models to .pt format, which is about 10x faster to load:
-python convert-to-torch.py models/model-name/
+python convert-to-torch.py models/model-name
-The output model will be saved to `torch-dumps/model-name.pt`. When you load a new model, the webui first looks for this .pt file; if it is not found, it loads the model as usual from `models/model-name/`.
+The output model will be saved to `torch-dumps/model-name.pt`. When you load a new model, the webui first looks for this .pt file; if it is not found, it loads the model as usual from `models/model-name`.
 ## Starting the webui
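
The README text above describes a cache-with-fallback pattern: serialize the loaded model once with `torch.save`, then prefer the pickled `.pt` dump on later loads. Below is a minimal sketch of that idea, assuming a Hugging Face checkpoint under `models/`; the function names are illustrative, not the repository's actual code.

```python
import os
import torch
from transformers import AutoModelForCausalLM

def convert_model_to_pt(model_name):
    # Roughly the idea behind convert-to-torch.py: build the model once from
    # the checkpoint under models/, then pickle the whole object with torch.save.
    model = AutoModelForCausalLM.from_pretrained(f"models/{model_name}")
    torch.save(model, f"torch-dumps/{model_name}.pt")

def load_cached_model(model_name):
    # Cache-with-fallback: prefer the pickled .pt dump when it exists,
    # otherwise load from models/model-name as usual.
    pt_path = f"torch-dumps/{model_name}.pt"
    if os.path.exists(pt_path):
        return torch.load(pt_path)
    return AutoModelForCausalLM.from_pretrained(f"models/{model_name}")
```

Loading the pickled object skips re-instantiating the model from the original checkpoint files, which is where the roughly 10x speedup comes from.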


@@ -75,7 +75,7 @@ def generate_reply(question, temperature, max_length, inference_settings, select
     if selected_model != model_name:
         model_name = selected_model
         model = None
-        tokenier = None
+        tokenizer = None
         torch.cuda.empty_cache()
         model, tokenizer = load_model(model_name)
     if inference_settings != loaded_preset:
@@ -84,8 +84,7 @@ def generate_reply(question, temperature, max_length, inference_settings, select
         loaded_preset = inference_settings
     torch.cuda.empty_cache()
-    input_text = question
-    input_ids = tokenizer.encode(str(input_text), return_tensors='pt').cuda()
+    input_ids = tokenizer.encode(str(question), return_tensors='pt').cuda()
     output = eval(f"model.generate(input_ids, {preset}).cuda()")
     reply = tokenizer.decode(output[0], skip_special_tokens=True)
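
The consolidated line follows the standard Hugging Face encode → generate → decode sequence, with the sampling parameters spliced in from the preset string via `eval`. Here is a self-contained sketch of the same flow; the model name and generation settings are placeholders standing in for whatever model and preset the webui would actually load.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder checkpoint; the webui would load whichever model the user selected.
model_name = "EleutherAI/gpt-neo-125M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).cuda()

question = "Write a haiku about autumn."

# Same three steps as the updated generate_reply(): encode the prompt,
# generate on the GPU, then decode the whole returned sequence.
input_ids = tokenizer.encode(str(question), return_tensors='pt').cuda()
output = model.generate(input_ids, do_sample=True, temperature=0.7, max_length=200)
reply = tokenizer.decode(output[0], skip_special_tokens=True)
print(reply)
```

Passing the generation settings as keyword arguments, as above, corresponds to what the `eval(f"model.generate(input_ids, {preset})...")` call assembles at runtime from the preset file.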