Minor changes

2024-11-26 17:50:22 +01:00 · 2023-01-08 14:37:43 -03:00 · 2023-01-08 14:37:43 -03:00 · b801e0d50d
commit b801e0d50d
parent 730c5562cc
2 changed files with 4 additions and 5 deletions
--- a/README.md
+++ b/README.md
@@ -68,9 +68,9 @@ You also need to put GPT-J-6B's config.json file in the same folder: [config.jso
 The script `convert-to-torch.py` allows you to convert models to .pt format, which is about 10x faster to load:
-    python convert-to-torch.py models/model-name/
+    python convert-to-torch.py models/model-name
-The output model will be saved to `torch-dumps/model-name.pt`. When you load a new model, the webui first looks for this .pt file; if it is not found, it loads the model as usual from `models/model-name/`. 
+The output model will be saved to `torch-dumps/model-name.pt`. When you load a new model, the webui first looks for this .pt file; if it is not found, it loads the model as usual from `models/model-name`. 
 ## Starting the webui
--- a/server.py
+++ b/server.py
@@ -75,7 +75,7 @@ def generate_reply(question, temperature, max_length, inference_settings, select
    if selected_model != model_name:
        model_name = selected_model
        model = None
-        tokenier = None
+        tokenizer = None
        torch.cuda.empty_cache()
        model, tokenizer = load_model(model_name)
    if inference_settings != loaded_preset:
@@ -84,8 +84,7 @@ def generate_reply(question, temperature, max_length, inference_settings, select
        loaded_preset = inference_settings
    torch.cuda.empty_cache()
-    input_text = question
+    input_ids = tokenizer.encode(str(question), return_tensors='pt').cuda()
-    input_ids = tokenizer.encode(str(input_text), return_tensors='pt').cuda()
    output = eval(f"model.generate(input_ids, {preset}).cuda()")
    reply = tokenizer.decode(output[0], skip_special_tokens=True)