diff --git a/README.md b/README.md
index c5efcc04..dec01cc1 100644
--- a/README.md
+++ b/README.md
@@ -68,9 +68,9 @@ You also need to put GPT-J-6B's config.json file in the same folder: [config.jso
 
 The script `convert-to-torch.py` allows you to convert models to .pt format, which is about 10x faster to load:
 
-    python convert-to-torch.py models/model-name/
+    python convert-to-torch.py models/model-name
 
-The output model will be saved to `torch-dumps/model-name.pt`. When you load a new model, the webui first looks for this .pt file; if it is not found, it loads the model as usual from `models/model-name/`.
+The output model will be saved to `torch-dumps/model-name.pt`. When you load a new model, the webui first looks for this .pt file; if it is not found, it loads the model as usual from `models/model-name`.
 
 ## Starting the webui
 
diff --git a/server.py b/server.py
index 0d5dcab8..8c184985 100644
--- a/server.py
+++ b/server.py
@@ -75,7 +75,7 @@ def generate_reply(question, temperature, max_length, inference_settings, select
     if selected_model != model_name:
         model_name = selected_model
         model = None
-        tokenier = None
+        tokenizer = None
         torch.cuda.empty_cache()
         model, tokenizer = load_model(model_name)
     if inference_settings != loaded_preset:
@@ -84,8 +84,7 @@ def generate_reply(question, temperature, max_length, inference_settings, select
         loaded_preset = inference_settings
     torch.cuda.empty_cache()
 
-    input_text = question
-    input_ids = tokenizer.encode(str(input_text), return_tensors='pt').cuda()
+    input_ids = tokenizer.encode(str(question), return_tensors='pt').cuda()
     output = eval(f"model.generate(input_ids, {preset}).cuda()")
     reply = tokenizer.decode(output[0], skip_special_tokens=True)
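
As a side note on the README change above: here is a minimal sketch of what a conversion step like `convert-to-torch.py` could look like, assuming it simply loads the model with `transformers` and serializes the whole object with `torch.save`. The flags, paths, and use of `pathlib` are illustrative assumptions, not taken from the actual script:

    # Hypothetical sketch: convert a Hugging Face model folder to a single .pt dump,
    # following the layout the README describes: models/model-name -> torch-dumps/model-name.pt
    from pathlib import Path
    from sys import argv

    import torch
    from transformers import AutoModelForCausalLM

    model_dir = Path(argv[1])     # e.g. models/model-name
    model_name = model_dir.name   # Path.name works with or without a trailing slash

    # Load in half precision and serialize the full model object.
    model = AutoModelForCausalLM.from_pretrained(model_dir, torch_dtype=torch.float16)
    torch.save(model, Path("torch-dumps") / f"{model_name}.pt")

On the loading side, the behavior the README describes amounts to checking for `torch-dumps/model-name.pt` first and, if it is missing, falling back to the usual `from_pretrained` load from `models/model-name`.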