Code reuse + indication

Now shows a message in the console when the model weights are unloaded. Also, reload_model() now calls unload_model() first to free memory, so that repeated reloads won't fill it up.
2024-11-25 17:29:22 +01:00 · 2023-03-21 20:19:38 +03:00 · 2023-03-21 20:19:38 +03:00 · 483d173d23
commit 483d173d23
parent 1917b15275
1 changed files with 2 additions and 3 deletions
--- a/server.py
+++ b/server.py
@ -64,9 +64,7 @@ def load_model_wrapper(selected_model):
    return selected_model

 def reload_model():
-    if not shared.args.cpu:
-        gc.collect()
-        torch.cuda.empty_cache()
+    unload_model()
    shared.model, shared.tokenizer = load_model(shared.model_name)

 def unload_model():
@ -74,6 +72,7 @@ def unload_model():
    if not shared.args.cpu:
        gc.collect()
        torch.cuda.empty_cache()
+        print("Model weights unloaded.")

 def load_lora_wrapper(selected_lora):
    shared.lora_name = selected_lora