From c8207d474f9c5365ab5a1c269eb71bff05a31988 Mon Sep 17 00:00:00 2001 From: Maya Eary Date: Tue, 28 Mar 2023 20:38:55 +0300 Subject: [PATCH 01/14] Generalized load_quantized --- modules/GPTQ_loader.py | 54 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index afb5695f..351d658d 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -4,13 +4,48 @@ from pathlib import Path import accelerate import torch +import transformers +from transformers import AutoConfig, AutoModelForCausalLM import modules.shared as shared sys.path.insert(0, str(Path("repositories/GPTQ-for-LLaMa"))) -import llama import llama_inference_offload -import opt +from quant import make_quant +from modelutils import find_layers + +def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exclude_layers=['lm_head']): + config = AutoConfig.from_pretrained(model) + def noop(*args, **kwargs): + pass + torch.nn.init.kaiming_uniform_ = noop + torch.nn.init.uniform_ = noop + torch.nn.init.normal_ = noop + + torch.set_default_dtype(torch.half) + transformers.modeling_utils._init_weights = False + torch.set_default_dtype(torch.half) + model = AutoModelForCausalLM.from_config(config) + torch.set_default_dtype(torch.float) + model = model.eval() + layers = find_layers(model) + for name in exclude_layers: + if name in layers: + del layers[name] + make_quant(model, layers, wbits, groupsize, faster=faster_kernel) + + del layers + + print('Loading model ...') + if checkpoint.endswith('.safetensors'): + from safetensors.torch import load_file as safe_load + model.load_state_dict(safe_load(checkpoint)) + else: + model.load_state_dict(torch.load(checkpoint)) + model.seqlen = 2048 + print('Done.') + + return model def load_quantized(model_name): @@ -20,6 +55,8 @@ def load_quantized(model_name): model_type = 'llama' elif model_name.lower().startswith(('opt', 'galactica')): model_type = 'opt' + elif model_name.lower().startswith(('gpt-j', 'pygmalion-6b')): + model_type = 'gptj' else: print("Can't determine model type from model name. Please specify it manually using --model_type " "argument") @@ -27,15 +64,12 @@ def load_quantized(model_name): else: model_type = shared.args.model_type.lower() - if model_type == 'llama': - if not shared.args.pre_layer: - load_quant = llama.load_quant - else: - load_quant = llama_inference_offload.load_quant - elif model_type == 'opt': - load_quant = opt.load_quant + if model_type == 'llama' and shared.args.pre_layer: + oad_quant = llama_inference_offload.load_quant + elif model_type in ('llama', 'opt', 'gptj'): + load_quant = _load_quant else: - print("Unknown pre-quantized model type specified. Only 'llama' and 'opt' are supported") + print("Unknown pre-quantized model type specified. Only 'llama', 'opt' and 'gptj' are supported") exit() # Now we are going to try to locate the quantized model file. 
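For context, a quick usage sketch of the generalized loader introduced above. This is not part of the patch: the model folder and checkpoint path are illustrative assumptions, and it presumes the web UI repo layout (with `repositories/GPTQ-for-LLaMa` present) that the module itself expects. The model argument only needs to be readable by `AutoConfig.from_pretrained`, and the checkpoint a GPTQ-quantized `.pt` or `.safetensors` file.

```python
# Sketch only: paths below are hypothetical examples, not taken from the patch.
from modules.GPTQ_loader import _load_quant

model = _load_quant(
    "models/pygmalion-6b",                        # folder with a config readable by AutoConfig
    "models/pygmalion-6b-4bit-128g.safetensors",  # pre-quantized GPTQ checkpoint
    wbits=4,
    groupsize=128,                                # -1 if the checkpoint was quantized without groups
)
model = model.to("cuda")  # _load_quant returns the model on the CPU; the caller moves it to the GPU
```

The point of the generalization is that one code path now serves LLaMA, OPT and GPT-J checkpoints, instead of dispatching to separate per-architecture `load_quant` implementations (`llama.load_quant`, `opt.load_quant`) as before.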
From 1c075d8d219b5fd2bfeba1b4bad8f912b22a26da Mon Sep 17 00:00:00 2001 From: Maya Eary Date: Tue, 28 Mar 2023 20:43:50 +0300 Subject: [PATCH 02/14] Fix typo --- modules/GPTQ_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index 351d658d..1fdd23c0 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -65,7 +65,7 @@ def load_quantized(model_name): model_type = shared.args.model_type.lower() if model_type == 'llama' and shared.args.pre_layer: - oad_quant = llama_inference_offload.load_quant + load_quant = llama_inference_offload.load_quant elif model_type in ('llama', 'opt', 'gptj'): load_quant = _load_quant else: From 41ec682834de3e7b79cd8e27aeec98690bc209ac Mon Sep 17 00:00:00 2001 From: Maya Eary Date: Tue, 28 Mar 2023 22:45:38 +0300 Subject: [PATCH 03/14] Disable kernel threshold for gpt-j --- modules/GPTQ_loader.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index 1fdd23c0..2a9039a3 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -14,7 +14,7 @@ import llama_inference_offload from quant import make_quant from modelutils import find_layers -def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exclude_layers=['lm_head']): +def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exclude_layers=['lm_head'], kernel_switch_threshold=128): config = AutoConfig.from_pretrained(model) def noop(*args, **kwargs): pass @@ -32,7 +32,7 @@ def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exc for name in exclude_layers: if name in layers: del layers[name] - make_quant(model, layers, wbits, groupsize, faster=faster_kernel) + make_quant(model, layers, wbits, groupsize, faster=faster_kernel, kernel_switch_threshold=kernel_switch_threshold) del layers @@ -109,7 +109,8 @@ def load_quantized(model_name): if shared.args.pre_layer: model = load_quant(str(path_to_model), str(pt_path), shared.args.wbits, shared.args.groupsize, shared.args.pre_layer) else: - model = load_quant(str(path_to_model), str(pt_path), shared.args.wbits, shared.args.groupsize) + threshold = False if model_type == 'gptj' else 128 + model = load_quant(str(path_to_model), str(pt_path), shared.args.wbits, shared.args.groupsize, kernel_switch_threshold=threshold) # accelerate offload (doesn't work properly) if shared.args.gpu_memory: From 0bec15ebcd1571155a54e87b371dc40534864f2e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 17:34:15 -0300 Subject: [PATCH 04/14] Reorder imports --- modules/GPTQ_loader.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index 2a9039a3..c99a63f3 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -5,14 +5,15 @@ from pathlib import Path import accelerate import torch import transformers -from transformers import AutoConfig, AutoModelForCausalLM +from transformers import AutoConfig, AutoModelForCausalLM import modules.shared as shared sys.path.insert(0, str(Path("repositories/GPTQ-for-LLaMa"))) import llama_inference_offload -from quant import make_quant from modelutils import find_layers +from quant import make_quant + def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exclude_layers=['lm_head'], kernel_switch_threshold=128): config = AutoConfig.from_pretrained(model) From 
010b259dde859b5703a6ea4cf2ea6c0aa4f25343 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 17:46:00 -0300 Subject: [PATCH 05/14] Update documentation --- README.md | 2 +- modules/GPTQ_loader.py | 1 - modules/shared.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f6b1d4f5..ba386852 100644 --- a/README.md +++ b/README.md @@ -177,7 +177,7 @@ Optionally, you can use the following command-line flags: | `--cpu` | Use the CPU to generate text.| | `--load-in-8bit` | Load the model with 8-bit precision.| | `--wbits WBITS` | GPTQ: Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported. | -| `--model_type MODEL_TYPE` | GPTQ: Model type of pre-quantized model. Currently only LLaMA and OPT are supported. | +| `--model_type MODEL_TYPE` | GPTQ: Model type of pre-quantized model. Currently LLaMA, OPT, and GPT-J are supported. | | `--groupsize GROUPSIZE` | GPTQ: Group size. | | `--pre_layer PRE_LAYER` | GPTQ: The number of layers to preload. | | `--bf16` | Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU. | diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index c99a63f3..7926d0ab 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -48,7 +48,6 @@ def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exc return model - def load_quantized(model_name): if not shared.args.model_type: # Try to determine model type from model name diff --git a/modules/shared.py b/modules/shared.py index ac9d750c..5d1b42d4 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -84,7 +84,7 @@ parser.add_argument('--gptq-bits', type=int, default=0, help='DEPRECATED: use -- parser.add_argument('--gptq-model-type', type=str, help='DEPRECATED: use --model_type instead.') parser.add_argument('--gptq-pre-layer', type=int, default=0, help='DEPRECATED: use --pre_layer instead.') parser.add_argument('--wbits', type=int, default=0, help='GPTQ: Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.') -parser.add_argument('--model_type', type=str, help='GPTQ: Model type of pre-quantized model. Currently only LLaMA and OPT are supported.') +parser.add_argument('--model_type', type=str, help='GPTQ: Model type of pre-quantized model. 
Currently LLaMA, OPT, and GPT-J are supported.') parser.add_argument('--groupsize', type=int, default=-1, help='GPTQ: Group size.') parser.add_argument('--pre_layer', type=int, default=0, help='GPTQ: The number of layers to preload.') From 304f812c637f5494e6c42d296040f0506d9194a1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 19:20:50 -0300 Subject: [PATCH 06/14] Gracefully handle CUDA out of memory errors with streaming --- modules/callbacks.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/callbacks.py b/modules/callbacks.py index d85f406d..aa92f9cb 100644 --- a/modules/callbacks.py +++ b/modules/callbacks.py @@ -1,4 +1,5 @@ import gc +import traceback from queue import Queue from threading import Thread @@ -63,6 +64,10 @@ class Iteratorize: ret = self.mfunc(callback=_callback, **self.kwargs) except ValueError: pass + except: + traceback.print_exc() + pass + clear_torch_cache() self.q.put(self.sentinel) if self.c_callback: From 1edfb9677840b03ce321a450aed87961af24a361 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 23:27:02 -0300 Subject: [PATCH 07/14] Fix loading extensions from within the interface --- modules/extensions.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/modules/extensions.py b/modules/extensions.py index c3cf4de4..fe6a3945 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -7,7 +7,7 @@ import modules.shared as shared state = {} available_extensions = [] -setup_called = False +setup_called = set() def load_extensions(): global state @@ -53,13 +53,12 @@ def create_extensions_block(): should_display_ui = False # Running setup function - if not setup_called: - for extension, name in iterator(): - if hasattr(extension, "setup"): - extension.setup() - if hasattr(extension, "ui"): - should_display_ui = True - setup_called = True + for extension, name in iterator(): + if hasattr(extension, "ui"): + should_display_ui = True + if extension not in setup_called and hasattr(extension, "setup"): + setup_called.add(extension) + extension.setup() # Creating the extension ui elements if should_display_ui: From c2a863f87deee8b9a314e3c58d93b6b2703cf0d9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 02:11:51 -0300 Subject: [PATCH 08/14] Mention the updated one-click installer --- README.md | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index ba386852..241d0e03 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,28 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Installation -The recommended installation methods are the following: +### One-click installers + +[oobabooga-windows.zip](https://github.com/oobabooga/text-generation-webui/releases/download/installers/oobabooga-windows.zip) + +Just download the zip above, extract it, and double click on "install". The web UI and all its dependencies will be installed in the same folder. + +* To download a model, double click on "download-model" +* To start the web UI, double click on "start-webui" + +Source codes: https://github.com/oobabooga/one-click-installers + +> **Note** +> +> Thanks to [@jllllll](https://github.com/jllllll) and [@ClayShoaf](https://github.com/ClayShoaf), the Windows 1-click installer now sets up 8-bit and 4-bit requirements out of the box. 
No additional installation steps are necessary. + +> **Note** +> +> There is no need to run the installer as admin. + +### Manual installation using Conda + +These are the recommended installation methods: * Linux and MacOS: using conda natively. * Windows: using conda on WSL ([WSL installation guide](https://github.com/oobabooga/text-generation-webui/wiki/Windows-Subsystem-for-Linux-(Ubuntu)-Installation-Guide)). @@ -84,24 +105,8 @@ pip install -r requirements.txt > > For bitsandbytes and `--load-in-8bit` to work on Linux/WSL, this dirty fix is currently necessary: https://github.com/oobabooga/text-generation-webui/issues/400#issuecomment-1474876859 -### Alternative: one-click installers -[oobabooga-windows.zip](https://github.com/oobabooga/one-click-installers/archive/refs/heads/oobabooga-windows.zip) - -[oobabooga-linux.zip](https://github.com/oobabooga/one-click-installers/archive/refs/heads/oobabooga-linux.zip) - -Just download the zip above, extract it, and double click on "install". The web UI and all its dependencies will be installed in the same folder. - -* To download a model, double click on "download-model" -* To start the web UI, double click on "start-webui" - -Source codes: https://github.com/oobabooga/one-click-installers - -> **Note** -> -> To get 8-bit and 4-bit models working in your 1-click Windows installation, you can use the [one-click-bandaid](https://github.com/ClayShoaf/oobabooga-one-click-bandaid). - -### Alternative: native Windows installation +### Alternative: manual Windows installation As an alternative to the recommended WSL method, you can install the web UI natively on Windows using this guide. It will be a lot harder and the performance may be slower: [Installation instructions for human beings](https://github.com/oobabooga/text-generation-webui/wiki/Installation-instructions-for-human-beings). From 5d0b83c341804bcdffe73d8876468012a2edc78b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 02:22:19 -0300 Subject: [PATCH 09/14] Update README.md --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 241d0e03..965c9d15 100644 --- a/README.md +++ b/README.md @@ -57,10 +57,9 @@ Source codes: https://github.com/oobabooga/one-click-installers ### Manual installation using Conda -These are the recommended installation methods: +Recommended if you have some experience with the command-line. -* Linux and MacOS: using conda natively. -* Windows: using conda on WSL ([WSL installation guide](https://github.com/oobabooga/text-generation-webui/wiki/Windows-Subsystem-for-Linux-(Ubuntu)-Installation-Guide)). +On Windows, I additionally recommend carrying out the installation on WSL instead of the base system: [WSL installation guide](https://github.com/oobabooga/text-generation-webui/wiki/Windows-Subsystem-for-Linux-(Ubuntu)-Installation-Guide). Conda can be downloaded here: https://docs.conda.io/en/latest/miniconda.html From 3b4447a4fe2ef7c99322a626b750ea1aa43083e8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 02:24:11 -0300 Subject: [PATCH 10/14] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 965c9d15..87367877 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,8 @@ Recommended if you have some experience with the command-line. 
On Windows, I additionally recommend carrying out the installation on WSL instead of the base system: [WSL installation guide](https://github.com/oobabooga/text-generation-webui/wiki/Windows-Subsystem-for-Linux-(Ubuntu)-Installation-Guide). +#### 0. Install Conda + Conda can be downloaded here: https://docs.conda.io/en/latest/miniconda.html On Linux or WSL, it can be automatically installed with these two commands: From 41b58bc47e84458b880386e57d0d17e2bfe6f76c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 11:02:29 -0300 Subject: [PATCH 11/14] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 87367877..97f26ccb 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,7 @@ pip install -r requirements.txt ### Alternative: manual Windows installation -As an alternative to the recommended WSL method, you can install the web UI natively on Windows using this guide. It will be a lot harder and the performance may be slower: [Installation instructions for human beings](https://github.com/oobabooga/text-generation-webui/wiki/Installation-instructions-for-human-beings). +As an alternative to the recommended WSL method, you can install the web UI natively on Windows using this guide. It will be a lot harder and the performance may be slower: [Windows installation guide](https://github.com/oobabooga/text-generation-webui/wiki/Windows-installation-guide). ### Alternative: Docker From a6d03730639463eb261b40ec5dad380f5df791ed Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 11:48:17 -0300 Subject: [PATCH 12/14] Fix training dataset loading #636 --- modules/training.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/training.py b/modules/training.py index 7bcecb38..913866d9 100644 --- a/modules/training.py +++ b/modules/training.py @@ -119,7 +119,7 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int } # == Prep the dataset, format, etc == - if raw_text_file is not None: + if raw_text_file not in ['None', '']: print("Loading raw text file dataset...") with open(clean_path('training/datasets', f'{raw_text_file}.txt'), 'r') as file: raw_text = file.read() @@ -136,16 +136,17 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int del text_chunks else: - with open(clean_path('training/formats', f'{format}.json'), 'r') as formatFile: - format_data: dict[str, str] = json.load(formatFile) - - if dataset is None: + if dataset in ['None', '']: yield "**Missing dataset choice input, cannot continue.**" return - if format is None: + + if format in ['None', '']: yield "**Missing format choice input, cannot continue.**" return + with open(clean_path('training/formats', f'{format}.json'), 'r') as formatFile: + format_data: dict[str, str] = json.load(formatFile) + def generate_prompt(data_point: dict[str, str]): for options, data in format_data.items(): if set(options.split(',')) == set(x[0] for x in data_point.items() if len(x[1].strip()) > 0): From 58349f44a0924671e65de7cb42764fb846653afe Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 11:55:34 -0300 Subject: [PATCH 13/14] Handle training exception for unsupported models --- modules/training.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/modules/training.py b/modules/training.py index 
913866d9..62ba181c 100644 --- a/modules/training.py +++ b/modules/training.py @@ -2,6 +2,7 @@ import json import sys import threading import time +import traceback from pathlib import Path import gradio as gr @@ -184,7 +185,13 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int bias="none", task_type="CAUSAL_LM" ) - lora_model = get_peft_model(shared.model, config) + + try: + lora_model = get_peft_model(shared.model, config) + except: + yield traceback.format_exc() + return + trainer = transformers.Trainer( model=lora_model, train_dataset=train_data, From 1445ea86f7c2a0c8e3f88337ab15d4e076accc70 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 20:26:44 -0300 Subject: [PATCH 14/14] Add --output and better metadata for downloading models --- download-model.py | 21 +++++++++++++++++---- loras/place-your-loras-here.txt | 0 2 files changed, 17 insertions(+), 4 deletions(-) delete mode 100644 loras/place-your-loras-here.txt diff --git a/download-model.py b/download-model.py index dce7e749..05d9dca4 100644 --- a/download-model.py +++ b/download-model.py @@ -8,6 +8,7 @@ python download-model.py facebook/opt-1.3b import argparse import base64 +import datetime import json import multiprocessing import re @@ -22,6 +23,7 @@ parser.add_argument('MODEL', type=str, default=None, nargs='?') parser.add_argument('--branch', type=str, default='main', help='Name of the Git branch to download from.') parser.add_argument('--threads', type=int, default=1, help='Number of files to download simultaneously.') parser.add_argument('--text-only', action='store_true', help='Only download text files (txt/json).') +parser.add_argument('--output', type=str, default=None, help='The folder where the model should be saved.') args = parser.parse_args() def get_file(args): @@ -169,13 +171,24 @@ if __name__ == '__main__': sys.exit() links, is_lora = get_download_links_from_huggingface(model, branch) - base_folder = 'models' if not is_lora else 'loras' - if branch != 'main': - output_folder = Path(base_folder) / (model.split('/')[-1] + f'_{branch}') + + if args.output is not None: + base_folder = args.output else: - output_folder = Path(base_folder) / model.split('/')[-1] + base_folder = 'models' if not is_lora else 'loras' + + output_folder = f"{'_'.join(model.split('/')[-2:])}" + if branch != 'main': + output_folder += f'_{branch}' + + # Creating the folder and writing the metadata + output_folder = Path(base_folder) / output_folder if not output_folder.exists(): output_folder.mkdir() + with open(output_folder / 'huggingface-metadata.txt', 'w') as f: + f.write(f'url: https://huggingface.co/{model}\n') + f.write(f'branch: {branch}\n') + f.write(f'download date: {str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))}\n') # Downloading the files print(f"Downloading the model to {output_folder}") diff --git a/loras/place-your-loras-here.txt b/loras/place-your-loras-here.txt deleted file mode 100644 index e69de29b..00000000
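To make the behaviour of the last patch concrete, here is a small sketch of the folder naming and metadata file it produces, using the `facebook/opt-1.3b` example from the script's own docstring. `--output`, when given, simply replaces the `models`/`loras` base folder; everything else mirrors the diff above.

```python
# Sketch of the output-folder naming and metadata added in patch 14/14.
import datetime
from pathlib import Path

model, branch, base_folder = 'facebook/opt-1.3b', 'main', 'models'

# New naming: organization and model name joined with '_', branch appended when it is not 'main'
output_folder = '_'.join(model.split('/')[-2:])   # -> 'facebook_opt-1.3b'
if branch != 'main':
    output_folder += f'_{branch}'

output_folder = Path(base_folder) / output_folder
if not output_folder.exists():
    output_folder.mkdir()

# New metadata file recording where and when the files came from
with open(output_folder / 'huggingface-metadata.txt', 'w') as f:
    f.write(f'url: https://huggingface.co/{model}\n')
    f.write(f'branch: {branch}\n')
    f.write(f'download date: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}\n')
```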