diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css
index acff04f1..5a439304 100644
--- a/css/html_instruct_style.css
+++ b/css/html_instruct_style.css
@@ -22,17 +22,12 @@
.message-body p, .message-body li {
font-size: 15px !important;
line-height: 24px !important;
- list-style-position: outside;
}
.message-body p, .chat .message-body ul, .chat .message-body ol {
margin-bottom: 16px !important;
}
-.chat .message-body ul, .chat .message-body ol {
- padding-inline-start: 2em;
-}
-
.message-body p:last-child, .chat .message-body ul:last-child, .chat .message-body ol:last-child {
margin-bottom: 0 !important;
}
diff --git a/css/main.css b/css/main.css
index a53f99d0..33feb179 100644
--- a/css/main.css
+++ b/css/main.css
@@ -364,6 +364,14 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
padding-bottom: 0 !important;
}
+.message-body li {
+ list-style-position: outside;
+}
+
+.chat .message-body ul, .chat .message-body ol {
+ padding-inline-start: 2em;
+}
+
.message-body li:not(:last-child) {
margin-top: 0 !important;
margin-bottom: 2px !important;
diff --git a/extensions/Training_PRO/script.py b/extensions/Training_PRO/script.py
index 5afa627e..8f296462 100644
--- a/extensions/Training_PRO/script.py
+++ b/extensions/Training_PRO/script.py
@@ -51,59 +51,9 @@ from modules.logging_colors import logger
from modules.models import reload_model
from modules.utils import natural_keys
-
-
-## just temporary to avoid warning
-
-import inspect
-
-from typing import Callable, Optional, Tuple, ContextManager
-
-
-
-if hasattr(torch.utils.checkpoint, 'noop_context_fn'):
- def my_checkpoint(
- function,
- *args,
- use_reentrant: Optional[bool] = None,
- context_fn: Callable[[], Tuple[ContextManager, ContextManager]] = torch.utils.checkpoint.noop_context_fn,
- determinism_check: str = torch.utils.checkpoint._DEFAULT_DETERMINISM_MODE,
- debug: bool = False,
- **kwargs
- ):
-
- if use_reentrant is None:
- #print ("reentran = NONE")
- use_reentrant = True
- # Hack to mix *args with **kwargs in a python 2.7-compliant way
- preserve = kwargs.pop("preserve_rng_state", True)
- if kwargs and use_reentrant:
- raise ValueError(
- "Unexpected keyword arguments: " + ",".join(arg for arg in kwargs)
- )
-
- if use_reentrant:
- if context_fn is not torch.utils.checkpoint.noop_context_fn or debug is not False:
- raise ValueError(
- "Passing `context_fn` or `debug` is only supported when "
- "use_reentrant=False."
- )
- return torch.utils.checkpoint.CheckpointFunction.apply(function, preserve, *args)
- else:
-
- print ("reentran = FALSE")
- gen = torch.utils.checkpoint._checkpoint_without_reentrant_generator(
- function, preserve, context_fn, determinism_check, debug, *args, **kwargs
- )
- # Runs pre-forward logic
- next(gen)
- ret = function(*args, **kwargs)
- # Runs post-forward logic
- try:
- next(gen)
- except StopIteration:
- return ret
-
+import warnings
+warnings.filterwarnings(action = "ignore", message="torch.utils.checkpoint:")
+warnings.filterwarnings(action = "ignore", message="`do_sample` is set to `False`")
params = {
"display_name": "Training PRO",
@@ -121,6 +71,7 @@ non_serialized_params = {
"save_epochs": 0,
"checkpoint_offset": 0,
"epoch_offset":0,
+ "safe_serialization": False,
}
MODEL_CLASSES = {v[1]: v[0] for v in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES.items()}
@@ -150,7 +101,7 @@ def ui():
with gr.Row():
with gr.Column():
# YY.MM.DD
- gr.Markdown("`Ver: 23.10.20` This is enhanced version of QLora Training. [Maintained by FP](https://github.com/FartyPants/Training_PRO/tree/main)")
+ gr.Markdown("`Ver: 23.10.20 (REV2)` This is enhanced version of QLora Training. [Maintained by FP](https://github.com/FartyPants/Training_PRO/tree/main)")
with gr.Row():
with gr.Column(scale=5):
@@ -290,7 +241,7 @@ def ui():
stride_length = gr.Slider(label='Stride', minimum=1, maximum=2048, value=512, step=1, info='Used to make the evaluation faster at the cost of accuracy. 1 = slowest but most accurate. 512 is a common value.')
with gr.Column():
- max_length = gr.Slider(label='max_length', minimum=0, maximum=8096, value=0, step=1, info='The context for each evaluation. If set to 0, the maximum context length for the model will be used.')
+ max_length = gr.Slider(label='max_length', minimum=0, maximum=shared.settings['truncation_length_max'], value=0, step=1, info='The context for each evaluation. If set to 0, the maximum context length for the model will be used.')
with gr.Row():
start_current_evaluation = gr.Button("Evaluate loaded model")
@@ -712,7 +663,6 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
}
train_template.clear()
-
#reset stuff
print(f"*** LoRA: {lora_name} ***")
@@ -725,26 +675,8 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
non_serialized_params.update({"checkpoint_offset": 0})
non_serialized_params.update({"epoch_offset": 0})
train_log_graph.clear()
-
- # === once fixed, this can be removed ==============================
- if hasattr(torch.utils.checkpoint, 'noop_context_fn'):
- print("Testing Pytorch...")
- old_checkpoint_signature = inspect.signature(torch.utils.checkpoint.checkpoint)
-
- # Get the signature of your new checkpoint function
- my_checkpoint_signature = inspect.signature(my_checkpoint)
-
- # Check if the signatures match
- if old_checkpoint_signature.parameters == my_checkpoint_signature.parameters:
- print(F"{RED}Overriding Torch checkpoint function to avoid repeated 'use_reentrant not explicitly set' warnings{RESET}")
- #print(" - Note: Transformers need to pass use_reentrant in llama.modeling_llama in def forward, layer_outputs = torch.utils.checkpoint.checkpoint")
- #print(" Once they do, this function can be removed")
- torch.utils.checkpoint.checkpoint = my_checkpoint
-
-
- # END OF FPHAM SENTENCE SPLIT functions ===================
-
- # == Prep the dataset, format, etc ==
+
+ # == Prep the dataset, format, etc ==
if raw_text_file not in ['None', '']:
train_template["template_type"] = "raw_text"
logger.info("Loading text file...")
@@ -1025,7 +957,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
force_save = True
if force_save:
- lora_model.save_pretrained(f"{lora_file_path}/{folder_save}/")
+ lora_model.save_pretrained(f"{lora_file_path}/{folder_save}/", safe_serialization = non_serialized_params['safe_serialization'])
print(f"\033[1;30;40mStep: {tracked.current_steps:6} \033[0;37;0m Saved: [{folder_save}]")
# Save log
with open(f"{lora_file_path}/{folder_save}/training_log.json", 'w', encoding='utf-8') as file:
@@ -1252,7 +1184,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
log_train_dataset(trainer)
trainer.train()
# Note: save in the thread in case the gradio thread breaks (eg browser closed)
- lora_model.save_pretrained(lora_file_path)
+ lora_model.save_pretrained(lora_file_path, safe_serialization = non_serialized_params['safe_serialization'])
logger.info("LoRA training run is completed and saved.")
# Save log
with open(f"{lora_file_path}/training_log.json", 'w', encoding='utf-8') as file:
@@ -1353,7 +1285,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
if not tracked.did_save:
logger.info("Training complete, saving...")
- lora_model.save_pretrained(lora_file_path)
+ lora_model.save_pretrained(lora_file_path, safe_serialization = non_serialized_params['safe_serialization'])
if WANT_INTERRUPT:
logger.info("Training interrupted.")
diff --git a/instruction-templates/Guanaco.yaml b/instruction-templates/Guanaco.yaml
deleted file mode 100644
index fba10031..00000000
--- a/instruction-templates/Guanaco.yaml
+++ /dev/null
@@ -1,25 +0,0 @@
-instruction_template: |-
- {%- set found_item = false -%}
- {%- for message in messages -%}
- {%- if message['role'] == 'system' -%}
- {%- set found_item = true -%}
- {%- endif -%}
- {%- endfor -%}
- {%- if not found_item -%}
- {{- '' + 'A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human\'s questions.' + '\n\n' -}}
- {%- endif %}
- {%- for message in messages %}
- {%- if message['role'] == 'system' -%}
- {{- '' + message['content'] + '\n\n' -}}
- {%- else -%}
- {%- if message['role'] == 'user' -%}
- {{-'### Human: ' + message['content'] + '\n'-}}
- {%- else -%}
- {{-'### Assistant: ' + message['content'] + '\n' -}}
- {%- endif -%}
- {%- endif -%}
- {%- endfor -%}
- {%- if add_generation_prompt -%}
- {{-'### Assistant:'-}}
- {%- endif -%}
-
diff --git a/instruction-templates/H2O-human_bot.yaml b/instruction-templates/H2O-human_bot.yaml
deleted file mode 100644
index f562e451..00000000
--- a/instruction-templates/H2O-human_bot.yaml
+++ /dev/null
@@ -1,25 +0,0 @@
-instruction_template: |-
- {%- set found_item = false -%}
- {%- for message in messages -%}
- {%- if message['role'] == 'system' -%}
- {%- set found_item = true -%}
- {%- endif -%}
- {%- endfor -%}
- {%- if not found_item -%}
- {{- '' + '' + '' -}}
- {%- endif %}
- {%- for message in messages %}
- {%- if message['role'] == 'system' -%}
- {{- '' + message['content'] + '' -}}
- {%- else -%}
- {%- if message['role'] == 'user' -%}
- {{-'<human>: ' + message['content'] + '\n'-}}
- {%- else -%}
- {{-'<bot>:' + message['content'] + '\n' -}}
- {%- endif -%}
- {%- endif -%}
- {%- endfor -%}
- {%- if add_generation_prompt -%}
- {{-'<bot>:'-}}
- {%- endif -%}
-
diff --git a/instruction-templates/LLaVA-v1.yaml b/instruction-templates/LLaVA-v1.yaml
deleted file mode 100644
index f960d808..00000000
--- a/instruction-templates/LLaVA-v1.yaml
+++ /dev/null
@@ -1,25 +0,0 @@
-instruction_template: |-
- {%- set found_item = false -%}
- {%- for message in messages -%}
- {%- if message['role'] == 'system' -%}
- {%- set found_item = true -%}
- {%- endif -%}
- {%- endfor -%}
- {%- if not found_item -%}
- {{- '' + 'A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user\'s questions.' + '\n\n' -}}
- {%- endif %}
- {%- for message in messages %}
- {%- if message['role'] == 'system' -%}
- {{- '' + message['content'] + '\n\n' -}}
- {%- else -%}
- {%- if message['role'] == 'user' -%}
- {{-'USER: ' + message['content'] + '\n'-}}
- {%- else -%}
- {{-'ASSISTANT: ' + message['content'] + '\n' -}}
- {%- endif -%}
- {%- endif -%}
- {%- endfor -%}
- {%- if add_generation_prompt -%}
- {{-'ASSISTANT:'-}}
- {%- endif -%}
-
diff --git a/instruction-templates/Minotaur.yaml b/instruction-templates/Orca-Vicuna.yaml
similarity index 79%
rename from instruction-templates/Minotaur.yaml
rename to instruction-templates/Orca-Vicuna.yaml
index 7b8d5764..b8dd250b 100644
--- a/instruction-templates/Minotaur.yaml
+++ b/instruction-templates/Orca-Vicuna.yaml
@@ -6,20 +6,19 @@ instruction_template: |-
{%- endif -%}
{%- endfor -%}
{%- if not found_item -%}
- {{- '' + '' + '' -}}
+ {{-'SYSTEM: ' + '' + '\n' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
- {{- '' + message['content'] + '' -}}
+ {{-'SYSTEM: ' + message['content'] + '\n' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'USER: ' + message['content'] + '\n'-}}
{%- else -%}
- {{-'ASSISTANT:' + message['content'] + '\n' -}}
+ {{-'ASSISTANT: ' + message['content'] + '\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'ASSISTANT:'-}}
{%- endif -%}
-
diff --git a/instruction-templates/Vigogne-Chat.yaml b/instruction-templates/Vigogne-Chat.yaml
index 8fc3e452..4c4de1db 100644
--- a/instruction-templates/Vigogne-Chat.yaml
+++ b/instruction-templates/Vigogne-Chat.yaml
@@ -13,9 +13,9 @@ instruction_template: |-
{{- '' + message['content'] + '\n' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
- {{-'\n<|USER|>: ' + message['content'] + '\n'-}}
+ {{-'<|USER|>: ' + message['content'] + '\n'-}}
{%- else -%}
- {{-'<|ASSISTANT|>: ' + message['content'] + '' -}}
+ {{-'<|ASSISTANT|>: ' + message['content'] + '\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
diff --git a/instruction-templates/Wizard-Mega WizardLM.yaml b/instruction-templates/Wizard-Mega WizardLM.yaml
deleted file mode 100644
index 1f2086a2..00000000
--- a/instruction-templates/Wizard-Mega WizardLM.yaml
+++ /dev/null
@@ -1,25 +0,0 @@
-instruction_template: |-
- {%- set found_item = false -%}
- {%- for message in messages -%}
- {%- if message['role'] == 'system' -%}
- {%- set found_item = true -%}
- {%- endif -%}
- {%- endfor -%}
- {%- if not found_item -%}
- {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\n\n' -}}
- {%- endif %}
- {%- for message in messages %}
- {%- if message['role'] == 'system' -%}
- {{- '' + message['content'] + '\n\n' -}}
- {%- else -%}
- {%- if message['role'] == 'user' -%}
- {{-'### Instruction:\n' + message['content'] + '\n\n'-}}
- {%- else -%}
- {{-'### Response:\n' + message['content'] + '\n\n' -}}
- {%- endif -%}
- {%- endif -%}
- {%- endfor -%}
- {%- if add_generation_prompt -%}
- {{-'### Response:\n'-}}
- {%- endif -%}
-
diff --git a/models/config.yaml b/models/config.yaml
index 197d4b35..5cebb713 100644
--- a/models/config.yaml
+++ b/models/config.yaml
@@ -38,7 +38,7 @@
instruction_template: 'LLaVA'
custom_stopping_strings: '"\n###"'
.*llava.*1.5:
- instruction_template: 'LLaVA-v1'
+ instruction_template: 'Vicuna-v1.1'
.*wizard.*mega:
instruction_template: 'Wizard-Mega'
custom_stopping_strings: '""'
@@ -108,7 +108,7 @@
.*bactrian:
instruction_template: 'Bactrian'
.*(h2ogpt-oig-|h2ogpt-oasst1-|h2ogpt-research-oasst1-):
- instruction_template: 'H2O-human_bot'
+ instruction_template: 'INCITE-Chat'
.*h2ogpt-gm-:
instruction_template: 'H2O-prompt_answer'
.*manticore:
@@ -128,7 +128,7 @@
.*lazarus:
instruction_template: 'Alpaca'
.*guanaco-.*(7|13|33|65)b:
- instruction_template: 'Guanaco'
+ instruction_template: 'Vicuna-v0'
.*hypermantis:
instruction_template: 'Alpaca'
.*open-llama-.*-open-instruct:
@@ -144,7 +144,7 @@
.*wizardcoder:
instruction_template: 'Alpaca'
.*minotaur:
- instruction_template: 'Minotaur'
+ instruction_template: 'Manticore Chat'
.*orca_mini:
instruction_template: 'Orca Mini'
.*(platypus|gplatty|superplatty):
@@ -186,3 +186,5 @@
instruction_template: 'ChatML'
.*Yi-34B-Chat:
instruction_template: 'ChatML'
+(dolphin).*:
+ instruction_template: 'ChatML'
diff --git a/modules/chat.py b/modules/chat.py
index 613cae1b..3106d3d2 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -112,6 +112,13 @@ def generate_chat_prompt(user_input, state, **kwargs):
if user_input and not impersonate and not _continue:
messages.append({"role": "user", "content": user_input})
+ def remove_extra_bos(prompt):
+ for bos_token in ['<s>', '<|startoftext|>']:
+ while prompt.startswith(bos_token):
+ prompt = prompt[len(bos_token):]
+
+ return prompt
+
def make_prompt(messages):
if state['mode'] == 'chat-instruct' and _continue:
prompt = renderer(messages=messages[:-1])
@@ -123,6 +130,7 @@ def generate_chat_prompt(user_input, state, **kwargs):
if state['custom_system_message'].strip() != '':
outer_messages.append({"role": "system", "content": state['custom_system_message']})
+ prompt = remove_extra_bos(prompt)
command = state['chat-instruct_command']
command = command.replace('<|character|>', state['name2'] if not impersonate else state['name1'])
command = command.replace('<|prompt|>', prompt)
@@ -153,6 +161,7 @@ def generate_chat_prompt(user_input, state, **kwargs):
prompt += prefix
+ prompt = remove_extra_bos(prompt)
return prompt
prompt = make_prompt(messages)
diff --git a/modules/metadata_gguf.py b/modules/metadata_gguf.py
index 0ea41a2a..70ad41dc 100644
--- a/modules/metadata_gguf.py
+++ b/modules/metadata_gguf.py
@@ -82,8 +82,9 @@ def load_metadata(fname):
if value_type == GGUFValueType.ARRAY:
        ltype = GGUFValueType(struct.unpack("<I", file.read(4))[0])