diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css
index bdf68aad..8a31d6e2 100644
--- a/css/html_instruct_style.css
+++ b/css/html_instruct_style.css
@@ -49,7 +49,7 @@
.gradio-container .chat .assistant-message {
padding: 20px;
- background: var(--color-grey-200);
+ background: #f4f4f4;
margin-top: 9px !important;
margin-bottom: 12px !important;
border-radius: 7px;
diff --git a/css/main.css b/css/main.css
index 498b3c6c..5768348e 100644
--- a/css/main.css
+++ b/css/main.css
@@ -95,7 +95,7 @@ gradio-app > :first-child {
}
.header_bar {
- background-color: #f7f7f7;
+ background-color: #f4f4f4;
box-shadow: 0 0 3px rgba(22 22 22 / 35%);
margin-bottom: 0;
overflow-x: scroll;
@@ -336,6 +336,11 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
padding-left: 0;
padding-right: 0;
}
+
+ .chat {
+ padding-left: 0;
+ padding-right: 0;
+ }
}
.chat {
@@ -391,7 +396,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
.chat .message:last-child {
margin-bottom: 0 !important;
- padding-bottom: 0 !important;
+ padding-bottom: 15px !important;
}
.message-body li {
@@ -510,7 +515,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
#show-controls {
position: absolute;
height: 100%;
- background-color: var(--background-fill-primary);
+ background-color: transparent;
border: 0 !important;
border-radius: 0;
}
diff --git a/extensions/sd_api_pictures/script.py b/extensions/sd_api_pictures/script.py
index a16575cd..3a31771a 100644
--- a/extensions/sd_api_pictures/script.py
+++ b/extensions/sd_api_pictures/script.py
@@ -33,7 +33,7 @@ params = {
'hr_upscaler': 'ESRGAN_4x',
'hr_scale': '1.0',
'seed': -1,
- 'sampler_name': 'DPM++ 2M Karras',
+ 'sampler_name': 'DPM++ 2M',
'steps': 32,
'cfg_scale': 7,
'textgen_prefix': 'Please provide a detailed and vivid description of [subject]',
diff --git a/extensions/whisper_stt/script.js b/extensions/whisper_stt/script.js
new file mode 100644
index 00000000..c4a908b5
--- /dev/null
+++ b/extensions/whisper_stt/script.js
@@ -0,0 +1,86 @@
+console.log("Whisper STT script loaded");
+
+let mediaRecorder;
+let audioChunks = [];
+let isRecording = false;
+
+window.startStopRecording = function() {
+ if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
+ console.error("getUserMedia not supported on your browser!");
+ return;
+ }
+
+ if (!isRecording) {
+ //console.log("Start recording function called");
+ navigator.mediaDevices.getUserMedia({ audio: true })
+ .then(stream => {
+ //console.log("Got audio stream");
+ mediaRecorder = new MediaRecorder(stream);
+ audioChunks = []; // Reset audio chunks
+ mediaRecorder.start();
+ //console.log("MediaRecorder started");
+ recordButton.innerHTML = recButton.innerHTML = "Stop";
+ isRecording = true;
+
+ mediaRecorder.addEventListener("dataavailable", event => {
+ //console.log("Data available event, data size: ", event.data.size);
+ audioChunks.push(event.data);
+ });
+
+ mediaRecorder.addEventListener("stop", () => {
+ //console.log("MediaRecorder stopped");
+ if (audioChunks.length > 0) {
+ const audioBlob = new Blob(audioChunks, { type: "audio/webm" });
+ //console.log("Audio blob created, size: ", audioBlob.size);
+ const reader = new FileReader();
+ reader.readAsDataURL(audioBlob);
+ reader.onloadend = function() {
+ const base64data = reader.result;
+ //console.log("Audio converted to base64, length: ", base64data.length);
+
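+ // Pass the base64 audio to the hidden Gradio textbox (elem_id "audio-base64") so the Python side can transcribe it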
+ const audioBase64Input = document.querySelector("#audio-base64 textarea");
+ if (audioBase64Input) {
+ audioBase64Input.value = base64data;
+ audioBase64Input.dispatchEvent(new Event("input", { bubbles: true }));
+ audioBase64Input.dispatchEvent(new Event("change", { bubbles: true }));
+ //console.log("Updated textarea with base64 data");
+ } else {
+ console.error("Could not find audio-base64 textarea");
+ }
+ };
+ } else {
+ console.error("No audio data recorded for Whisper");
+ }
+ });
+ });
+ } else {
+ //console.log("Stopping MediaRecorder");
+ recordButton.innerHTML = recButton.innerHTML = "Rec.";
+ isRecording = false;
+ mediaRecorder.stop();
+ }
+};
+
+const recordButton = gradioApp().querySelector("#record-button");
+recordButton.addEventListener("click", window.startStopRecording);
+
+
+function gradioApp() {
+ const elems = document.getElementsByTagName("gradio-app");
+ const gradioShadowRoot = elems.length == 0 ? null : elems[0].shadowRoot;
+ return gradioShadowRoot ? gradioShadowRoot : document;
+}
+
+
+// Create an extra Rec button next to the Generate button
+var recButton = recordButton.cloneNode(true);
+var generate_button = document.getElementById("Generate");
+generate_button.insertAdjacentElement("afterend", recButton);
+
+recButton.style.setProperty("margin-left", "-10px");
+recButton.innerHTML = "Rec.";
+
+recButton.addEventListener("click", function() {
+ recordButton.click();
+});
diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py
index efc62f41..e45c8b1e 100644
--- a/extensions/whisper_stt/script.py
+++ b/extensions/whisper_stt/script.py
@@ -1,5 +1,13 @@
+import base64
+import gc
+import io
+from pathlib import Path
+
import gradio as gr
-import speech_recognition as sr
+import numpy as np
+import torch
+import whisper
+from pydub import AudioSegment
from modules import shared
@@ -8,13 +16,16 @@ input_hijack = {
'value': ["", ""]
}
-# parameters which can be customized in settings.json of webui
+# parameters which can be customized in settings.yaml of webui
params = {
'whipser_language': 'english',
'whipser_model': 'small.en',
'auto_submit': True
}
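+# Load the Whisper model once at import time; reload_whispermodel() below replaces it when the model or device is changed in the UI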
+startup_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+WHISPERMODEL = whisper.load_model(params['whipser_model'], device=startup_device)
+
def chat_input_modifier(text, visible_text, state):
global input_hijack
@@ -25,47 +36,84 @@ def chat_input_modifier(text, visible_text, state):
return text, visible_text
-def do_stt(audio, whipser_model, whipser_language):
- transcription = ""
- r = sr.Recognizer()
+def do_stt(audio, whipser_language):
+ # use pydub to convert sample_rate and sample_width for whisper input
+ dubaudio = AudioSegment.from_file(io.BytesIO(audio))
+ dubaudio = dubaudio.set_channels(1)
+ dubaudio = dubaudio.set_frame_rate(16000)
+ dubaudio = dubaudio.set_sample_width(2)
- # Convert to AudioData
- audio_data = sr.AudioData(sample_rate=audio[0], frame_data=audio[1], sample_width=4)
+ # Convert to a normalized float32 array, the same way the OpenAI Whisper repo loads audio from a WAV file
+ audio_np = np.frombuffer(dubaudio.raw_data, np.int16).flatten().astype(np.float32) / 32768.0
- try:
- transcription = r.recognize_whisper(audio_data, language=whipser_language, model=whipser_model)
- except sr.UnknownValueError:
- print("Whisper could not understand audio")
- except sr.RequestError as e:
- print("Could not request results from Whisper", e)
+ if len(whipser_language) == 0:
+ result = WHISPERMODEL.transcribe(audio=audio_np)
+ else:
+ result = WHISPERMODEL.transcribe(audio=audio_np, language=whipser_language)
+ return result["text"]
+
+def auto_transcribe(audio, auto_submit, whipser_language):
+ if audio is None or audio == "":
+ print("Whisper received no audio data")
+ return "", ""
+ audio_bytes = base64.b64decode(audio.split(',')[1])
+
+ transcription = do_stt(audio_bytes, whipser_language)
+ if auto_submit:
+ input_hijack.update({"state": True, "value": [transcription, transcription]})
return transcription
-def auto_transcribe(audio, auto_submit, whipser_model, whipser_language):
- if audio is None:
- return "", ""
- transcription = do_stt(audio, whipser_model, whipser_language)
- if auto_submit:
- input_hijack.update({"state": True, "value": [transcription, transcription]})
+def reload_whispermodel(whisper_model_name: str, whisper_language: str, device: str):
+ if len(whisper_model_name) > 0:
+ global WHISPERMODEL
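+ # Release the current model and free GPU memory before loading the new one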
+ WHISPERMODEL = None
+ if torch.cuda.is_available():
+ torch.cuda.empty_cache()
+ gc.collect()
- return transcription, None
+ if device != "none":
+ if device == "cuda":
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+ WHISPERMODEL = whisper.load_model(whisper_model_name, device=device)
+ params.update({"whipser_model": whisper_model_name})
+ if ".en" in whisper_model_name:
+ whisper_language = "english"
+ audio_update = gr.Audio.update(interactive=True)
+ else:
+ audio_update = gr.Audio.update(interactive=False)
+ return [whisper_model_name, whisper_language, str(device), audio_update]
def ui():
with gr.Accordion("Whisper STT", open=True):
with gr.Row():
- audio = gr.Audio(source="microphone")
+ audio = gr.Textbox(elem_id="audio-base64", visible=False)
+ record_button = gr.Button("Rec.", elem_id="record-button", elem_classes="custom-button")
with gr.Row():
with gr.Accordion("Settings", open=False):
auto_submit = gr.Checkbox(label='Submit the transcribed audio automatically', value=params['auto_submit'])
- whipser_model = gr.Dropdown(label='Whisper Model', value=params['whipser_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large"])
- whipser_language = gr.Dropdown(label='Whisper Language', value=params['whipser_language'], choices=["chinese", "german", "spanish", "russian", "korean", "french", "japanese", "portuguese", "turkish", "polish", "catalan", "dutch", "arabic", "swedish", "italian", "indonesian", "hindi", "finnish", "vietnamese", "hebrew", "ukrainian", "greek", "malay", "czech", "romanian", "danish", "hungarian", "tamil", "norwegian", "thai", "urdu", "croatian", "bulgarian", "lithuanian", "latin", "maori", "malayalam", "welsh", "slovak", "telugu", "persian", "latvian", "bengali", "serbian", "azerbaijani", "slovenian", "kannada", "estonian", "macedonian", "breton", "basque", "icelandic", "armenian", "nepali", "mongolian", "bosnian", "kazakh", "albanian", "swahili", "galician", "marathi", "punjabi", "sinhala", "khmer", "shona", "yoruba", "somali", "afrikaans", "occitan", "georgian", "belarusian", "tajik", "sindhi", "gujarati", "amharic", "yiddish", "lao", "uzbek", "faroese", "haitian creole", "pashto", "turkmen", "nynorsk", "maltese", "sanskrit", "luxembourgish", "myanmar", "tibetan", "tagalog", "malagasy", "assamese", "tatar", "hawaiian", "lingala", "hausa", "bashkir", "javanese", "sundanese"])
+ device_dropd = gr.Dropdown(label='Device', value=str(startup_device), choices=["cuda", "cpu", "none"])
+ whisper_model_dropd = gr.Dropdown(label='Whisper Model', value=params['whipser_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large"])
+ whisper_language = gr.Dropdown(label='Whisper Language', value=params['whipser_language'], choices=["english", "chinese", "german", "spanish", "russian", "korean", "french", "japanese", "portuguese", "turkish", "polish", "catalan", "dutch", "arabic", "swedish", "italian", "indonesian", "hindi", "finnish", "vietnamese", "hebrew", "ukrainian", "greek", "malay", "czech", "romanian", "danish", "hungarian", "tamil", "norwegian", "thai", "urdu", "croatian", "bulgarian", "lithuanian", "latin", "maori", "malayalam", "welsh", "slovak", "telugu", "persian", "latvian", "bengali", "serbian", "azerbaijani", "slovenian", "kannada", "estonian", "macedonian", "breton", "basque", "icelandic", "armenian", "nepali", "mongolian", "bosnian", "kazakh", "albanian", "swahili", "galician", "marathi", "punjabi", "sinhala", "khmer", "shona", "yoruba", "somali", "afrikaans", "occitan", "georgian", "belarusian", "tajik", "sindhi", "gujarati", "amharic", "yiddish", "lao", "uzbek", "faroese", "haitian creole", "pashto", "turkmen", "nynorsk", "maltese", "sanskrit", "luxembourgish", "myanmar", "tibetan", "tagalog", "malagasy", "assamese", "tatar", "hawaiian", "lingala", "hausa", "bashkir", "javanese", "sundanese"])
- audio.stop_recording(
- auto_transcribe, [audio, auto_submit, whipser_model, whipser_language], [shared.gradio['textbox'], audio]).then(
- None, auto_submit, None, js="(check) => {if (check) { document.getElementById('Generate').click() }}")
+ audio.change(
+ auto_transcribe, [audio, auto_submit, whisper_language], [shared.gradio['textbox']]).then(
+ None, auto_submit, None, js="(check) => {if (check) { document.getElementById('Generate').click() }}")
- whipser_model.change(lambda x: params.update({"whipser_model": x}), whipser_model, None)
- whipser_language.change(lambda x: params.update({"whipser_language": x}), whipser_language, None)
+ device_dropd.input(reload_whispermodel, [whisper_model_dropd, whisper_language, device_dropd], [whisper_model_dropd, whisper_language, device_dropd, audio])
+ whisper_model_dropd.change(reload_whispermodel, [whisper_model_dropd, whisper_language, device_dropd], [whisper_model_dropd, whisper_language, device_dropd, audio])
+ whisper_language.change(lambda x: params.update({"whipser_language": x}), whisper_language, None)
auto_submit.change(lambda x: params.update({"auto_submit": x}), auto_submit, None)
+
+
+def custom_js():
+ """
+ Returns custom javascript as a string. It is applied whenever the web UI is
+ loaded.
+ :return:
+ """
+ with open(Path(__file__).parent.resolve() / "script.js", "r") as f:
+ return f.read()
diff --git a/js/main.js b/js/main.js
index e9a980e2..6b456517 100644
--- a/js/main.js
+++ b/js/main.js
@@ -7,30 +7,30 @@ main_parent.parentNode.style = "gap: 0";
main_parent.parentNode.parentNode.style = "padding: 0";
document.querySelector(".header_bar").addEventListener("click", function(event) {
- if (event.target.tagName === "BUTTON") {
- const buttonText = event.target.textContent.trim();
+ if (event.target.tagName !== "BUTTON") return;
- let chat_visible = (buttonText == "Chat");
- let default_visible = (buttonText == "Default");
- let notebook_visible = (buttonText == "Notebook");
+ const buttonText = event.target.textContent.trim();
+ const extensionsVisible = ["Chat", "Default", "Notebook"].includes(buttonText);
+ const chatVisible = buttonText === "Chat";
+ const showControlsChecked = document.querySelector("#show-controls input").checked;
+ const extensions = document.querySelector("#extensions");
- // Check if one of the generation tabs is visible
- if (chat_visible || notebook_visible || default_visible) {
- extensions && (extensions.style.display = "flex");
-
- if (chat_visible) {
- this.style.marginBottom = "0px";
- extensions && (extensions.style.maxWidth = "880px");
- extensions && (extensions.style.padding = "0px");
- } else {
- this.style.marginBottom = "19px";
- extensions && (extensions.style.maxWidth = "none");
- extensions && (extensions.style.padding = "15px");
- }
- } else {
- this.style.marginBottom = "19px";
- extensions && (extensions.style.display = "none");
+ if (extensionsVisible) {
+ if (extensions) {
+ extensions.style.display = "flex";
+ extensions.style.maxWidth = chatVisible ? "880px" : "none";
+ extensions.style.padding = chatVisible ? "0px" : "15px";
}
+ this.style.marginBottom = chatVisible ? "0px" : "19px";
+
+ if (chatVisible && !showControlsChecked) {
+ document.querySelectorAll("#chat-tab > div > :nth-child(n+2), #extensions").forEach(element => {
+ element.style.display = "none";
+ });
+ }
+ } else {
+ this.style.marginBottom = "19px";
+ if (extensions) extensions.style.display = "none";
}
});
@@ -539,3 +539,64 @@ document.querySelectorAll(".focus-on-chat-input").forEach(element => {
// Fix a border around the "past chats" menu
//------------------------------------------------
document.getElementById("past-chats").parentNode.style.borderRadius = "0px";
+
+//------------------------------------------------
+// Allow the character dropdown to be shared between the
+// Chat tab and the Parameters > Character tab
+//------------------------------------------------
+
+const headerBar = document.querySelector(".header_bar");
+let originalParent;
+let originalIndex; // To keep track of the original position
+let movedElement;
+
+function moveToChatTab() {
+ const characterMenu = document.getElementById("character-menu");
+ const grandParent = characterMenu.parentElement.parentElement;
+
+ // Save the initial location for the character dropdown
+ if (!originalParent) {
+ originalParent = grandParent.parentElement;
+ originalIndex = Array.from(originalParent.children).indexOf(grandParent);
+ movedElement = grandParent;
+ }
+
+ // Do not show the Character dropdown in the Chat tab when "instruct" mode is selected
+ const instructRadio = document.querySelector("#chat-mode input[value=\"instruct\"]");
+ if (instructRadio && instructRadio.checked) {
+ grandParent.style.display = "none";
+ }
+
+ const chatControlsFirstChild = document.querySelector("#chat-controls").firstElementChild;
+ const newParent = chatControlsFirstChild;
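+ // Insert the dropdown just before the last two children of the chat controls column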
+ let newPosition = newParent.children.length - 2;
+
+ newParent.insertBefore(grandParent, newParent.children[newPosition]);
+ document.getElementById("save-character").style.display = "none";
+}
+
+function restoreOriginalPosition() {
+ if (originalParent && movedElement) {
+ if (originalIndex >= originalParent.children.length) {
+ originalParent.appendChild(movedElement);
+ } else {
+ originalParent.insertBefore(movedElement, originalParent.children[originalIndex]);
+ }
+
+ document.getElementById("save-character").style.display = "";
+ movedElement.style.display = "";
+ }
+}
+
+headerBar.addEventListener("click", (e) => {
+ if (e.target.tagName === "BUTTON") {
+ const tabName = e.target.textContent.trim();
+ if (tabName === "Chat") {
+ moveToChatTab();
+ } else {
+ restoreOriginalPosition();
+ }
+ }
+});
+
+moveToChatTab();
diff --git a/modules/chat.py b/modules/chat.py
index 5d2bdd63..920c0f7b 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -3,6 +3,7 @@ import copy
import functools
import html
import json
+import pprint
import re
from datetime import datetime
from functools import partial
@@ -259,10 +260,27 @@ def get_stopping_strings(state):
suffix_bot + prefix_user,
]
+ # Try to find the EOT token
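+ # e.g. "<|im_end|>\n<|im_start|>user" yields the extra stopping string "<|im_end|>"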
+ for item in stopping_strings.copy():
+ item = item.strip()
+ if item.startswith("<") and ">" in item:
+ stopping_strings.append(item.split(">")[0] + ">")
+ elif item.startswith("[") and "]" in item:
+ stopping_strings.append(item.split("]")[0] + "]")
+
if 'stopping_strings' in state and isinstance(state['stopping_strings'], list):
stopping_strings += state.pop('stopping_strings')
- return list(set(stopping_strings))
+ # Remove redundant items that start with another item
+ result = [item for item in stopping_strings if not any(item.startswith(other) and item != other for other in stopping_strings)]
+ result = list(set(result))
+
+ if shared.args.verbose:
+ logger.info("STOPPING_STRINGS=")
+ pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(result)
+ print()
+
+ return result
def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_message=True, for_ui=False):
diff --git a/modules/llama_cpp_python_hijack.py b/modules/llama_cpp_python_hijack.py
index eb23177f..f3f3f560 100644
--- a/modules/llama_cpp_python_hijack.py
+++ b/modules/llama_cpp_python_hijack.py
@@ -1,3 +1,4 @@
+import importlib
from typing import Sequence
from tqdm import tqdm
@@ -5,20 +6,55 @@ from tqdm import tqdm
from modules import shared
from modules.cache_utils import process_llamacpp_cache
-try:
- import llama_cpp
-except:
- llama_cpp = None
-try:
- import llama_cpp_cuda
-except:
- llama_cpp_cuda = None
+imported_module = None
-try:
- import llama_cpp_cuda_tensorcores
-except:
- llama_cpp_cuda_tensorcores = None
+
+def llama_cpp_lib():
+ global imported_module
+
+ return_lib = None
+
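+ # Library priority: llama_cpp when --cpu is set, llama_cpp_cuda_tensorcores when --tensorcores is set,
+ # then llama_cpp_cuda, and finally llama_cpp as the fallback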
+ if shared.args.cpu:
+ if imported_module and imported_module != 'llama_cpp':
+ raise Exception(f"Cannot import 'llama_cpp' because '{imported_module}' is already imported. See issue #1575 in llama-cpp-python. Please restart the server before attempting to use a different version of llama-cpp-python.")
+ try:
+ return_lib = importlib.import_module('llama_cpp')
+ imported_module = 'llama_cpp'
+ except:
+ pass
+
+ if shared.args.tensorcores and return_lib is None:
+ if imported_module and imported_module != 'llama_cpp_cuda_tensorcores':
+ raise Exception(f"Cannot import 'llama_cpp_cuda_tensorcores' because '{imported_module}' is already imported. See issue #1575 in llama-cpp-python. Please restart the server before attempting to use a different version of llama-cpp-python.")
+ try:
+ return_lib = importlib.import_module('llama_cpp_cuda_tensorcores')
+ imported_module = 'llama_cpp_cuda_tensorcores'
+ except:
+ pass
+
+ if return_lib is None:
+ if imported_module and imported_module != 'llama_cpp_cuda':
+ raise Exception(f"Cannot import 'llama_cpp_cuda' because '{imported_module}' is already imported. See issue #1575 in llama-cpp-python. Please restart the server before attempting to use a different version of llama-cpp-python.")
+ try:
+ return_lib = importlib.import_module('llama_cpp_cuda')
+ imported_module = 'llama_cpp_cuda'
+ except:
+ pass
+
+ if return_lib is None and not shared.args.cpu:
+ if imported_module and imported_module != 'llama_cpp':
+ raise Exception(f"Cannot import 'llama_cpp' because '{imported_module}' is already imported. See issue #1575 in llama-cpp-python. Please restart the server before attempting to use a different version of llama-cpp-python.")
+ try:
+ return_lib = importlib.import_module('llama_cpp')
+ imported_module = 'llama_cpp'
+ except:
+ pass
+
+ if return_lib is not None:
+ monkey_patch_llama_cpp_python(return_lib)
+
+ return return_lib
def eval_with_progress(self, tokens: Sequence[int]):
@@ -63,7 +99,7 @@ def eval_with_progress(self, tokens: Sequence[int]):
self.n_tokens += n_tokens
-def monkey_patch_generate(lib):
+def monkey_patch_llama_cpp_python(lib):
def my_generate(self, *args, **kwargs):
@@ -77,11 +113,6 @@ def monkey_patch_generate(lib):
for output in self.original_generate(*args, **kwargs):
yield output
+ lib.Llama.eval = eval_with_progress
lib.Llama.original_generate = lib.Llama.generate
lib.Llama.generate = my_generate
-
-
-for lib in [llama_cpp, llama_cpp_cuda, llama_cpp_cuda_tensorcores]:
- if lib is not None:
- lib.Llama.eval = eval_with_progress
- monkey_patch_generate(lib)
diff --git a/modules/llamacpp_hf.py b/modules/llamacpp_hf.py
index 74af5fbf..327e3a7b 100644
--- a/modules/llamacpp_hf.py
+++ b/modules/llamacpp_hf.py
@@ -7,35 +7,10 @@ from torch.nn import CrossEntropyLoss
from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
from transformers.modeling_outputs import CausalLMOutputWithPast
-from modules import llama_cpp_python_hijack, shared
+from modules import shared
+from modules.llama_cpp_python_hijack import llama_cpp_lib
from modules.logging_colors import logger
-try:
- import llama_cpp
-except:
- llama_cpp = None
-
-try:
- import llama_cpp_cuda
-except:
- llama_cpp_cuda = None
-
-try:
- import llama_cpp_cuda_tensorcores
-except:
- llama_cpp_cuda_tensorcores = None
-
-
-def llama_cpp_lib():
- if shared.args.cpu and llama_cpp is not None:
- return llama_cpp
- elif shared.args.tensorcores and llama_cpp_cuda_tensorcores is not None:
- return llama_cpp_cuda_tensorcores
- elif llama_cpp_cuda is not None:
- return llama_cpp_cuda
- else:
- return llama_cpp
-
class LlamacppHF(PreTrainedModel):
def __init__(self, model, path):
@@ -221,6 +196,13 @@ class LlamacppHF(PreTrainedModel):
'flash_attn': shared.args.flash_attn
}
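+ # 2 and 8 are llama.cpp ggml_type values (GGML_TYPE_Q4_0 and GGML_TYPE_Q8_0) used to quantize the KV cache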
+ if shared.args.cache_4bit:
+ params["type_k"] = 2
+ params["type_v"] = 2
+ elif shared.args.cache_8bit:
+ params["type_k"] = 8
+ params["type_v"] = 8
+
Llama = llama_cpp_lib().Llama
model = Llama(**params)
diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py
index d62fd517..a16230ca 100644
--- a/modules/llamacpp_model.py
+++ b/modules/llamacpp_model.py
@@ -4,37 +4,12 @@ from functools import partial
import numpy as np
import torch
-from modules import llama_cpp_python_hijack, shared
+from modules import shared
from modules.callbacks import Iteratorize
+from modules.llama_cpp_python_hijack import llama_cpp_lib
from modules.logging_colors import logger
from modules.text_generation import get_max_prompt_length
-try:
- import llama_cpp
-except:
- llama_cpp = None
-
-try:
- import llama_cpp_cuda
-except:
- llama_cpp_cuda = None
-
-try:
- import llama_cpp_cuda_tensorcores
-except:
- llama_cpp_cuda_tensorcores = None
-
-
-def llama_cpp_lib():
- if shared.args.cpu and llama_cpp is not None:
- return llama_cpp
- elif shared.args.tensorcores and llama_cpp_cuda_tensorcores is not None:
- return llama_cpp_cuda_tensorcores
- elif llama_cpp_cuda is not None:
- return llama_cpp_cuda
- else:
- return llama_cpp
-
def ban_eos_logits_processor(eos_token, input_ids, logits):
logits[eos_token] = -float('inf')
@@ -100,6 +75,13 @@ class LlamaCppModel:
'flash_attn': shared.args.flash_attn
}
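+ # 2 and 8 are llama.cpp ggml_type values (GGML_TYPE_Q4_0 and GGML_TYPE_Q8_0) used to quantize the KV cache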
+ if shared.args.cache_4bit:
+ params["type_k"] = 2
+ params["type_v"] = 2
+ elif shared.args.cache_8bit:
+ params["type_k"] = 8
+ params["type_v"] = 8
+
result.model = Llama(**params)
if cache_capacity > 0:
result.model.set_cache(LlamaCache(capacity_bytes=cache_capacity))
diff --git a/modules/loaders.py b/modules/loaders.py
index 1da37595..78601c17 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -21,6 +21,7 @@ loaders_and_params = OrderedDict({
'trust_remote_code',
'no_use_fast',
'use_flash_attention_2',
+ 'use_eager_attention',
'alpha_value',
'compress_pos_emb',
'disable_exllama',
@@ -30,6 +31,8 @@ loaders_and_params = OrderedDict({
'llama.cpp': [
'n_ctx',
'n_gpu_layers',
+ 'cache_8bit',
+ 'cache_4bit',
'tensor_split',
'n_batch',
'threads',
@@ -51,6 +54,8 @@ loaders_and_params = OrderedDict({
'llamacpp_HF': [
'n_ctx',
'n_gpu_layers',
+ 'cache_8bit',
+ 'cache_4bit',
'tensor_split',
'n_batch',
'threads',
diff --git a/modules/models.py b/modules/models.py
index da741cb0..07c14308 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -146,6 +146,9 @@ def huggingface_loader(model_name):
if shared.args.force_safetensors:
params['force_safetensors'] = True
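+ # Some architectures (e.g. Gemma-2, see models_settings.py) need eager attention instead of the default SDPA implementation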
+ if shared.args.use_eager_attention:
+ params['attn_implementation'] = 'eager'
+
config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=shared.args.trust_remote_code)
if 'chatglm' in model_name.lower():
diff --git a/modules/models_settings.py b/modules/models_settings.py
index 2e3fff9c..7ae68125 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -9,6 +9,8 @@ from modules import chat, loaders, metadata_gguf, shared, ui
def get_fallback_settings():
return {
+ 'bf16': False,
+ 'use_eager_attention': False,
'wbits': 'None',
'groupsize': 'None',
'desc_act': False,
@@ -97,10 +99,18 @@ def get_model_metadata(model):
elif 'attn_config' in metadata and 'rope_theta' in metadata['attn_config']:
model_settings['rope_freq_base'] = metadata['attn_config']['rope_theta']
- if 'rope_scaling' in metadata and type(metadata['rope_scaling']) is dict and all(key in metadata['rope_scaling'] for key in ('type', 'factor')):
+ if 'rope_scaling' in metadata and isinstance(metadata['rope_scaling'], dict) and all(key in metadata['rope_scaling'] for key in ('type', 'factor')):
if metadata['rope_scaling']['type'] == 'linear':
model_settings['compress_pos_emb'] = metadata['rope_scaling']['factor']
+ # For Gemma-2
+ if 'torch_dtype' in metadata and metadata['torch_dtype'] == 'bfloat16':
+ model_settings['bf16'] = True
+
+ # For Gemma-2
+ if 'architectures' in metadata and isinstance(metadata['architectures'], list) and 'Gemma2ForCausalLM' in metadata['architectures']:
+ model_settings['use_eager_attention'] = True
+
# Read GPTQ metadata for old GPTQ loaders
if 'quantization_config' in metadata and metadata['quantization_config'].get('quant_method', '') != 'exl2':
if 'bits' in metadata['quantization_config']:
@@ -133,7 +143,7 @@ def get_model_metadata(model):
for k in ['eos_token', 'bos_token']:
if k in metadata:
value = metadata[k]
- if type(value) is dict:
+ if isinstance(value, dict):
value = value['content']
template = template.replace(k, "'{}'".format(value))
@@ -168,7 +178,7 @@ def infer_loader(model_name, model_settings):
path_to_model = Path(f'{shared.args.model_dir}/{model_name}')
if not path_to_model.exists():
loader = None
- elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and type(model_settings['wbits']) is int and model_settings['wbits'] > 0):
+ elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and isinstance(model_settings['wbits'], int) and model_settings['wbits'] > 0):
loader = 'ExLlamav2_HF'
elif (path_to_model / 'quant_config.json').exists() or re.match(r'.*-awq', model_name.lower()):
loader = 'AutoAWQ'
diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py
index ad74d658..9fb661ae 100644
--- a/modules/sampler_hijack.py
+++ b/modules/sampler_hijack.py
@@ -359,14 +359,14 @@ class RepetitionPenaltyLogitsProcessorWithRange(LogitsProcessor):
return scores
-def get_logits_warper_patch(self, generation_config):
+def get_logits_warper_patch(self, generation_config, **kwargs):
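+ # Newer transformers versions (4.42+) pass extra arguments (such as device) to _get_logits_warper; accept and forward them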
# Parameter sanitization
if isinstance(generation_config.temperature, int):
generation_config.temperature = float(generation_config.temperature) # Must be float
# Get the original warpers
- warpers = self._get_logits_warper_old(generation_config)
+ warpers = self._get_logits_warper_old(generation_config, **kwargs)
# Replace temperature with our modified class.
# Currently, it behaves identically to the original.
diff --git a/modules/shared.py b/modules/shared.py
index ebbfc268..e04c549a 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -106,6 +106,7 @@ group.add_argument('--trust-remote-code', action='store_true', help='Set trust_r
group.add_argument('--force-safetensors', action='store_true', help='Set use_safetensors=True while loading the model. This prevents arbitrary code execution.')
group.add_argument('--no_use_fast', action='store_true', help='Set use_fast=False while loading the tokenizer (it\'s True by default). Use this if you have any problems related to use_fast.')
group.add_argument('--use_flash_attention_2', action='store_true', help='Set use_flash_attention_2=True while loading the model.')
+group.add_argument('--use_eager_attention', action='store_true', help='Set attn_implementation=eager while loading the model.')
# bitsandbytes 4-bit
group = parser.add_argument_group('bitsandbytes 4-bit')
diff --git a/modules/ui.py b/modules/ui.py
index c20a7888..b1c1cf6d 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -43,6 +43,11 @@ theme = gr.themes.Default(
body_text_color_subdued='#484848',
background_fill_secondary='#eaeaea',
background_fill_primary='var(--neutral-50)',
+ body_background_fill="white",
+ block_background_fill="#f4f4f4",
+ body_text_color="#333",
+ button_secondary_background_fill="#f4f4f4",
+ button_secondary_border_color="var(--border-color-primary)"
)
if Path("notification.mp3").exists():
@@ -64,6 +69,7 @@ def list_model_elements():
'trust_remote_code',
'no_use_fast',
'use_flash_attention_2',
+ 'use_eager_attention',
'load_in_4bit',
'compute_dtype',
'quant_type',
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 91951624..6942588e 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -87,16 +87,11 @@ def create_ui():
with gr.Row():
shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template selected under Parameters > Instruction template must match the current model.', elem_id='chat-mode')
- with gr.Row():
- shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', elem_classes='slim-dropdown')
- shared.gradio['refresh_character'] = ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
- shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
-
with gr.Row():
shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
with gr.Row():
- shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=16, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=False, elem_classes=['add_scrollbar'])
+ shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=False, elem_classes=['add_scrollbar'])
def create_chat_settings_ui():
@@ -105,10 +100,15 @@ def create_chat_settings_ui():
with gr.Row():
with gr.Column(scale=8):
with gr.Tab("Character"):
+ with gr.Row():
+ shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
+ ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
+ shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', elem_id="save-character", interactive=not mu)
+ shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
+
shared.gradio['name2'] = gr.Textbox(value='', lines=1, label='Character\'s name')
shared.gradio['context'] = gr.Textbox(value='', lines=10, label='Context', elem_classes=['add_scrollbar'])
shared.gradio['greeting'] = gr.Textbox(value='', lines=5, label='Greeting', elem_classes=['add_scrollbar'])
- shared.gradio['save_character'] = gr.Button('Save character', elem_classes=['small-button'], interactive=not mu)
with gr.Tab("User"):
shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Name')
@@ -300,8 +300,10 @@ def create_event_handlers():
lambda x: gr.update(choices=(histories := chat.find_all_histories_with_first_prompts(x)), value=histories[0][1]), gradio('interface_state'), gradio('unique_id'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}')
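+ # Hide the character dropdown in the Chat tab when instruct mode is selected (see also moveToChatTab() in js/main.js)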
+ shared.gradio['mode'].change(None, gradio('mode'), None, js="(mode) => {mode === 'instruct' ? document.getElementById('character-menu').parentNode.parentNode.style.display = 'none' : document.getElementById('character-menu').parentNode.parentNode.style.display = ''}")
+
shared.gradio['mode'].change(
- lambda x: [gr.update(visible=(x != 'instruct'))] * 4 + [gr.update(visible=(x == 'chat-instruct'))], gradio('mode'), gradio('character_menu', 'refresh_character', 'delete_character', 'chat_style', 'chat-instruct_command'), show_progress=False).then(
+ lambda x: [gr.update(visible=x != 'instruct'), gr.update(visible=x == 'chat-instruct')], gradio('mode'), gradio('chat_style', 'chat-instruct_command'), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.load_latest_history, gradio('interface_state'), gradio('history')).then(
chat.redraw_html, gradio(reload_arr), gradio('display')).then(
diff --git a/modules/ui_default.py b/modules/ui_default.py
index bf9800f6..e3bfe784 100644
--- a/modules/ui_default.py
+++ b/modules/ui_default.py
@@ -16,7 +16,6 @@ outputs = ('output_textbox', 'html-default')
def create_ui():
mu = shared.args.multi_user
with gr.Tab('Default', elem_id='default-tab'):
- shared.gradio['last_input-default'] = gr.State('')
with gr.Row():
with gr.Column():
with gr.Row():
@@ -63,14 +62,12 @@ def create_ui():
def create_event_handlers():
shared.gradio['Generate-default'].click(
- lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['textbox-default'].submit(
- lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 3ebcd126..9a4e7351 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -115,6 +115,7 @@ def create_ui():
shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant)
shared.gradio['use_flash_attention_2'] = gr.Checkbox(label="use_flash_attention_2", value=shared.args.use_flash_attention_2, info='Set use_flash_attention_2=True while loading the model.')
+ shared.gradio['use_eager_attention'] = gr.Checkbox(label="use_eager_attention", value=shared.args.use_eager_attention, info='Set attn_implementation=eager while loading the model.')
shared.gradio['flash_attn'] = gr.Checkbox(label="flash_attn", value=shared.args.flash_attn, info='Use flash-attention.')
shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices)
shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards.')
diff --git a/requirements.txt b/requirements.txt
index fb35c7d8..22ae8bed 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
-accelerate==0.30.*
-aqlm[gpu,cpu]==1.1.5; platform_system == "Linux"
+accelerate==0.31.*
+aqlm[gpu,cpu]==1.1.6; platform_system == "Linux"
auto-gptq==0.7.1
bitsandbytes==0.43.*
colorama
@@ -7,7 +7,7 @@ datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@@ -24,7 +24,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.41.*
+transformers==4.42.*
tqdm
wandb
@@ -35,22 +35,22 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# llama-cpp-python (CUDA, no tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# llama-cpp-python (CUDA, tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# CUDA wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements_amd.txt b/requirements_amd.txt
index 5bb68522..464a09f4 100644
--- a/requirements_amd.txt
+++ b/requirements_amd.txt
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
colorama
datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.41.*
+transformers==4.42.*
tqdm
wandb
@@ -32,14 +32,14 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# AMD wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.79+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.79+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.81+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.81+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt
index eee2c662..9f700b4b 100644
--- a/requirements_amd_noavx2.txt
+++ b/requirements_amd_noavx2.txt
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
colorama
datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.41.*
+transformers==4.42.*
tqdm
wandb
@@ -32,10 +32,10 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# AMD wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt
index 61e3c47c..ef9a6d61 100644
--- a/requirements_apple_intel.txt
+++ b/requirements_apple_intel.txt
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
colorama
datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.41.*
+transformers==4.42.*
tqdm
wandb
@@ -32,10 +32,8 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6-py3-none-any.whl
diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt
index b9497470..0e4574ee 100644
--- a/requirements_apple_silicon.txt
+++ b/requirements_apple_silicon.txt
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
colorama
datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.41.*
+transformers==4.42.*
tqdm
wandb
@@ -32,12 +32,10 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6-py3-none-any.whl
diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt
index ad38f23d..37a2d102 100644
--- a/requirements_cpu_only.txt
+++ b/requirements_cpu_only.txt
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
colorama
datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.41.*
+transformers==4.42.*
tqdm
wandb
@@ -32,7 +32,7 @@ sse-starlette==1.6.5
tiktoken

# llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt
index 0debda88..d2cc0cbe 100644
--- a/requirements_cpu_only_noavx2.txt
+++ b/requirements_cpu_only_noavx2.txt
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
colorama
datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.41.*
+transformers==4.42.*
tqdm
wandb
@@ -32,7 +32,7 @@ sse-starlette==1.6.5
tiktoken

# llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index 78342a2e..6742f2e5 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -1,5 +1,5 @@
-accelerate==0.30.*
-aqlm[gpu,cpu]==1.1.5; platform_system == "Linux"
+accelerate==0.31.*
+aqlm[gpu,cpu]==1.1.6; platform_system == "Linux"
auto-gptq==0.7.1
bitsandbytes==0.43.*
colorama
@@ -7,7 +7,7 @@ datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@@ -24,7 +24,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.41.*
+transformers==4.42.*
tqdm
wandb
@@ -35,22 +35,22 @@ sse-starlette==1.6.5
tiktoken

# llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

# llama-cpp-python (CUDA, no tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

# llama-cpp-python (CUDA, tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

# CUDA wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt
index e96d468f..21025a62 100644
--- a/requirements_nowheels.txt
+++ b/requirements_nowheels.txt
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
colorama
datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.41.*
+transformers==4.42.*
tqdm
wandb