diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css
index bdf68aad..8a31d6e2 100644
--- a/css/html_instruct_style.css
+++ b/css/html_instruct_style.css
@@ -49,7 +49,7 @@
 .gradio-container .chat .assistant-message {
   padding: 20px;
-  background: var(--color-grey-200);
+  background: #f4f4f4;
   margin-top: 9px !important;
   margin-bottom: 12px !important;
   border-radius: 7px;
diff --git a/css/main.css b/css/main.css
index 498b3c6c..5768348e 100644
--- a/css/main.css
+++ b/css/main.css
@@ -95,7 +95,7 @@ gradio-app > :first-child {
 }
 
 .header_bar {
-  background-color: #f7f7f7;
+  background-color: #f4f4f4;
   box-shadow: 0 0 3px rgba(22 22 22 / 35%);
   margin-bottom: 0;
   overflow-x: scroll;
@@ -336,6 +336,11 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     padding-left: 0;
     padding-right: 0;
   }
+
+  .chat {
+    padding-left: 0;
+    padding-right: 0;
+  }
 }
 
 .chat {
@@ -391,7 +396,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 
 .chat .message:last-child {
   margin-bottom: 0 !important;
-  padding-bottom: 0 !important;
+  padding-bottom: 15px !important;
 }
 
 .message-body li {
@@ -510,7 +515,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 #show-controls {
   position: absolute;
   height: 100%;
-  background-color: var(--background-fill-primary);
+  background-color: transparent;
   border: 0 !important;
   border-radius: 0;
 }
diff --git a/extensions/sd_api_pictures/script.py b/extensions/sd_api_pictures/script.py
index a16575cd..3a31771a 100644
--- a/extensions/sd_api_pictures/script.py
+++ b/extensions/sd_api_pictures/script.py
@@ -33,7 +33,7 @@ params = {
     'hr_upscaler': 'ESRGAN_4x',
     'hr_scale': '1.0',
     'seed': -1,
-    'sampler_name': 'DPM++ 2M Karras',
+    'sampler_name': 'DPM++ 2M',
     'steps': 32,
     'cfg_scale': 7,
     'textgen_prefix': 'Please provide a detailed and vivid description of [subject]',
diff --git a/extensions/whisper_stt/script.js b/extensions/whisper_stt/script.js
new file mode 100644
index 00000000..c4a908b5
--- /dev/null
+++ b/extensions/whisper_stt/script.js
@@ -0,0 +1,86 @@
+console.log("Whisper STT script loaded");
+
+let mediaRecorder;
+let audioChunks = [];
+let isRecording = false;
+
+window.startStopRecording = function() {
+    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
+        console.error("getUserMedia not supported on your browser!");
+        return;
+    }
+
+    if (isRecording == false) {
+        //console.log("Start recording function called");
+        navigator.mediaDevices.getUserMedia({ audio: true })
+            .then(stream => {
+                //console.log("Got audio stream");
+                mediaRecorder = new MediaRecorder(stream);
+                audioChunks = []; // Reset audio chunks
+                mediaRecorder.start();
+                //console.log("MediaRecorder started");
+                recordButton.innerHTML = recButton.innerHTML = "Stop";
+                isRecording = true;
+
+                mediaRecorder.addEventListener("dataavailable", event => {
+                    //console.log("Data available event, data size: ", event.data.size);
+                    audioChunks.push(event.data);
+                });
+
+                mediaRecorder.addEventListener("stop", () => {
+                    //console.log("MediaRecorder stopped");
+                    if (audioChunks.length > 0) {
+                        const audioBlob = new Blob(audioChunks, { type: "audio/webm" });
+                        //console.log("Audio blob created, size: ", audioBlob.size);
+                        const reader = new FileReader();
+                        reader.readAsDataURL(audioBlob);
+                        reader.onloadend = function() {
+                            const base64data = reader.result;
+                            //console.log("Audio converted to base64, length: ", base64data.length);
+
+                            const audioBase64Input = document.querySelector("#audio-base64 textarea");
+                            if (audioBase64Input) {
+                                audioBase64Input.value = base64data;
+                                audioBase64Input.dispatchEvent(new Event("input", { bubbles: true }));
+                                audioBase64Input.dispatchEvent(new Event("change", { bubbles: true }));
+                                //console.log("Updated textarea with base64 data");
+                            } else {
+                                console.error("Could not find audio-base64 textarea");
+                            }
+                        };
+                    } else {
+                        console.error("No audio data recorded for Whisper");
+                    }
+                });
+            });
+    } else {
+        //console.log("Stopping MediaRecorder");
+        recordButton.innerHTML = recButton.innerHTML = "Rec.";
+        isRecording = false;
+        mediaRecorder.stop();
+    }
+};
+
+const recordButton = gradioApp().querySelector("#record-button");
+recordButton.addEventListener("click", window.startStopRecording);
+
+
+function gradioApp() {
+    const elems = document.getElementsByTagName("gradio-app");
+    const gradioShadowRoot = elems.length == 0 ? null : elems[0].shadowRoot;
+    return gradioShadowRoot ? gradioShadowRoot : document;
+}
+
+
+// extra rec button next to generate button
+var recButton = recordButton.cloneNode(true);
+var generate_button = document.getElementById("Generate");
+generate_button.insertAdjacentElement("afterend", recButton);
+
+recButton.style.setProperty("margin-left", "-10px");
+recButton.innerHTML = "Rec.";
+
+recButton.addEventListener("click", function() {
+    recordButton.click();
+});
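[Review note] The new script.js never moves audio through a Gradio Audio component: it base64-encodes the recorded WebM blob into a data URL, writes it into the hidden #audio-base64 textarea, and fires input/change events so Gradio forwards the string to the Python handler wired up in script.py below. A minimal sketch of unpacking that payload on the Python side (hypothetical helper, not part of the patch):

    import base64

    def data_url_to_bytes(data_url: str) -> bytes:
        # "data:audio/webm;base64,GkXf..." -> raw WebM bytes
        header, payload = data_url.split(",", 1)
        return base64.b64decode(payload)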
diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py
index efc62f41..e45c8b1e 100644
--- a/extensions/whisper_stt/script.py
+++ b/extensions/whisper_stt/script.py
@@ -1,5 +1,13 @@
+import base64
+import gc
+import io
+from pathlib import Path
+
 import gradio as gr
-import speech_recognition as sr
+import numpy as np
+import torch
+import whisper
+from pydub import AudioSegment
 
 from modules import shared
 
@@ -8,13 +16,16 @@ input_hijack = {
     'value': ["", ""]
 }
 
-# parameters which can be customized in settings.json of webui
+# parameters which can be customized in settings.yaml of webui
 params = {
     'whipser_language': 'english',
     'whipser_model': 'small.en',
     'auto_submit': True
 }
 
+startup_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+WHISPERMODEL = whisper.load_model(params['whipser_model'], device=startup_device)
+
 
 def chat_input_modifier(text, visible_text, state):
     global input_hijack
@@ -25,47 +36,84 @@ def chat_input_modifier(text, visible_text, state):
         return text, visible_text
 
 
-def do_stt(audio, whipser_model, whipser_language):
-    transcription = ""
-    r = sr.Recognizer()
+def do_stt(audio, whipser_language):
+    # use pydub to convert sample_rate and sample_width for whisper input
+    dubaudio = AudioSegment.from_file(io.BytesIO(audio))
+    dubaudio = dubaudio.set_channels(1)
+    dubaudio = dubaudio.set_frame_rate(16000)
+    dubaudio = dubaudio.set_sample_width(2)
 
-    # Convert to AudioData
-    audio_data = sr.AudioData(sample_rate=audio[0], frame_data=audio[1], sample_width=4)
+    # same method to get the array as openai whisper repo used from wav file
+    audio_np = np.frombuffer(dubaudio.raw_data, np.int16).flatten().astype(np.float32) / 32768.0
 
-    try:
-        transcription = r.recognize_whisper(audio_data, language=whipser_language, model=whipser_model)
-    except sr.UnknownValueError:
-        print("Whisper could not understand audio")
-    except sr.RequestError as e:
-        print("Could not request results from Whisper", e)
+    if len(whipser_language) == 0:
+        result = WHISPERMODEL.transcribe(audio=audio_np)
+    else:
+        result = WHISPERMODEL.transcribe(audio=audio_np, language=whipser_language)
+    return result["text"]
+
+
+def auto_transcribe(audio, auto_submit, whipser_language):
+    if audio is None or audio == "":
+        print("Whisper received no audio data")
+        return "", ""
+    audio_bytes = base64.b64decode(audio.split(',')[1])
+
+    transcription = do_stt(audio_bytes, whipser_language)
+    if auto_submit:
+        input_hijack.update({"state": True, "value": [transcription, transcription]})
 
     return transcription
 
-def auto_transcribe(audio, auto_submit, whipser_model, whipser_language):
-    if audio is None:
-        return "", ""
-    transcription = do_stt(audio, whipser_model, whipser_language)
-    if auto_submit:
-        input_hijack.update({"state": True, "value": [transcription, transcription]})
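[Review note] do_stt() above rebuilds Whisper's expected input by hand: pydub forces mono, 16 kHz, 16-bit samples, and the int16 PCM is then scaled into float32 [-1.0, 1.0], the same convention openai-whisper uses when it loads a wav file itself. The scaling step in isolation (self-contained sketch, values illustrative):

    import numpy as np

    pcm = np.array([0, 16384, -32768], dtype=np.int16)  # 16-bit PCM samples
    audio = pcm.astype(np.float32) / 32768.0            # -> [0.0, 0.5, -1.0]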
+def reload_whispermodel(whisper_model_name: str, whisper_language: str, device: str):
+    if len(whisper_model_name) > 0:
+        global WHISPERMODEL
+        WHISPERMODEL = None
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        gc.collect()
 
-    return transcription, None
+        if device != "none":
+            if device == "cuda":
+                device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+            WHISPERMODEL = whisper.load_model(whisper_model_name, device=device)
+            params.update({"whipser_model": whisper_model_name})
+            if ".en" in whisper_model_name:
+                whisper_language = "english"
+            audio_update = gr.update(interactive=True)
+        else:
+            audio_update = gr.update(interactive=False)
+        return [whisper_model_name, whisper_language, str(device), audio_update]
 
 
 def ui():
     with gr.Accordion("Whisper STT", open=True):
         with gr.Row():
-            audio = gr.Audio(source="microphone")
+            audio = gr.Textbox(elem_id="audio-base64", visible=False)
+            record_button = gr.Button("Rec.", elem_id="record-button", elem_classes="custom-button")
         with gr.Row():
             with gr.Accordion("Settings", open=False):
                 auto_submit = gr.Checkbox(label='Submit the transcribed audio automatically', value=params['auto_submit'])
-                whipser_model = gr.Dropdown(label='Whisper Model', value=params['whipser_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large"])
-                whipser_language = gr.Dropdown(label='Whisper Language', value=params['whipser_language'], choices=["chinese", "german", "spanish", "russian", "korean", "french", "japanese", "portuguese", "turkish", "polish", "catalan", "dutch", "arabic", "swedish", "italian", "indonesian", "hindi", "finnish", "vietnamese", "hebrew", "ukrainian", "greek", "malay", "czech", "romanian", "danish", "hungarian", "tamil", "norwegian", "thai", "urdu", "croatian", "bulgarian", "lithuanian", "latin", "maori", "malayalam", "welsh", "slovak", "telugu", "persian", "latvian", "bengali", "serbian", "azerbaijani", "slovenian", "kannada", "estonian", "macedonian", "breton", "basque", "icelandic", "armenian", "nepali", "mongolian", "bosnian", "kazakh", "albanian", "swahili", "galician", "marathi", "punjabi", "sinhala", "khmer", "shona", "yoruba", "somali", "afrikaans", "occitan", "georgian", "belarusian", "tajik", "sindhi", "gujarati", "amharic", "yiddish", "lao", "uzbek", "faroese", "haitian creole", "pashto", "turkmen", "nynorsk", "maltese", "sanskrit", "luxembourgish", "myanmar", "tibetan", "tagalog", "malagasy", "assamese", "tatar", "hawaiian", "lingala", "hausa", "bashkir", "javanese", "sundanese"])
+                device_dropd = gr.Dropdown(label='Device', value=str(startup_device), choices=["cuda", "cpu", "none"])
+                whisper_model_dropd = gr.Dropdown(label='Whisper Model', value=params['whipser_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large"])
+                whisper_language = gr.Dropdown(label='Whisper Language', value=params['whipser_language'], choices=["english", "chinese", "german", "spanish", "russian", "korean", "french", "japanese", "portuguese", "turkish", "polish", "catalan", "dutch", "arabic", "swedish", "italian", "indonesian", "hindi", "finnish", "vietnamese", "hebrew", "ukrainian", "greek", "malay", "czech", "romanian", "danish", "hungarian", "tamil", "norwegian", "thai", "urdu", "croatian", "bulgarian", "lithuanian", "latin", "maori", "malayalam", "welsh", "slovak", "telugu", "persian", "latvian", "bengali", "serbian", "azerbaijani", "slovenian", "kannada", "estonian", "macedonian", "breton", "basque", "icelandic", "armenian", "nepali", "mongolian", "bosnian", "kazakh", "albanian", "swahili", "galician", "marathi", "punjabi", "sinhala", "khmer", "shona", "yoruba", "somali", "afrikaans", "occitan", "georgian", "belarusian", "tajik", "sindhi", "gujarati", "amharic", "yiddish", "lao", "uzbek", "faroese", "haitian creole", "pashto", "turkmen", "nynorsk", "maltese", "sanskrit", "luxembourgish", "myanmar", "tibetan", "tagalog", "malagasy", "assamese", "tatar", "hawaiian", "lingala", "hausa", "bashkir", "javanese", "sundanese"])
 
-        audio.stop_recording(
-            auto_transcribe, [audio, auto_submit, whipser_model, whipser_language], [shared.gradio['textbox'], audio]).then(
-            None, auto_submit, None, js="(check) => {if (check) { document.getElementById('Generate').click() }}")
+        audio.change(
+            auto_transcribe, [audio, auto_submit, whisper_language], [shared.gradio['textbox']]).then(
+            None, auto_submit, None, js="(check) => {if (check) { document.getElementById('Generate').click() }}")
 
-        whipser_model.change(lambda x: params.update({"whipser_model": x}), whipser_model, None)
-        whipser_language.change(lambda x: params.update({"whipser_language": x}), whipser_language, None)
+        device_dropd.input(reload_whispermodel, [whisper_model_dropd, whisper_language, device_dropd], [whisper_model_dropd, whisper_language, device_dropd, audio])
+        whisper_model_dropd.change(reload_whispermodel, [whisper_model_dropd, whisper_language, device_dropd], [whisper_model_dropd, whisper_language, device_dropd, audio])
+        whisper_language.change(lambda x: params.update({"whipser_language": x}), whisper_language, None)
         auto_submit.change(lambda x: params.update({"auto_submit": x}), auto_submit, None)
+
+
+def custom_js():
+    """
+    Returns custom javascript as a string. It is applied whenever the web UI is
+    loaded.
+
+    :return:
+    """
+    with open(Path(__file__).parent.resolve() / "script.js", "r") as f:
+        return f.read()
diff --git a/js/main.js b/js/main.js
index e9a980e2..6b456517 100644
--- a/js/main.js
+++ b/js/main.js
@@ -7,30 +7,30 @@ main_parent.parentNode.style = "gap: 0";
 main_parent.parentNode.parentNode.style = "padding: 0";
 
 document.querySelector(".header_bar").addEventListener("click", function(event) {
-    if (event.target.tagName === "BUTTON") {
-        const buttonText = event.target.textContent.trim();
+    if (event.target.tagName !== "BUTTON") return;
 
-        let chat_visible = (buttonText == "Chat");
-        let default_visible = (buttonText == "Default");
-        let notebook_visible = (buttonText == "Notebook");
+    const buttonText = event.target.textContent.trim();
+    const extensionsVisible = ["Chat", "Default", "Notebook"].includes(buttonText);
+    const chatVisible = buttonText === "Chat";
+    const showControlsChecked = document.querySelector("#show-controls input").checked;
+    const extensions = document.querySelector("#extensions");
 
-        // Check if one of the generation tabs is visible
-        if (chat_visible || notebook_visible || default_visible) {
-            extensions && (extensions.style.display = "flex");
-
-            if (chat_visible) {
-                this.style.marginBottom = "0px";
-                extensions && (extensions.style.maxWidth = "880px");
-                extensions && (extensions.style.padding = "0px");
-            } else {
-                this.style.marginBottom = "19px";
-                extensions && (extensions.style.maxWidth = "none");
-                extensions && (extensions.style.padding = "15px");
-            }
-        } else {
-            this.style.marginBottom = "19px";
-            extensions && (extensions.style.display = "none");
+    if (extensionsVisible) {
+        if (extensions) {
+            extensions.style.display = "flex";
+            extensions.style.maxWidth = chatVisible ? "880px" : "none";
+            extensions.style.padding = chatVisible ? "0px" : "15px";
         }
+        this.style.marginBottom = chatVisible ? "0px" : "19px";
+
+        if (chatVisible && !showControlsChecked) {
+            document.querySelectorAll("#chat-tab > div > :nth-child(n+2), #extensions").forEach(element => {
+                element.style.display = "none";
+            });
+        }
+    } else {
+        this.style.marginBottom = "19px";
+        if (extensions) extensions.style.display = "none";
     }
 });
@@ -539,3 +539,64 @@ document.querySelectorAll(".focus-on-chat-input").forEach(element => {
 // Fix a border around the "past chats" menu
 //------------------------------------------------
 document.getElementById("past-chats").parentNode.style.borderRadius = "0px";
+
+//------------------------------------------------
+// Allow the character dropdown to coexist at the
+// Chat tab and the Parameters > Character tab
+//------------------------------------------------
+
+const headerBar = document.querySelector(".header_bar");
+let originalParent;
+let originalIndex;  // To keep track of the original position
+let movedElement;
+
+function moveToChatTab() {
+    const characterMenu = document.getElementById("character-menu");
+    const grandParent = characterMenu.parentElement.parentElement;
+
+    // Save the initial location for the character dropdown
+    if (!originalParent) {
+        originalParent = grandParent.parentElement;
+        originalIndex = Array.from(originalParent.children).indexOf(grandParent);
+        movedElement = grandParent;
+    }
+
+    // Do not show the Character dropdown in the Chat tab when "instruct" mode is selected
+    const instructRadio = document.querySelector("#chat-mode input[value=\"instruct\"]");
+    if (instructRadio && instructRadio.checked) {
+        grandParent.style.display = "none";
+    }
+
+    const chatControlsFirstChild = document.querySelector("#chat-controls").firstElementChild;
+    const newParent = chatControlsFirstChild;
+    let newPosition = newParent.children.length - 2;
+
+    newParent.insertBefore(grandParent, newParent.children[newPosition]);
+    document.getElementById("save-character").style.display = "none";
+}
+
+function restoreOriginalPosition() {
+    if (originalParent && movedElement) {
+        if (originalIndex >= originalParent.children.length) {
+            originalParent.appendChild(movedElement);
+        } else {
+            originalParent.insertBefore(movedElement, originalParent.children[originalIndex]);
+        }
+
+        document.getElementById("save-character").style.display = "";
+        movedElement.style.display = "";
+    }
+}
+
+headerBar.addEventListener("click", (e) => {
+    if (e.target.tagName === "BUTTON") {
+        const tabName = e.target.textContent.trim();
+        if (tabName === "Chat") {
+            moveToChatTab();
+        } else {
+            restoreOriginalPosition();
+        }
+    }
+});
+
+moveToChatTab();
diff --git a/modules/chat.py b/modules/chat.py
index 5d2bdd63..920c0f7b 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -3,6 +3,7 @@ import copy
 import functools
 import html
 import json
+import pprint
 import re
 from datetime import datetime
 from functools import partial
@@ -259,10 +260,27 @@ def get_stopping_strings(state):
             suffix_bot + prefix_user,
         ]
 
+    # Try to find the EOT token
+    for item in stopping_strings.copy():
+        item = item.strip()
+        if item.startswith("<") and ">" in item:
+            stopping_strings.append(item.split(">")[0] + ">")
+        elif item.startswith("[") and "]" in item:
+            stopping_strings.append(item.split("]")[0] + "]")
+
     if 'stopping_strings' in state and isinstance(state['stopping_strings'], list):
         stopping_strings += state.pop('stopping_strings')
 
-    return list(set(stopping_strings))
+    # Remove redundant items that start with another item
+    result = [item for item in stopping_strings if not any(item.startswith(other) and item != other for other in stopping_strings)]
+    result = list(set(result))
+
+    if shared.args.verbose:
+        logger.info("STOPPING_STRINGS=")
+        pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(result)
+        print()
+
+    return result
 
 
 def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_message=True, for_ui=False):
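[Review note] Worked example of the "Try to find the EOT token" loop above, covering both branches (sketch):

    s = "<|eot_id|><|start_header_id|>user<|end_header_id|>"
    assert s.split(">")[0] + ">" == "<|eot_id|>"   # "<...>" branch
    t = "[INST] Hello [/INST]"
    assert t.split("]")[0] + "]" == "[INST]"       # "[...]" branch

The redundancy filter that follows then drops the longer templated strings that begin with these bare tokens, so only the short end-of-turn markers survive.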
diff --git a/modules/llama_cpp_python_hijack.py b/modules/llama_cpp_python_hijack.py
index eb23177f..f3f3f560 100644
--- a/modules/llama_cpp_python_hijack.py
+++ b/modules/llama_cpp_python_hijack.py
@@ -1,3 +1,4 @@
+import importlib
 from typing import Sequence
 
 from tqdm import tqdm
@@ -5,20 +6,55 @@ from tqdm import tqdm
 from modules import shared
 from modules.cache_utils import process_llamacpp_cache
 
-try:
-    import llama_cpp
-except:
-    llama_cpp = None
 
-try:
-    import llama_cpp_cuda
-except:
-    llama_cpp_cuda = None
+imported_module = None
 
-try:
-    import llama_cpp_cuda_tensorcores
-except:
-    llama_cpp_cuda_tensorcores = None
+
+def llama_cpp_lib():
+    global imported_module
+
+    return_lib = None
+
+    if shared.args.cpu:
+        if imported_module and imported_module != 'llama_cpp':
+            raise Exception(f"Cannot import 'llama_cpp' because '{imported_module}' is already imported. See issue #1575 in llama-cpp-python. Please restart the server before attempting to use a different version of llama-cpp-python.")
+        try:
+            return_lib = importlib.import_module('llama_cpp')
+            imported_module = 'llama_cpp'
+        except:
+            pass
+
+    if shared.args.tensorcores and return_lib is None:
+        if imported_module and imported_module != 'llama_cpp_cuda_tensorcores':
+            raise Exception(f"Cannot import 'llama_cpp_cuda_tensorcores' because '{imported_module}' is already imported. See issue #1575 in llama-cpp-python. Please restart the server before attempting to use a different version of llama-cpp-python.")
+        try:
+            return_lib = importlib.import_module('llama_cpp_cuda_tensorcores')
+            imported_module = 'llama_cpp_cuda_tensorcores'
+        except:
+            pass
+
+    if return_lib is None:
+        if imported_module and imported_module != 'llama_cpp_cuda':
+            raise Exception(f"Cannot import 'llama_cpp_cuda' because '{imported_module}' is already imported. See issue #1575 in llama-cpp-python. Please restart the server before attempting to use a different version of llama-cpp-python.")
+        try:
+            return_lib = importlib.import_module('llama_cpp_cuda')
+            imported_module = 'llama_cpp_cuda'
+        except:
+            pass
+
+    if return_lib is None and not shared.args.cpu:
+        if imported_module and imported_module != 'llama_cpp':
+            raise Exception(f"Cannot import 'llama_cpp' because '{imported_module}' is already imported. See issue #1575 in llama-cpp-python. Please restart the server before attempting to use a different version of llama-cpp-python.")
+        try:
+            return_lib = importlib.import_module('llama_cpp')
+            imported_module = 'llama_cpp'
+        except:
+            pass
+
+    if return_lib is not None:
+        monkey_patch_llama_cpp_python(return_lib)
+
+    return return_lib
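[Review note] llama_cpp_lib() now resolves the backend lazily and pins the whole process to a single llama-cpp-python flavor (mixing flavors trips llama-cpp-python issue #1575, quoted in the exceptions above). Its selection order, condensed into a sketch with the same first-import-wins semantics:

    def candidate_order(cpu: bool, tensorcores: bool) -> list:
        # Priority list of module names tried by llama_cpp_lib().
        order = ['llama_cpp'] if cpu else []
        if tensorcores:
            order.append('llama_cpp_cuda_tensorcores')
        order.append('llama_cpp_cuda')
        if not cpu:
            order.append('llama_cpp')  # last-resort CPU fallback
        return order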
 
 
 def eval_with_progress(self, tokens: Sequence[int]):
@@ -63,7 +99,7 @@
     self.n_tokens += n_tokens
 
 
-def monkey_patch_generate(lib):
+def monkey_patch_llama_cpp_python(lib):
 
     def my_generate(self, *args, **kwargs):
 
@@ -77,11 +113,6 @@ def monkey_patch_generate(lib):
         for output in self.original_generate(*args, **kwargs):
             yield output
 
+    lib.Llama.eval = eval_with_progress
     lib.Llama.original_generate = lib.Llama.generate
     lib.Llama.generate = my_generate
-
-
-for lib in [llama_cpp, llama_cpp_cuda, llama_cpp_cuda_tensorcores]:
-    if lib is not None:
-        lib.Llama.eval = eval_with_progress
-        monkey_patch_generate(lib)
diff --git a/modules/llamacpp_hf.py b/modules/llamacpp_hf.py
index 74af5fbf..327e3a7b 100644
--- a/modules/llamacpp_hf.py
+++ b/modules/llamacpp_hf.py
@@ -7,35 +7,10 @@ from torch.nn import CrossEntropyLoss
 from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
 from transformers.modeling_outputs import CausalLMOutputWithPast
 
-from modules import llama_cpp_python_hijack, shared
+from modules import shared
+from modules.llama_cpp_python_hijack import llama_cpp_lib
 from modules.logging_colors import logger
 
-try:
-    import llama_cpp
-except:
-    llama_cpp = None
-
-try:
-    import llama_cpp_cuda
-except:
-    llama_cpp_cuda = None
-
-try:
-    import llama_cpp_cuda_tensorcores
-except:
-    llama_cpp_cuda_tensorcores = None
-
-
-def llama_cpp_lib():
-    if shared.args.cpu and llama_cpp is not None:
-        return llama_cpp
-    elif shared.args.tensorcores and llama_cpp_cuda_tensorcores is not None:
-        return llama_cpp_cuda_tensorcores
-    elif llama_cpp_cuda is not None:
-        return llama_cpp_cuda
-    else:
-        return llama_cpp
-
 
 class LlamacppHF(PreTrainedModel):
     def __init__(self, model, path):
@@ -221,6 +196,13 @@ class LlamacppHF(PreTrainedModel):
             'flash_attn': shared.args.flash_attn
         }
 
+        if shared.args.cache_4bit:
+            params["type_k"] = 2
+            params["type_v"] = 2
+        elif shared.args.cache_8bit:
+            params["type_k"] = 8
+            params["type_v"] = 8
+
         Llama = llama_cpp_lib().Llama
         model = Llama(**params)
diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py
index d62fd517..a16230ca 100644
--- a/modules/llamacpp_model.py
+++ b/modules/llamacpp_model.py
@@ -4,37 +4,12 @@ from functools import partial
 import numpy as np
 import torch
 
-from modules import llama_cpp_python_hijack, shared
+from modules import shared
 from modules.callbacks import Iteratorize
+from modules.llama_cpp_python_hijack import llama_cpp_lib
 from modules.logging_colors import logger
 from modules.text_generation import get_max_prompt_length
 
-try:
-    import llama_cpp
-except:
-    llama_cpp = None
-
-try:
-    import llama_cpp_cuda
-except:
-    llama_cpp_cuda = None
-
-try:
-    import llama_cpp_cuda_tensorcores
-except:
-    llama_cpp_cuda_tensorcores = None
-
-
-def llama_cpp_lib():
-    if shared.args.cpu and llama_cpp is not None:
-        return llama_cpp
-    elif shared.args.tensorcores and llama_cpp_cuda_tensorcores is not None:
-        return llama_cpp_cuda_tensorcores
-    elif llama_cpp_cuda is not None:
-        return llama_cpp_cuda
-    else:
-        return llama_cpp
-
 
 def ban_eos_logits_processor(eos_token, input_ids, logits):
     logits[eos_token] = -float('inf')
@@ -100,6 +75,13 @@ class LlamaCppModel:
             'flash_attn': shared.args.flash_attn
         }
 
+        if shared.args.cache_4bit:
+            params["type_k"] = 2
+            params["type_v"] = 2
+        elif shared.args.cache_8bit:
+            params["type_k"] = 8
+            params["type_v"] = 8
+
         result.model = Llama(**params)
         if cache_capacity > 0:
             result.model.set_cache(LlamaCache(capacity_bytes=cache_capacity))
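[Review note] The type_k/type_v magic numbers that --cache_4bit/--cache_8bit inject in llamacpp_hf.py and llamacpp_model.py above are raw ggml_type enum values passed straight through to llama.cpp: based on ggml.h, 2 is GGML_TYPE_Q4_0 and 8 is GGML_TYPE_Q8_0, i.e. the KV cache is stored Q4_0- or Q8_0-quantized instead of f16. Named constants for readability (sketch):

    GGML_TYPE_Q4_0 = 2  # used by --cache_4bit
    GGML_TYPE_Q8_0 = 8  # used by --cache_8bit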
diff --git a/modules/loaders.py b/modules/loaders.py
index 1da37595..78601c17 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -21,6 +21,7 @@ loaders_and_params = OrderedDict({
         'trust_remote_code',
         'no_use_fast',
         'use_flash_attention_2',
+        'use_eager_attention',
         'alpha_value',
         'compress_pos_emb',
         'disable_exllama',
@@ -30,6 +31,8 @@
     'llama.cpp': [
         'n_ctx',
         'n_gpu_layers',
+        'cache_8bit',
+        'cache_4bit',
         'tensor_split',
         'n_batch',
         'threads',
@@ -51,6 +54,8 @@
     'llamacpp_HF': [
         'n_ctx',
         'n_gpu_layers',
+        'cache_8bit',
+        'cache_4bit',
         'tensor_split',
         'n_batch',
         'threads',
diff --git a/modules/models.py b/modules/models.py
index da741cb0..07c14308 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -146,6 +146,9 @@ def huggingface_loader(model_name):
     if shared.args.force_safetensors:
         params['force_safetensors'] = True
 
+    if shared.args.use_eager_attention:
+        params['attn_implementation'] = 'eager'
+
     config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=shared.args.trust_remote_code)
 
     if 'chatglm' in model_name.lower():
diff --git a/modules/models_settings.py b/modules/models_settings.py
index 2e3fff9c..7ae68125 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -9,6 +9,8 @@ from modules import chat, loaders, metadata_gguf, shared, ui
 
 def get_fallback_settings():
     return {
+        'bf16': False,
+        'use_eager_attention': False,
         'wbits': 'None',
         'groupsize': 'None',
         'desc_act': False,
@@ -97,10 +99,18 @@
        elif 'attn_config' in metadata and 'rope_theta' in metadata['attn_config']:
            model_settings['rope_freq_base'] = metadata['attn_config']['rope_theta']
 
-        if 'rope_scaling' in metadata and type(metadata['rope_scaling']) is dict and all(key in metadata['rope_scaling'] for key in ('type', 'factor')):
+        if 'rope_scaling' in metadata and isinstance(metadata['rope_scaling'], dict) and all(key in metadata['rope_scaling'] for key in ('type', 'factor')):
             if metadata['rope_scaling']['type'] == 'linear':
                 model_settings['compress_pos_emb'] = metadata['rope_scaling']['factor']
 
+        # For Gemma-2
+        if 'torch_dtype' in metadata and metadata['torch_dtype'] == 'bfloat16':
+            model_settings['bf16'] = True
+
+        # For Gemma-2
+        if 'architectures' in metadata and isinstance(metadata['architectures'], list) and 'Gemma2ForCausalLM' in metadata['architectures']:
+            model_settings['use_eager_attention'] = True
+
         # Read GPTQ metadata for old GPTQ loaders
         if 'quantization_config' in metadata and metadata['quantization_config'].get('quant_method', '') != 'exl2':
             if 'bits' in metadata['quantization_config']:
@@ -133,7 +143,7 @@
         for k in ['eos_token', 'bos_token']:
             if k in metadata:
                 value = metadata[k]
-                if type(value) is dict:
+                if isinstance(value, dict):
                     value = value['content']
 
                 template = template.replace(k, "'{}'".format(value))
@@ -168,7 +178,7 @@ def infer_loader(model_name, model_settings):
     path_to_model = Path(f'{shared.args.model_dir}/{model_name}')
     if not path_to_model.exists():
         loader = None
-    elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and type(model_settings['wbits']) is int and model_settings['wbits'] > 0):
+    elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and isinstance(model_settings['wbits'], int) and model_settings['wbits'] > 0):
         loader = 'ExLlamav2_HF'
     elif (path_to_model / 'quant_config.json').exists() or re.match(r'.*-awq', model_name.lower()):
         loader = 'AutoAWQ'
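[Review note] Context for the two "For Gemma-2" heuristics above: Gemma-2 checkpoints ship bfloat16 weights and use attention logit soft-capping, which the default SDPA attention path in transformers did not implement at the time (transformers 4.42 itself recommends eager attention for this architecture). Hence bf16 and use_eager_attention become defaults whenever 'Gemma2ForCausalLM' appears in config.json's "architectures". The rationale is inferred; the hunks themselves only set the two flags.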
diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py
index ad74d658..9fb661ae 100644
--- a/modules/sampler_hijack.py
+++ b/modules/sampler_hijack.py
@@ -359,14 +359,14 @@ class RepetitionPenaltyLogitsProcessorWithRange(LogitsProcessor):
         return scores
 
 
-def get_logits_warper_patch(self, generation_config):
+def get_logits_warper_patch(self, generation_config, **kwargs):
     # Parameter sanitization
     if isinstance(generation_config.temperature, int):
         generation_config.temperature = float(generation_config.temperature)  # Must be float
 
     # Get the original warpers
-    warpers = self._get_logits_warper_old(generation_config)
+    warpers = self._get_logits_warper_old(generation_config, **kwargs)
 
     # Replace temperature with our modified class.
     # Currently, it behaves identically to the original.
diff --git a/modules/shared.py b/modules/shared.py
index ebbfc268..e04c549a 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -106,6 +106,7 @@ group.add_argument('--trust-remote-code', action='store_true', help='Set trust_r
 group.add_argument('--force-safetensors', action='store_true', help='Set use_safetensors=True while loading the model. This prevents arbitrary code execution.')
 group.add_argument('--no_use_fast', action='store_true', help='Set use_fast=False while loading the tokenizer (it\'s True by default). Use this if you have any problems related to use_fast.')
 group.add_argument('--use_flash_attention_2', action='store_true', help='Set use_flash_attention_2=True while loading the model.')
+group.add_argument('--use_eager_attention', action='store_true', help='Set attn_implementation=eager while loading the model.')
 
 # bitsandbytes 4-bit
 group = parser.add_argument_group('bitsandbytes 4-bit')
diff --git a/modules/ui.py b/modules/ui.py
index c20a7888..b1c1cf6d 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -43,6 +43,11 @@ theme = gr.themes.Default(
     body_text_color_subdued='#484848',
     background_fill_secondary='#eaeaea',
     background_fill_primary='var(--neutral-50)',
+    body_background_fill="white",
+    block_background_fill="#f4f4f4",
+    body_text_color="#333",
+    button_secondary_background_fill="#f4f4f4",
+    button_secondary_border_color="var(--border-color-primary)"
 )
 
 if Path("notification.mp3").exists():
@@ -64,6 +69,7 @@ def list_model_elements():
         'trust_remote_code',
         'no_use_fast',
         'use_flash_attention_2',
+        'use_eager_attention',
         'load_in_4bit',
         'compute_dtype',
         'quant_type',
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 91951624..6942588e 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -87,16 +87,11 @@ def create_ui():
         with gr.Row():
             shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template selected under Parameters > Instruction template must match the current model.', elem_id='chat-mode')
 
-        with gr.Row():
-            shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', elem_classes='slim-dropdown')
-            shared.gradio['refresh_character'] = ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
-            shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
-
         with gr.Row():
             shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
 
         with gr.Row():
-            shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=16, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=False, elem_classes=['add_scrollbar'])
+            shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=False, elem_classes=['add_scrollbar'])
 
 
 def create_chat_settings_ui():
@@ -105,10 +100,15 @@
     with gr.Row():
         with gr.Column(scale=8):
             with gr.Tab("Character"):
+                with gr.Row():
+                    shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
+                    ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
+                    shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', elem_id="save-character", interactive=not mu)
+                    shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
+
                 shared.gradio['name2'] = gr.Textbox(value='', lines=1, label='Character\'s name')
                 shared.gradio['context'] = gr.Textbox(value='', lines=10, label='Context', elem_classes=['add_scrollbar'])
                 shared.gradio['greeting'] = gr.Textbox(value='', lines=5, label='Greeting', elem_classes=['add_scrollbar'])
-                shared.gradio['save_character'] = gr.Button('Save character', elem_classes=['small-button'], interactive=not mu)
 
             with gr.Tab("User"):
                 shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Name')
@@ -300,8 +300,10 @@ def create_event_handlers():
         lambda x: gr.update(choices=(histories := chat.find_all_histories_with_first_prompts(x)), value=histories[0][1]), gradio('interface_state'), gradio('unique_id'), show_progress=False).then(
         None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}')
 
+    shared.gradio['mode'].change(None, gradio('mode'), None, js="(mode) => {mode === 'instruct' ? document.getElementById('character-menu').parentNode.parentNode.style.display = 'none' : document.getElementById('character-menu').parentNode.parentNode.style.display = ''}")
+
     shared.gradio['mode'].change(
-        lambda x: [gr.update(visible=(x != 'instruct'))] * 4 + [gr.update(visible=(x == 'chat-instruct'))], gradio('mode'), gradio('character_menu', 'refresh_character', 'delete_character', 'chat_style', 'chat-instruct_command'), show_progress=False).then(
+        lambda x: [gr.update(visible=x != 'instruct'), gr.update(visible=x == 'chat-instruct')], gradio('mode'), gradio('chat_style', 'chat-instruct_command'), show_progress=False).then(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         chat.load_latest_history, gradio('interface_state'), gradio('history')).then(
         chat.redraw_html, gradio(reload_arr), gradio('display')).then(
diff --git a/modules/ui_default.py b/modules/ui_default.py
index bf9800f6..e3bfe784 100644
--- a/modules/ui_default.py
+++ b/modules/ui_default.py
@@ -16,7 +16,6 @@ outputs = ('output_textbox', 'html-default')
 
 def create_ui():
     mu = shared.args.multi_user
     with gr.Tab('Default', elem_id='default-tab'):
-        shared.gradio['last_input-default'] = gr.State('')
         with gr.Row():
             with gr.Column():
                 with gr.Row():
@@ -63,14 +62,12 @@ def create_event_handlers():
     shared.gradio['Generate-default'].click(
-        lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
 
     shared.gradio['textbox-default'].submit(
-        lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 3ebcd126..9a4e7351 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -115,6 +115,7 @@ def create_ui():
             shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
             shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant)
             shared.gradio['use_flash_attention_2'] = gr.Checkbox(label="use_flash_attention_2", value=shared.args.use_flash_attention_2, info='Set use_flash_attention_2=True while loading the model.')
+            shared.gradio['use_eager_attention'] = gr.Checkbox(label="use_eager_attention", value=shared.args.use_eager_attention, info='Set attn_implementation=eager while loading the model.')
             shared.gradio['flash_attn'] = gr.Checkbox(label="flash_attn", value=shared.args.flash_attn, info='Use flash-attention.')
             shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices)
             shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards.')
diff --git a/requirements.txt b/requirements.txt
index fb35c7d8..22ae8bed 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
-accelerate==0.30.*
-aqlm[gpu,cpu]==1.1.5; platform_system == "Linux"
+accelerate==0.31.*
+aqlm[gpu,cpu]==1.1.6; platform_system == "Linux"
 auto-gptq==0.7.1
 bitsandbytes==0.43.*
 colorama
@@ -7,7 +7,7 @@ datasets
 einops
 gradio==4.26.*
 hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
 lm_eval==0.3.0
 markdown
 numba==0.59.*
@@ -24,7 +24,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.41.*
+transformers==4.42.*
 tqdm
 wandb
 
@@ -35,22 +35,22 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 
 # llama-cpp-python (CUDA, no tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 
 # llama-cpp-python (CUDA, tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 
 # CUDA wheels
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
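[Review note] All requirements files below move llama-cpp-python 0.2.79 -> 0.2.81 in lockstep with the loader changes above; to my knowledge the 0.2.8x line is the first to bundle a llama.cpp build with Gemma-2 support, which the Gemma-2 handling elsewhere in this patch depends on. Note also that the macOS 11 (platform_release 20.x) wheels are dropped rather than rebuilt, so Big Sur installs no longer get a prebuilt llama-cpp-python wheel.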
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 5bb68522..464a09f4 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -1,10 +1,10 @@ -accelerate==0.30.* +accelerate==0.31.* colorama datasets einops gradio==4.26.* hqq==0.1.7.post3 -jinja2==3.1.2 +jinja2==3.1.4 lm_eval==0.3.0 markdown numba==0.59.* @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.41.* +transformers==4.42.* tqdm wandb @@ -32,14 
+32,14 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.79+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.79+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.81+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.81+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index eee2c662..9f700b4b 100644 --- a/requirements_amd_noavx2.txt +++ 
b/requirements_amd_noavx2.txt @@ -1,10 +1,10 @@ -accelerate==0.30.* +accelerate==0.31.* colorama datasets einops gradio==4.26.* hqq==0.1.7.post3 -jinja2==3.1.2 +jinja2==3.1.4 lm_eval==0.3.0 markdown numba==0.59.* @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.41.* +transformers==4.42.* tqdm wandb @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 61e3c47c..ef9a6d61 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -1,10 +1,10 @@ -accelerate==0.30.* +accelerate==0.31.* colorama datasets einops gradio==4.26.* hqq==0.1.7.post3 -jinja2==3.1.2 +jinja2==3.1.4 lm_eval==0.3.0 markdown numba==0.59.* @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.41.* +transformers==4.42.* tqdm wandb @@ -32,10 +32,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6-py3-none-any.whl
diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt
index b9497470..0e4574ee 100644
--- a/requirements_apple_silicon.txt
+++ b/requirements_apple_silicon.txt
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
 colorama
 datasets
 einops
 gradio==4.26.*
 hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
 lm_eval==0.3.0
 markdown
 numba==0.59.*
@@ -21,7 +21,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.41.*
+transformers==4.42.*
 tqdm
 wandb
 
@@ -32,12 +32,10 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6-py3-none-any.whl
diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt
index ad38f23d..37a2d102 100644
--- a/requirements_cpu_only.txt
+++ b/requirements_cpu_only.txt
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
 colorama
 datasets
 einops
 gradio==4.26.*
 hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
 lm_eval==0.3.0
 markdown
 numba==0.59.*
@@ -21,7 +21,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.41.*
+transformers==4.42.*
 tqdm
 wandb
 
@@ -32,7 +32,7 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt
index 0debda88..d2cc0cbe 100644
--- a/requirements_cpu_only_noavx2.txt
+++ b/requirements_cpu_only_noavx2.txt
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
 colorama
 datasets
 einops
 gradio==4.26.*
 hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
 lm_eval==0.3.0
 markdown
 numba==0.59.*
@@ -21,7 +21,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.41.*
+transformers==4.42.*
 tqdm
 wandb
 
@@ -32,7 +32,7 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index 78342a2e..6742f2e5 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -1,5 +1,5 @@
-accelerate==0.30.*
-aqlm[gpu,cpu]==1.1.5; platform_system == "Linux"
+accelerate==0.31.*
+aqlm[gpu,cpu]==1.1.6; platform_system == "Linux"
 auto-gptq==0.7.1
 bitsandbytes==0.43.*
 colorama
@@ -7,7 +7,7 @@ datasets
 einops
 gradio==4.26.*
 hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
 lm_eval==0.3.0
 markdown
 numba==0.59.*
@@ -24,7 +24,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.41.*
+transformers==4.42.*
 tqdm
 wandb
 
@@ -35,22 +35,22 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 
 # llama-cpp-python (CUDA, no tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 
 # llama-cpp-python (CUDA, tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 
 # CUDA wheels
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt
index e96d468f..21025a62 100644
--- a/requirements_nowheels.txt
+++ b/requirements_nowheels.txt
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
 colorama
 datasets
 einops
 gradio==4.26.*
 hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
 lm_eval==0.3.0
 markdown
 numba==0.59.*
@@ -21,7 +21,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.41.*
+transformers==4.42.*
 tqdm
 wandb
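
Reviewer note on the wheel lines above: everything after the ";" on a wheel URL is a PEP 508 environment marker, which is how pip selects exactly one llama-cpp-python build per OS, CPU feature set, and Python version, and why each version bump touches so many near-identical lines. Below is a minimal, illustrative sketch (not part of this patch) of how such a marker evaluates, using the standalone "packaging" library; pip vendors its own copy, but the public package behaves the same for this purpose.

# marker_demo.py -- illustrative only, not part of this patch
from packaging.markers import Marker

# Marker copied verbatim from one of the Linux AVX2 wheel lines above.
linux_marker = Marker(
    'platform_system == "Linux" and platform_machine == "x86_64"'
    ' and python_version == "3.11"'
)

# With no argument, evaluate() reads the running interpreter's environment,
# so this prints True only on x86_64 Linux under Python 3.11.
print(linux_marker.evaluate())

# An explicit environment can be supplied to test another platform, e.g.
# the macOS 14 (Darwin 23.x) arm64 wheels kept by this diff.
mac_marker = Marker(
    'platform_system == "Darwin" and platform_release >= "23.0.0"'
    ' and platform_release < "24.0.0" and python_version == "3.10"'
)
print(mac_marker.evaluate({
    "platform_system": "Darwin",
    "platform_release": "23.4.0",
    "python_version": "3.10",
}))

One consequence worth flagging: the macOS 11 (Darwin 20.x) arm64 lines are removed without replacements, so on those systems no marker matches and pip simply skips llama-cpp-python when installing requirements_apple_silicon.txt.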
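
For anyone verifying an environment after this bump, here is a quick, hedged sanity check. The package names and version prefixes are copied from the pins in this diff; the script itself is hypothetical and not part of the repo.

# check_pins.py -- illustrative only, not part of this patch
from importlib.metadata import PackageNotFoundError, version

# Expected version prefixes, taken from the updated pins in this diff.
EXPECTED = {
    "accelerate": "0.31.",
    "jinja2": "3.1.4",
    "transformers": "4.42.",
    "llama_cpp_python": "0.2.81",  # local tags like +cpuavx2 still match
}

for name, prefix in EXPECTED.items():
    try:
        installed = version(name)
    except PackageNotFoundError:
        print(f"{name}: not installed")
        continue
    status = "ok" if installed.startswith(prefix) else f"expected {prefix}*"
    print(f"{name}: {installed} ({status})")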