Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2024-11-26 01:30:20 +01:00)
Commit 363efe54f4
@@ -49,7 +49,7 @@
 .gradio-container .chat .assistant-message {
   padding: 20px;
-  background: var(--color-grey-200);
+  background: #f4f4f4;
   margin-top: 9px !important;
   margin-bottom: 12px !important;
   border-radius: 7px;
css/main.css (11 changed lines)
@@ -95,7 +95,7 @@ gradio-app > :first-child {
 }

 .header_bar {
-  background-color: #f7f7f7;
+  background-color: #f4f4f4;
   box-shadow: 0 0 3px rgba(22 22 22 / 35%);
   margin-bottom: 0;
   overflow-x: scroll;
@@ -336,6 +336,11 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     padding-left: 0;
     padding-right: 0;
   }
+
+  .chat {
+    padding-left: 0;
+    padding-right: 0;
+  }
 }

 .chat {
@@ -391,7 +396,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {

 .chat .message:last-child {
   margin-bottom: 0 !important;
-  padding-bottom: 0 !important;
+  padding-bottom: 15px !important;
 }

 .message-body li {
@@ -510,7 +515,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 #show-controls {
   position: absolute;
   height: 100%;
-  background-color: var(--background-fill-primary);
+  background-color: transparent;
   border: 0 !important;
   border-radius: 0;
 }
extensions/sd_api_pictures/script.py
@@ -33,7 +33,7 @@ params = {
     'hr_upscaler': 'ESRGAN_4x',
     'hr_scale': '1.0',
     'seed': -1,
-    'sampler_name': 'DPM++ 2M Karras',
+    'sampler_name': 'DPM++ 2M',
     'steps': 32,
     'cfg_scale': 7,
     'textgen_prefix': 'Please provide a detailed and vivid description of [subject]',
extensions/whisper_stt/script.js (new file, 86 lines)
@@ -0,0 +1,86 @@
+console.log("Whisper STT script loaded");
+
+let mediaRecorder;
+let audioChunks = [];
+let isRecording = false;
+
+window.startStopRecording = function() {
+    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
+        console.error("getUserMedia not supported on your browser!");
+        return;
+    }
+
+    if (isRecording == false) {
+        //console.log("Start recording function called");
+        navigator.mediaDevices.getUserMedia({ audio: true })
+            .then(stream => {
+                //console.log("Got audio stream");
+                mediaRecorder = new MediaRecorder(stream);
+                audioChunks = []; // Reset audio chunks
+                mediaRecorder.start();
+                //console.log("MediaRecorder started");
+                recButton.icon;
+                recordButton.innerHTML = recButton.innerHTML = "Stop";
+                isRecording = true;
+
+                mediaRecorder.addEventListener("dataavailable", event => {
+                    //console.log("Data available event, data size: ", event.data.size);
+                    audioChunks.push(event.data);
+                });
+
+                mediaRecorder.addEventListener("stop", () => {
+                    //console.log("MediaRecorder stopped");
+                    if (audioChunks.length > 0) {
+                        const audioBlob = new Blob(audioChunks, { type: "audio/webm" });
+                        //console.log("Audio blob created, size: ", audioBlob.size);
+                        const reader = new FileReader();
+                        reader.readAsDataURL(audioBlob);
+                        reader.onloadend = function() {
+                            const base64data = reader.result;
+                            //console.log("Audio converted to base64, length: ", base64data.length);
+
+                            const audioBase64Input = document.querySelector("#audio-base64 textarea");
+                            if (audioBase64Input) {
+                                audioBase64Input.value = base64data;
+                                audioBase64Input.dispatchEvent(new Event("input", { bubbles: true }));
+                                audioBase64Input.dispatchEvent(new Event("change", { bubbles: true }));
+                                //console.log("Updated textarea with base64 data");
+                            } else {
+                                console.error("Could not find audio-base64 textarea");
+                            }
+                        };
+                    } else {
+                        console.error("No audio data recorded for Whisper");
+                    }
+                });
+            });
+    } else {
+        //console.log("Stopping MediaRecorder");
+        recordButton.innerHTML = recButton.innerHTML = "Rec.";
+        isRecording = false;
+        mediaRecorder.stop();
+    }
+};
+
+const recordButton = gradioApp().querySelector("#record-button");
+recordButton.addEventListener("click", window.startStopRecording);
+
+
+function gradioApp() {
+    const elems = document.getElementsByTagName("gradio-app");
+    const gradioShadowRoot = elems.length == 0 ? null : elems[0].shadowRoot;
+    return gradioShadowRoot ? gradioShadowRoot : document;
+}
+
+
+// extra rec button next to generate button
+var recButton = recordButton.cloneNode(true);
+var generate_button = document.getElementById("Generate");
+generate_button.insertAdjacentElement("afterend", recButton);
+
+recButton.style.setProperty("margin-left", "-10px");
+recButton.innerHTML = "Rec.";
+
+recButton.addEventListener("click", function() {
+    recordButton.click();
+});
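[Note] The recorder never uploads a file: script.js serializes the recorded webm
blob as a base64 data URL ("data:audio/webm;base64,...") into the hidden
#audio-base64 textbox, and the Python side decodes that string back into bytes.
A minimal standalone sketch of the decode step (helper name hypothetical, not
part of the commit; it mirrors auto_transcribe below):

    import base64

    def data_url_to_bytes(data_url: str) -> bytes:
        # "data:audio/webm;base64,<payload>" -> raw webm bytes
        header, payload = data_url.split(",", 1)
        if not header.endswith(";base64"):
            raise ValueError("expected a base64 data URL")
        return base64.b64decode(payload)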
extensions/whisper_stt/script.py
@@ -1,5 +1,13 @@
+import base64
+import gc
+import io
+from pathlib import Path
+
 import gradio as gr
-import speech_recognition as sr
+import numpy as np
+import torch
+import whisper
+from pydub import AudioSegment

 from modules import shared

@@ -8,13 +16,16 @@ input_hijack = {
     'value': ["", ""]
 }

-# parameters which can be customized in settings.json of webui
+# parameters which can be customized in settings.yaml of webui
 params = {
     'whipser_language': 'english',
     'whipser_model': 'small.en',
     'auto_submit': True
 }

+startup_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+WHISPERMODEL = whisper.load_model(params['whipser_model'], device=startup_device)
+

 def chat_input_modifier(text, visible_text, state):
     global input_hijack
@@ -25,47 +36,84 @@ def chat_input_modifier(text, visible_text, state):
     return text, visible_text


-def do_stt(audio, whipser_model, whipser_language):
-    transcription = ""
-    r = sr.Recognizer()
+def do_stt(audio, whipser_language):
+    # use pydub to convert sample_rate and sample_width for whisper input
+    dubaudio = AudioSegment.from_file(io.BytesIO(audio))
+    dubaudio = dubaudio.set_channels(1)
+    dubaudio = dubaudio.set_frame_rate(16000)
+    dubaudio = dubaudio.set_sample_width(2)

-    # Convert to AudioData
-    audio_data = sr.AudioData(sample_rate=audio[0], frame_data=audio[1], sample_width=4)
+    # same method to get the array as openai whisper repo used from wav file
+    audio_np = np.frombuffer(dubaudio.raw_data, np.int16).flatten().astype(np.float32) / 32768.0

-    try:
-        transcription = r.recognize_whisper(audio_data, language=whipser_language, model=whipser_model)
-    except sr.UnknownValueError:
-        print("Whisper could not understand audio")
-    except sr.RequestError as e:
-        print("Could not request results from Whisper", e)
+    if len(whipser_language) == 0:
+        result = WHISPERMODEL.transcribe(audio=audio_np)
+    else:
+        result = WHISPERMODEL.transcribe(audio=audio_np, language=whipser_language)
+    return result["text"]


-def auto_transcribe(audio, auto_submit, whipser_model, whipser_language):
-    if audio is None:
-        return "", ""
-    transcription = do_stt(audio, whipser_model, whipser_language)
-    if auto_submit:
-        input_hijack.update({"state": True, "value": [transcription, transcription]})
-
-    return transcription, None
+def auto_transcribe(audio, auto_submit, whipser_language):
+    if audio is None or audio == "":
+        print("Whisper received no audio data")
+        return "", ""
+    audio_bytes = base64.b64decode(audio.split(',')[1])
+
+    transcription = do_stt(audio_bytes, whipser_language)
+    if auto_submit:
+        input_hijack.update({"state": True, "value": [transcription, transcription]})
+    return transcription
+
+
+def reload_whispermodel(whisper_model_name: str, whisper_language: str, device: str):
+    if len(whisper_model_name) > 0:
+        global WHISPERMODEL
+        WHISPERMODEL = None
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        gc.collect()
+
+        if device != "none":
+            if device == "cuda":
+                device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+            WHISPERMODEL = whisper.load_model(whisper_model_name, device=device)
+            params.update({"whipser_model": whisper_model_name})
+            if ".en" in whisper_model_name:
+                whisper_language = "english"
+            audio_update = gr.Audio.update(interactive=True)
+        else:
+            audio_update = gr.Audio.update(interactive=False)
+
+        return [whisper_model_name, whisper_language, str(device), audio_update]
+
+
 def ui():
     with gr.Accordion("Whisper STT", open=True):
         with gr.Row():
-            audio = gr.Audio(source="microphone")
+            audio = gr.Textbox(elem_id="audio-base64", visible=False)
+            record_button = gr.Button("Rec.", elem_id="record-button", elem_classes="custom-button")
         with gr.Row():
             with gr.Accordion("Settings", open=False):
                 auto_submit = gr.Checkbox(label='Submit the transcribed audio automatically', value=params['auto_submit'])
-                whipser_model = gr.Dropdown(label='Whisper Model', value=params['whipser_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large"])
-                whipser_language = gr.Dropdown(label='Whisper Language', value=params['whipser_language'], choices=["chinese", "german", "spanish", "russian", "korean", "french", "japanese", "portuguese", "turkish", "polish", "catalan", "dutch", "arabic", "swedish", "italian", "indonesian", "hindi", "finnish", "vietnamese", "hebrew", "ukrainian", "greek", "malay", "czech", "romanian", "danish", "hungarian", "tamil", "norwegian", "thai", "urdu", "croatian", "bulgarian", "lithuanian", "latin", "maori", "malayalam", "welsh", "slovak", "telugu", "persian", "latvian", "bengali", "serbian", "azerbaijani", "slovenian", "kannada", "estonian", "macedonian", "breton", "basque", "icelandic", "armenian", "nepali", "mongolian", "bosnian", "kazakh", "albanian", "swahili", "galician", "marathi", "punjabi", "sinhala", "khmer", "shona", "yoruba", "somali", "afrikaans", "occitan", "georgian", "belarusian", "tajik", "sindhi", "gujarati", "amharic", "yiddish", "lao", "uzbek", "faroese", "haitian creole", "pashto", "turkmen", "nynorsk", "maltese", "sanskrit", "luxembourgish", "myanmar", "tibetan", "tagalog", "malagasy", "assamese", "tatar", "hawaiian", "lingala", "hausa", "bashkir", "javanese", "sundanese"])
+                device_dropd = gr.Dropdown(label='Device', value=str(startup_device), choices=["cuda", "cpu", "none"])
+                whisper_model_dropd = gr.Dropdown(label='Whisper Model', value=params['whipser_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large"])
+                whisper_language = gr.Dropdown(label='Whisper Language', value=params['whipser_language'], choices=["english", "chinese", "german", "spanish", "russian", "korean", "french", "japanese", "portuguese", "turkish", "polish", "catalan", "dutch", "arabic", "swedish", "italian", "indonesian", "hindi", "finnish", "vietnamese", "hebrew", "ukrainian", "greek", "malay", "czech", "romanian", "danish", "hungarian", "tamil", "norwegian", "thai", "urdu", "croatian", "bulgarian", "lithuanian", "latin", "maori", "malayalam", "welsh", "slovak", "telugu", "persian", "latvian", "bengali", "serbian", "azerbaijani", "slovenian", "kannada", "estonian", "macedonian", "breton", "basque", "icelandic", "armenian", "nepali", "mongolian", "bosnian", "kazakh", "albanian", "swahili", "galician", "marathi", "punjabi", "sinhala", "khmer", "shona", "yoruba", "somali", "afrikaans", "occitan", "georgian", "belarusian", "tajik", "sindhi", "gujarati", "amharic", "yiddish", "lao", "uzbek", "faroese", "haitian creole", "pashto", "turkmen", "nynorsk", "maltese", "sanskrit", "luxembourgish", "myanmar", "tibetan", "tagalog", "malagasy", "assamese", "tatar", "hawaiian", "lingala", "hausa", "bashkir", "javanese", "sundanese"])

-    audio.stop_recording(
-        auto_transcribe, [audio, auto_submit, whipser_model, whipser_language], [shared.gradio['textbox'], audio]).then(
-        None, auto_submit, None, js="(check) => {if (check) { document.getElementById('Generate').click() }}")
+    audio.change(
+        auto_transcribe, [audio, auto_submit, whisper_language], [shared.gradio['textbox']]).then(
+        None, auto_submit, None, _js="(check) => {if (check) { document.getElementById('Generate').click() }}")

-    whipser_model.change(lambda x: params.update({"whipser_model": x}), whipser_model, None)
-    whipser_language.change(lambda x: params.update({"whipser_language": x}), whipser_language, None)
+    device_dropd.input(reload_whispermodel, [whisper_model_dropd, whisper_language, device_dropd], [whisper_model_dropd, whisper_language, device_dropd, audio])
+    whisper_model_dropd.change(reload_whispermodel, [whisper_model_dropd, whisper_language, device_dropd], [whisper_model_dropd, whisper_language, device_dropd, audio])
+    whisper_language.change(lambda x: params.update({"whipser_language": x}), whisper_language, None)
     auto_submit.change(lambda x: params.update({"auto_submit": x}), auto_submit, None)
+
+
+def custom_js():
+    """
+    Returns custom javascript as a string. It is applied whenever the web UI is
+    loaded.
+    :return:
+    """
+    with open(Path(__file__).parent.resolve() / "script.js", "r") as f:
+        return f.read()
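[Note] reload_whispermodel frees the old checkpoint before loading the new one:
it drops the module-level reference, empties the CUDA allocator cache, and
forces a GC pass, which is the usual way to actually release VRAM held by a
dead torch model. The same pattern in isolation (a sketch; the model names are
just examples):

    import gc

    import torch
    import whisper

    model = whisper.load_model("small.en", device="cuda" if torch.cuda.is_available() else "cpu")

    def swap_model(name: str, device: str):
        global model
        model = None  # drop the only reference to the old weights
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # return cached blocks to the driver
        gc.collect()  # collect the now-unreferenced tensors
        model = whisper.load_model(name, device=device)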
js/main.js (103 changed lines)
@@ -7,30 +7,30 @@ main_parent.parentNode.style = "gap: 0";
 main_parent.parentNode.parentNode.style = "padding: 0";

 document.querySelector(".header_bar").addEventListener("click", function(event) {
-    if (event.target.tagName === "BUTTON") {
-        const buttonText = event.target.textContent.trim();
+    if (event.target.tagName !== "BUTTON") return;

-        let chat_visible = (buttonText == "Chat");
-        let default_visible = (buttonText == "Default");
-        let notebook_visible = (buttonText == "Notebook");
+    const buttonText = event.target.textContent.trim();
+    const extensionsVisible = ["Chat", "Default", "Notebook"].includes(buttonText);
+    const chatVisible = buttonText === "Chat";
+    const showControlsChecked = document.querySelector("#show-controls input").checked;
+    const extensions = document.querySelector("#extensions");

-        // Check if one of the generation tabs is visible
-        if (chat_visible || notebook_visible || default_visible) {
-            extensions && (extensions.style.display = "flex");
-
-            if (chat_visible) {
-                this.style.marginBottom = "0px";
-                extensions && (extensions.style.maxWidth = "880px");
-                extensions && (extensions.style.padding = "0px");
-            } else {
-                this.style.marginBottom = "19px";
-                extensions && (extensions.style.maxWidth = "none");
-                extensions && (extensions.style.padding = "15px");
-            }
-        } else {
-            this.style.marginBottom = "19px";
-            extensions && (extensions.style.display = "none");
+    if (extensionsVisible) {
+        if (extensions) {
+            extensions.style.display = "flex";
+            extensions.style.maxWidth = chatVisible ? "880px" : "none";
+            extensions.style.padding = chatVisible ? "0px" : "15px";
+        }
+        this.style.marginBottom = chatVisible ? "0px" : "19px";
+
+        if (chatVisible && !showControlsChecked) {
+            document.querySelectorAll("#chat-tab > div > :nth-child(n+2), #extensions").forEach(element => {
+                element.style.display = "none";
+            });
+        }
+    } else {
+        this.style.marginBottom = "19px";
+        if (extensions) extensions.style.display = "none";
     }
 });
@@ -539,3 +539,64 @@ document.querySelectorAll(".focus-on-chat-input").forEach(element => {
 // Fix a border around the "past chats" menu
 //------------------------------------------------
 document.getElementById("past-chats").parentNode.style.borderRadius = "0px";
+
+//------------------------------------------------
+// Allow the character dropdown to coexist at the
+// Chat tab and the Parameters > Character tab
+//------------------------------------------------
+
+const headerBar = document.querySelector(".header_bar");
+let originalParent;
+let originalIndex; // To keep track of the original position
+let movedElement;
+
+function moveToChatTab() {
+    const characterMenu = document.getElementById("character-menu");
+    const grandParent = characterMenu.parentElement.parentElement;
+
+    // Save the initial location for the character dropdown
+    if (!originalParent) {
+        originalParent = grandParent.parentElement;
+        originalIndex = Array.from(originalParent.children).indexOf(grandParent);
+        movedElement = grandParent;
+    }
+
+    // Do not show the Character dropdown in the Chat tab when "instruct" mode is selected
+    const instructRadio = document.querySelector("#chat-mode input[value=\"instruct\"]");
+    if (instructRadio && instructRadio.checked) {
+        grandParent.style.display = "none";
+    }
+
+    const chatControlsFirstChild = document.querySelector("#chat-controls").firstElementChild;
+    const newParent = chatControlsFirstChild;
+    let newPosition = newParent.children.length - 2;
+
+    newParent.insertBefore(grandParent, newParent.children[newPosition]);
+    document.getElementById("save-character").style.display = "none";
+}
+
+function restoreOriginalPosition() {
+    if (originalParent && movedElement) {
+        if (originalIndex >= originalParent.children.length) {
+            originalParent.appendChild(movedElement);
+        } else {
+            originalParent.insertBefore(movedElement, originalParent.children[originalIndex]);
+        }
+
+        document.getElementById("save-character").style.display = "";
+        movedElement.style.display = "";
+    }
+}
+
+headerBar.addEventListener("click", (e) => {
+    if (e.target.tagName === "BUTTON") {
+        const tabName = e.target.textContent.trim();
+        if (tabName === "Chat") {
+            moveToChatTab();
+        } else {
+            restoreOriginalPosition();
+        }
+    }
+});
+
+moveToChatTab();
modules/chat.py
@@ -3,6 +3,7 @@ import copy
 import functools
 import html
 import json
+import pprint
 import re
 from datetime import datetime
 from functools import partial
@@ -259,10 +260,27 @@ def get_stopping_strings(state):
             suffix_bot + prefix_user,
         ]

+    # Try to find the EOT token
+    for item in stopping_strings.copy():
+        item = item.strip()
+        if item.startswith("<") and ">" in item:
+            stopping_strings.append(item.split(">")[0] + ">")
+        elif item.startswith("[") and "]" in item:
+            stopping_strings.append(item.split("]")[0] + "]")
+
     if 'stopping_strings' in state and isinstance(state['stopping_strings'], list):
         stopping_strings += state.pop('stopping_strings')

-    return list(set(stopping_strings))
+    # Remove redundant items that start with another item
+    result = [item for item in stopping_strings if not any(item.startswith(other) and item != other for other in stopping_strings)]
+    result = list(set(result))
+
+    if shared.args.verbose:
+        logger.info("STOPPING_STRINGS=")
+        pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(result)
+        print()
+
+    return result


 def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_message=True, for_ui=False):
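[Note] A quick worked example of the two new steps in get_stopping_strings. An
EOT-style suffix such as "<|eot_id|>\nassistant" gets its token prefix
"<|eot_id|>" appended as an extra stop string, and the redundancy pass then
drops the longer string because the shorter one stops generation first:

    strings = ["<|eot_id|>\nassistant", "\nuser:"]

    # EOT extraction: '<|eot_id|>\nassistant'.split(">")[0] + ">" -> '<|eot_id|>'
    for item in strings.copy():
        item = item.strip()
        if item.startswith("<") and ">" in item:
            strings.append(item.split(">")[0] + ">")

    # Keep only strings that no other string is a prefix of
    result = [s for s in strings if not any(s.startswith(o) and s != o for o in strings)]
    print(sorted(set(result)))  # ['\nuser:', '<|eot_id|>']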
modules/llama_cpp_python_hijack.py
@@ -1,3 +1,4 @@
+import importlib
 from typing import Sequence

 from tqdm import tqdm
@@ -5,20 +6,55 @@ from tqdm import tqdm
 from modules import shared
 from modules.cache_utils import process_llamacpp_cache

-try:
-    import llama_cpp
-except:
-    llama_cpp = None
-
-try:
-    import llama_cpp_cuda
-except:
-    llama_cpp_cuda = None
-
-try:
-    import llama_cpp_cuda_tensorcores
-except:
-    llama_cpp_cuda_tensorcores = None
+imported_module = None
+
+
+def llama_cpp_lib():
+    global imported_module
+
+    return_lib = None
+
+    if shared.args.cpu:
+        if imported_module and imported_module != 'llama_cpp':
+            raise Exception(f"Cannot import 'llama_cpp' because '{imported_module}' is already imported. See issue #1575 in llama-cpp-python. Please restart the server before attempting to use a different version of llama-cpp-python.")
+        try:
+            return_lib = importlib.import_module('llama_cpp')
+            imported_module = 'llama_cpp'
+        except:
+            pass
+
+    if shared.args.tensorcores and return_lib is None:
+        if imported_module and imported_module != 'llama_cpp_cuda_tensorcores':
+            raise Exception(f"Cannot import 'llama_cpp_cuda_tensorcores' because '{imported_module}' is already imported. See issue #1575 in llama-cpp-python. Please restart the server before attempting to use a different version of llama-cpp-python.")
+        try:
+            return_lib = importlib.import_module('llama_cpp_cuda_tensorcores')
+            imported_module = 'llama_cpp_cuda_tensorcores'
+        except:
+            pass
+
+    if return_lib is None:
+        if imported_module and imported_module != 'llama_cpp_cuda':
+            raise Exception(f"Cannot import 'llama_cpp_cuda' because '{imported_module}' is already imported. See issue #1575 in llama-cpp-python. Please restart the server before attempting to use a different version of llama-cpp-python.")
+        try:
+            return_lib = importlib.import_module('llama_cpp_cuda')
+            imported_module = 'llama_cpp_cuda'
+        except:
+            pass
+
+    if return_lib is None and not shared.args.cpu:
+        if imported_module and imported_module != 'llama_cpp':
+            raise Exception(f"Cannot import 'llama_cpp' because '{imported_module}' is already imported. See issue #1575 in llama-cpp-python. Please restart the server before attempting to use a different version of llama-cpp-python.")
+        try:
+            return_lib = importlib.import_module('llama_cpp')
+            imported_module = 'llama_cpp'
+        except:
+            pass
+
+    if return_lib is not None:
+        monkey_patch_llama_cpp_python(return_lib)
+
+    return return_lib


 def eval_with_progress(self, tokens: Sequence[int]):
@@ -63,7 +99,7 @@ def eval_with_progress(self, tokens: Sequence[int]):
         self.n_tokens += n_tokens


-def monkey_patch_generate(lib):
+def monkey_patch_llama_cpp_python(lib):

     def my_generate(self, *args, **kwargs):

@@ -77,11 +113,6 @@ def monkey_patch_generate(lib):
         for output in self.original_generate(*args, **kwargs):
             yield output

+    lib.Llama.eval = eval_with_progress
     lib.Llama.original_generate = lib.Llama.generate
     lib.Llama.generate = my_generate
-
-
-for lib in [llama_cpp, llama_cpp_cuda, llama_cpp_cuda_tensorcores]:
-    if lib is not None:
-        lib.Llama.eval = eval_with_progress
-        monkey_patch_generate(lib)
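[Note] Only one llama-cpp-python build (CPU, CUDA, or CUDA with tensor cores)
can be imported per process; loading a second variant breaks, which is the
llama-cpp-python issue #1575 referenced in the error messages above. The
rewrite therefore imports lazily via importlib and records which variant the
process committed to. The guard pattern in isolation (a sketch):

    import importlib

    imported_module = None  # which variant this process has committed to

    def import_variant(name: str):
        """Import one llama_cpp build, refusing to mix variants."""
        global imported_module
        if imported_module and imported_module != name:
            raise RuntimeError(f"'{imported_module}' already imported; restart the server to switch")
        lib = importlib.import_module(name)  # raises ImportError if the wheel is absent
        imported_module = name
        return lib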
modules/llamacpp_hf.py
@@ -7,35 +7,10 @@ from torch.nn import CrossEntropyLoss
 from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
 from transformers.modeling_outputs import CausalLMOutputWithPast

-from modules import llama_cpp_python_hijack, shared
+from modules import shared
+from modules.llama_cpp_python_hijack import llama_cpp_lib
 from modules.logging_colors import logger

-try:
-    import llama_cpp
-except:
-    llama_cpp = None
-
-try:
-    import llama_cpp_cuda
-except:
-    llama_cpp_cuda = None
-
-try:
-    import llama_cpp_cuda_tensorcores
-except:
-    llama_cpp_cuda_tensorcores = None
-
-
-def llama_cpp_lib():
-    if shared.args.cpu and llama_cpp is not None:
-        return llama_cpp
-    elif shared.args.tensorcores and llama_cpp_cuda_tensorcores is not None:
-        return llama_cpp_cuda_tensorcores
-    elif llama_cpp_cuda is not None:
-        return llama_cpp_cuda
-    else:
-        return llama_cpp
-

 class LlamacppHF(PreTrainedModel):
     def __init__(self, model, path):

@@ -221,6 +196,13 @@ class LlamacppHF(PreTrainedModel):
             'flash_attn': shared.args.flash_attn
         }

+        if shared.args.cache_4bit:
+            params["type_k"] = 2
+            params["type_v"] = 2
+        elif shared.args.cache_8bit:
+            params["type_k"] = 8
+            params["type_v"] = 8
+
         Llama = llama_cpp_lib().Llama
         model = Llama(**params)
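[Note] type_k/type_v select the KV-cache tensor type by llama.cpp's ggml type
id: 2 is GGML_TYPE_Q4_0 and 8 is GGML_TYPE_Q8_0, so --cache_4bit requests a
Q4_0 cache and --cache_8bit a Q8_0 cache (in llama.cpp, quantizing the V cache
additionally needs flash attention enabled). The mapping as a standalone
sketch, assuming only that Llama(**params) accepts type_k/type_v as in
llama-cpp-python:

    GGML_TYPE_Q4_0 = 2  # ggml type ids from the llama.cpp enum
    GGML_TYPE_Q8_0 = 8

    def kv_cache_types(cache_4bit: bool, cache_8bit: bool) -> dict:
        if cache_4bit:
            return {"type_k": GGML_TYPE_Q4_0, "type_v": GGML_TYPE_Q4_0}
        if cache_8bit:
            return {"type_k": GGML_TYPE_Q8_0, "type_v": GGML_TYPE_Q8_0}
        return {}  # leave llama-cpp-python's default f16 cache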
modules/llamacpp_model.py
@@ -4,37 +4,12 @@ from functools import partial
 import numpy as np
 import torch

-from modules import llama_cpp_python_hijack, shared
+from modules import shared
 from modules.callbacks import Iteratorize
+from modules.llama_cpp_python_hijack import llama_cpp_lib
 from modules.logging_colors import logger
 from modules.text_generation import get_max_prompt_length

-try:
-    import llama_cpp
-except:
-    llama_cpp = None
-
-try:
-    import llama_cpp_cuda
-except:
-    llama_cpp_cuda = None
-
-try:
-    import llama_cpp_cuda_tensorcores
-except:
-    llama_cpp_cuda_tensorcores = None
-
-
-def llama_cpp_lib():
-    if shared.args.cpu and llama_cpp is not None:
-        return llama_cpp
-    elif shared.args.tensorcores and llama_cpp_cuda_tensorcores is not None:
-        return llama_cpp_cuda_tensorcores
-    elif llama_cpp_cuda is not None:
-        return llama_cpp_cuda
-    else:
-        return llama_cpp
-

 def ban_eos_logits_processor(eos_token, input_ids, logits):
     logits[eos_token] = -float('inf')

@@ -100,6 +75,13 @@ class LlamaCppModel:
             'flash_attn': shared.args.flash_attn
         }

+        if shared.args.cache_4bit:
+            params["type_k"] = 2
+            params["type_v"] = 2
+        elif shared.args.cache_8bit:
+            params["type_k"] = 8
+            params["type_v"] = 8
+
         result.model = Llama(**params)
         if cache_capacity > 0:
             result.model.set_cache(LlamaCache(capacity_bytes=cache_capacity))
modules/loaders.py
@@ -21,6 +21,7 @@ loaders_and_params = OrderedDict({
         'trust_remote_code',
         'no_use_fast',
         'use_flash_attention_2',
+        'use_eager_attention',
         'alpha_value',
         'compress_pos_emb',
         'disable_exllama',

@@ -30,6 +31,8 @@ loaders_and_params = OrderedDict({
     'llama.cpp': [
         'n_ctx',
         'n_gpu_layers',
+        'cache_8bit',
+        'cache_4bit',
         'tensor_split',
         'n_batch',
         'threads',

@@ -51,6 +54,8 @@ loaders_and_params = OrderedDict({
     'llamacpp_HF': [
         'n_ctx',
         'n_gpu_layers',
+        'cache_8bit',
+        'cache_4bit',
         'tensor_split',
         'n_batch',
         'threads',
modules/models.py
@@ -146,6 +146,9 @@ def huggingface_loader(model_name):
     if shared.args.force_safetensors:
         params['force_safetensors'] = True

+    if shared.args.use_eager_attention:
+        params['attn_implementation'] = 'eager'
+
     config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=shared.args.trust_remote_code)

     if 'chatglm' in model_name.lower():
modules/models_settings.py
@@ -9,6 +9,8 @@ from modules import chat, loaders, metadata_gguf, shared, ui

 def get_fallback_settings():
     return {
+        'bf16': False,
+        'use_eager_attention': False,
         'wbits': 'None',
         'groupsize': 'None',
         'desc_act': False,

@@ -97,10 +99,18 @@ def get_model_metadata(model):
        elif 'attn_config' in metadata and 'rope_theta' in metadata['attn_config']:
            model_settings['rope_freq_base'] = metadata['attn_config']['rope_theta']

-        if 'rope_scaling' in metadata and type(metadata['rope_scaling']) is dict and all(key in metadata['rope_scaling'] for key in ('type', 'factor')):
+        if 'rope_scaling' in metadata and isinstance(metadata['rope_scaling'], dict) and all(key in metadata['rope_scaling'] for key in ('type', 'factor')):
             if metadata['rope_scaling']['type'] == 'linear':
                 model_settings['compress_pos_emb'] = metadata['rope_scaling']['factor']

+        # For Gemma-2
+        if 'torch_dtype' in metadata and metadata['torch_dtype'] == 'bfloat16':
+            model_settings['bf16'] = True
+
+        # For Gemma-2
+        if 'architectures' in metadata and isinstance(metadata['architectures'], list) and 'Gemma2ForCausalLM' in metadata['architectures']:
+            model_settings['use_eager_attention'] = True
+
         # Read GPTQ metadata for old GPTQ loaders
         if 'quantization_config' in metadata and metadata['quantization_config'].get('quant_method', '') != 'exl2':
             if 'bits' in metadata['quantization_config']:

@@ -133,7 +143,7 @@ def get_model_metadata(model):
         for k in ['eos_token', 'bos_token']:
             if k in metadata:
                 value = metadata[k]
-                if type(value) is dict:
+                if isinstance(value, dict):
                     value = value['content']

                 template = template.replace(k, "'{}'".format(value))

@@ -168,7 +178,7 @@ def infer_loader(model_name, model_settings):
     path_to_model = Path(f'{shared.args.model_dir}/{model_name}')
     if not path_to_model.exists():
         loader = None
-    elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and type(model_settings['wbits']) is int and model_settings['wbits'] > 0):
+    elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and isinstance(model_settings['wbits'], int) and model_settings['wbits'] > 0):
         loader = 'ExLlamav2_HF'
     elif (path_to_model / 'quant_config.json').exists() or re.match(r'.*-awq', model_name.lower()):
         loader = 'AutoAWQ'
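[Note] The two "For Gemma-2" blocks turn model metadata directly into loader
defaults: a config.json torch_dtype of "bfloat16" enables bf16, and a
Gemma2ForCausalLM architecture forces eager attention, since Gemma-2's
attention logit soft-capping was not supported by the default SDPA/flash
attention paths in transformers at the time. Reduced to a standalone sketch:

    metadata = {"torch_dtype": "bfloat16", "architectures": ["Gemma2ForCausalLM"]}

    model_settings = {}
    if metadata.get("torch_dtype") == "bfloat16":
        model_settings["bf16"] = True
    if "Gemma2ForCausalLM" in metadata.get("architectures", []):
        model_settings["use_eager_attention"] = True  # -> attn_implementation='eager'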
modules/sampler_hijack.py
@@ -359,14 +359,14 @@ class RepetitionPenaltyLogitsProcessorWithRange(LogitsProcessor):
         return scores


-def get_logits_warper_patch(self, generation_config):
+def get_logits_warper_patch(self, generation_config, **kwargs):

     # Parameter sanitization
     if isinstance(generation_config.temperature, int):
         generation_config.temperature = float(generation_config.temperature)  # Must be float

     # Get the original warpers
-    warpers = self._get_logits_warper_old(generation_config)
+    warpers = self._get_logits_warper_old(generation_config, **kwargs)

     # Replace temperature with our modified class.
     # Currently, it behaves identically to the original.
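[Note] This pairs with the transformers==4.42.* bump below: newer transformers
passes an extra argument (a device keyword) to _get_logits_warper, so the
hijacked wrapper must accept and forward arbitrary kwargs or every generation
call would raise a TypeError. Forwarding **kwargs is the usual way to keep a
monkey patch signature-stable across library versions; a toy illustration with
a hypothetical target class:

    class Lib:
        def method(self, config, device=None):  # upstream grew a new parameter
            return f"processed {config} on {device}"

    _old_method = Lib.method

    def patched_method(self, config, **kwargs):
        # forward unknown kwargs so upstream signature changes don't break us
        return _old_method(self, config, **kwargs).upper()

    Lib.method = patched_method
    print(Lib().method("cfg", device="cuda"))  # PROCESSED CFG ON CUDA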
modules/shared.py
@@ -106,6 +106,7 @@ group.add_argument('--trust-remote-code', action='store_true', help='Set trust_r
 group.add_argument('--force-safetensors', action='store_true', help='Set use_safetensors=True while loading the model. This prevents arbitrary code execution.')
 group.add_argument('--no_use_fast', action='store_true', help='Set use_fast=False while loading the tokenizer (it\'s True by default). Use this if you have any problems related to use_fast.')
 group.add_argument('--use_flash_attention_2', action='store_true', help='Set use_flash_attention_2=True while loading the model.')
+group.add_argument('--use_eager_attention', action='store_true', help='Set attn_implementation= eager while loading the model.')

 # bitsandbytes 4-bit
 group = parser.add_argument_group('bitsandbytes 4-bit')
modules/ui.py
@@ -43,6 +43,11 @@ theme = gr.themes.Default(
     body_text_color_subdued='#484848',
     background_fill_secondary='#eaeaea',
+    background_fill_primary='var(--neutral-50)',
+    body_background_fill="white",
+    block_background_fill="#f4f4f4",
+    body_text_color="#333",
+    button_secondary_background_fill="#f4f4f4",
     button_secondary_border_color="var(--border-color-primary)"
 )

 if Path("notification.mp3").exists():

@@ -64,6 +69,7 @@ def list_model_elements():
         'trust_remote_code',
         'no_use_fast',
         'use_flash_attention_2',
+        'use_eager_attention',
         'load_in_4bit',
         'compute_dtype',
         'quant_type',
modules/ui_chat.py
@@ -87,16 +87,11 @@ def create_ui():
     with gr.Row():
         shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template selected under Parameters > Instruction template must match the current model.', elem_id='chat-mode')

-    with gr.Row():
-        shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', elem_classes='slim-dropdown')
-        shared.gradio['refresh_character'] = ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
-        shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
-
     with gr.Row():
         shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')

     with gr.Row():
-        shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=16, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=False, elem_classes=['add_scrollbar'])
+        shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=False, elem_classes=['add_scrollbar'])


 def create_chat_settings_ui():

@@ -105,10 +100,15 @@ def create_chat_settings_ui():
     with gr.Row():
         with gr.Column(scale=8):
             with gr.Tab("Character"):
+                with gr.Row():
+                    shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
+                    ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
+                    shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', elem_id="save-character", interactive=not mu)
+                    shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
+
                 shared.gradio['name2'] = gr.Textbox(value='', lines=1, label='Character\'s name')
                 shared.gradio['context'] = gr.Textbox(value='', lines=10, label='Context', elem_classes=['add_scrollbar'])
                 shared.gradio['greeting'] = gr.Textbox(value='', lines=5, label='Greeting', elem_classes=['add_scrollbar'])
-                shared.gradio['save_character'] = gr.Button('Save character', elem_classes=['small-button'], interactive=not mu)

             with gr.Tab("User"):
                 shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Name')

@@ -300,8 +300,10 @@ def create_event_handlers():
         lambda x: gr.update(choices=(histories := chat.find_all_histories_with_first_prompts(x)), value=histories[0][1]), gradio('interface_state'), gradio('unique_id'), show_progress=False).then(
         None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}')

+    shared.gradio['mode'].change(None, gradio('mode'), None, js="(mode) => {mode === 'instruct' ? document.getElementById('character-menu').parentNode.parentNode.style.display = 'none' : document.getElementById('character-menu').parentNode.parentNode.style.display = ''}")
+
     shared.gradio['mode'].change(
-        lambda x: [gr.update(visible=(x != 'instruct'))] * 4 + [gr.update(visible=(x == 'chat-instruct'))], gradio('mode'), gradio('character_menu', 'refresh_character', 'delete_character', 'chat_style', 'chat-instruct_command'), show_progress=False).then(
+        lambda x: [gr.update(visible=x != 'instruct'), gr.update(visible=x == 'chat-instruct')], gradio('mode'), gradio('chat_style', 'chat-instruct_command'), show_progress=False).then(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         chat.load_latest_history, gradio('interface_state'), gradio('history')).then(
         chat.redraw_html, gradio(reload_arr), gradio('display')).then(
modules/ui_default.py
@@ -16,7 +16,6 @@ outputs = ('output_textbox', 'html-default')
 def create_ui():
     mu = shared.args.multi_user
     with gr.Tab('Default', elem_id='default-tab'):
-        shared.gradio['last_input-default'] = gr.State('')
         with gr.Row():
             with gr.Column():
                 with gr.Row():

@@ -63,14 +62,12 @@ def create_ui():

 def create_event_handlers():
     shared.gradio['Generate-default'].click(
-        lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         None, None, None, js=f'() => {{{ui.audio_notification_js}}}')

     shared.gradio['textbox-default'].submit(
-        lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
modules/ui_model_menu.py
@@ -115,6 +115,7 @@ def create_ui():
             shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
             shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant)
             shared.gradio['use_flash_attention_2'] = gr.Checkbox(label="use_flash_attention_2", value=shared.args.use_flash_attention_2, info='Set use_flash_attention_2=True while loading the model.')
+            shared.gradio['use_eager_attention'] = gr.Checkbox(label="use_eager_attention", value=shared.args.use_eager_attention, info='Set attn_implementation= eager while loading the model.')
             shared.gradio['flash_attn'] = gr.Checkbox(label="flash_attn", value=shared.args.flash_attn, info='Use flash-attention.')
             shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices)
             shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards.')
requirements.txt
@@ -1,5 +1,5 @@
-accelerate==0.30.*
-aqlm[gpu,cpu]==1.1.5; platform_system == "Linux"
+accelerate==0.31.*
+aqlm[gpu,cpu]==1.1.6; platform_system == "Linux"
 auto-gptq==0.7.1
 bitsandbytes==0.43.*
 colorama

@@ -7,7 +7,7 @@ datasets
 einops
 gradio==4.26.*
 hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
 lm_eval==0.3.0
 markdown
 numba==0.59.*

@@ -24,7 +24,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.41.*
+transformers==4.42.*
 tqdm
 wandb

@@ -35,22 +35,22 @@ sse-starlette==1.6.5
 tiktoken

 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

 # llama-cpp-python (CUDA, no tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

 # llama-cpp-python (CUDA, tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

 # CUDA wheels
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
requirements_amd.txt
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
 colorama
 datasets
 einops
 gradio==4.26.*
 hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
 lm_eval==0.3.0
 markdown
 numba==0.59.*

@@ -21,7 +21,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.41.*
+transformers==4.42.*
 tqdm
 wandb

@@ -32,14 +32,14 @@ sse-starlette==1.6.5
 tiktoken

 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

 # AMD wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.79+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.79+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.81+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.81+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
requirements_amd_noavx2.txt
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
 colorama
 datasets
 einops
 gradio==4.26.*
 hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
 lm_eval==0.3.0
 markdown
 numba==0.59.*

@@ -21,7 +21,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.41.*
+transformers==4.42.*
 tqdm
 wandb

@@ -32,10 +32,10 @@ sse-starlette==1.6.5
 tiktoken

 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

 # AMD wheels
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
requirements_apple_intel.txt
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
 colorama
 datasets
 einops
 gradio==4.26.*
 hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
 lm_eval==0.3.0
 markdown
 numba==0.59.*

@@ -21,7 +21,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.41.*
+transformers==4.42.*
 tqdm
 wandb

@@ -32,12 +32,10 @@ sse-starlette==1.6.5
 tiktoken

 # Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6-py3-none-any.whl
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
 colorama
 datasets
 einops
 gradio==4.26.*
 hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
 lm_eval==0.3.0
 markdown
 numba==0.59.*
@@ -21,7 +21,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.41.*
+transformers==4.42.*
 tqdm
 wandb

@@ -32,12 +32,10 @@ sse-starlette==1.6.5
 tiktoken

 # Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6-py3-none-any.whl
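The `platform_release` bounds in these markers refer to the Darwin kernel major version, which pairs with the macOS version encoded in the wheel tag (Darwin 20 → macOS 11, 21 → 12, 22 → 13, 23 → 14); note the update drops the Darwin 20 / macOS 11 wheels entirely. A small illustrative helper (not project code) that performs this mapping:

import platform

# Assumed mapping from Darwin kernel major to the marketing macOS version.
DARWIN_TO_MACOS = {
    20: "macOS 11 (Big Sur)",
    21: "macOS 12 (Monterey)",
    22: "macOS 13 (Ventura)",
    23: "macOS 14 (Sonoma)",
}

def macos_name(darwin_release: str) -> str:
    """Translate a Darwin release string like '23.5.0' to a macOS name."""
    major = int(darwin_release.split(".")[0])
    return DARWIN_TO_MACOS.get(major, f"unknown (Darwin {major})")

# On a Mac, platform.release() returns the Darwin kernel version.
print(macos_name(platform.release()))
print(macos_name("21.6.0"))  # -> macOS 12 (Monterey)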
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
 colorama
 datasets
 einops
 gradio==4.26.*
 hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
 lm_eval==0.3.0
 markdown
 numba==0.59.*
@@ -21,7 +21,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.41.*
+transformers==4.42.*
 tqdm
 wandb

@@ -32,7 +32,7 @@ sse-starlette==1.6.5
 tiktoken

 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
 colorama
 datasets
 einops
 gradio==4.26.*
 hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
 lm_eval==0.3.0
 markdown
 numba==0.59.*
@@ -21,7 +21,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.41.*
+transformers==4.42.*
 tqdm
 wandb

@@ -32,7 +32,7 @@ sse-starlette==1.6.5
 tiktoken

 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
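The two CPU-only variants above differ only in the `+cpuavx2` versus `+cpuavx` build tag, so an installer has to choose a requirements file based on the host CPU's feature flags. A sketch of that choice using the third-party `py-cpuinfo` package; the filenames are assumptions, and this is not necessarily the project's own installer logic:

import cpuinfo  # third-party: pip install py-cpuinfo

def pick_cpu_requirements() -> str:
    """Return the assumed requirements filename for this host's CPU."""
    flags = cpuinfo.get_cpu_info().get("flags", [])
    if "avx2" in flags:
        return "requirements_cpu_only.txt"       # assumed filename
    return "requirements_cpu_only_noavx2.txt"    # assumed filename

print(pick_cpu_requirements())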
@@ -1,5 +1,5 @@
-accelerate==0.30.*
-aqlm[gpu,cpu]==1.1.5; platform_system == "Linux"
+accelerate==0.31.*
+aqlm[gpu,cpu]==1.1.6; platform_system == "Linux"
 auto-gptq==0.7.1
 bitsandbytes==0.43.*
 colorama
@@ -7,7 +7,7 @@ datasets
 einops
 gradio==4.26.*
 hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
 lm_eval==0.3.0
 markdown
 numba==0.59.*
@@ -24,7 +24,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.41.*
+transformers==4.42.*
 tqdm
 wandb

@@ -35,22 +35,22 @@ sse-starlette==1.6.5
 tiktoken

 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

 # llama-cpp-python (CUDA, no tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

 # llama-cpp-python (CUDA, tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

 # CUDA wheels
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
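The three wheel families above publish under distinct distribution names (`llama_cpp_python`, `llama_cpp_python_cuda`, `llama_cpp_python_cuda_tensorcores`), so every line whose marker matches the host installs side by side, each pinned to the bumped 0.2.81 build. A quick, illustrative post-install check that the updated pins actually landed; the pinned prefixes are taken from the diffs, while the helper itself is a hypothetical sketch:

from importlib.metadata import PackageNotFoundError, version

# Version prefixes taken from the pins updated in this commit.
EXPECTED = {
    "accelerate": "0.31.",
    "transformers": "4.42.",
    "jinja2": "3.1.4",
}

for name, prefix in EXPECTED.items():
    try:
        installed = version(name)
        status = "ok" if installed.startswith(prefix) else f"mismatch ({installed})"
    except PackageNotFoundError:
        status = "not installed"
    print(f"{name}: {status}")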
@@ -1,10 +1,10 @@
-accelerate==0.30.*
+accelerate==0.31.*
 colorama
 datasets
 einops
 gradio==4.26.*
 hqq==0.1.7.post3
-jinja2==3.1.2
+jinja2==3.1.4
 lm_eval==0.3.0
 markdown
 numba==0.59.*
@@ -21,7 +21,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.41.*
+transformers==4.42.*
 tqdm
 wandb