Mirror of https://github.com/oobabooga/text-generation-webui.git
Add Ascend NPU support (basic) (#5541)
This commit is contained in:
parent a90509d82e
commit fd4e46bce2
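The change follows a single pattern throughout: probe each accelerator backend with the transformers availability helpers and fall through to CUDA. Below is a minimal sketch of that dispatch, assuming Ascend's torch_npu package is installed so that the torch.npu namespace exists; the pick_device helper is illustrative only and not part of the repository.

import torch
from transformers import is_torch_npu_available, is_torch_xpu_available

def pick_device() -> str:
    # Hypothetical helper mirroring the branching added in the hunks below.
    if is_torch_xpu_available():       # Intel XPU
        return "xpu:0"
    elif is_torch_npu_available():     # Huawei Ascend NPU (requires torch_npu)
        return "npu:0"
    elif torch.cuda.is_available():    # NVIDIA CUDA / AMD ROCm
        return "cuda:0"
    return "cpu"

input_ids = torch.tensor([[1, 2, 3]]).to(pick_device())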
@@ -5,7 +5,7 @@ from threading import Thread
 
 import torch
 import transformers
-from transformers import is_torch_xpu_available
+from transformers import is_torch_npu_available, is_torch_xpu_available
 
 import modules.shared as shared
 
@@ -99,5 +99,7 @@ def clear_torch_cache():
     if not shared.args.cpu:
         if is_torch_xpu_available():
             torch.xpu.empty_cache()
+        elif is_torch_npu_available():
+            torch.npu.empty_cache()
         else:
             torch.cuda.empty_cache()
@@ -1,5 +1,5 @@
 import torch
-from transformers import is_torch_xpu_available
+from transformers import is_torch_npu_available, is_torch_xpu_available
 
 from modules import sampler_hijack, shared
 from modules.logging_colors import logger
@@ -34,6 +34,8 @@ def get_next_logits(prompt, state, use_samplers, previous, top_logits=25, return
     if is_non_hf_exllamav2:
         if is_torch_xpu_available():
             tokens = shared.tokenizer.encode(prompt).to("xpu:0")
+        elif is_torch_npu_available():
+            tokens = shared.tokenizer.encode(prompt).to("npu:0")
         else:
             tokens = shared.tokenizer.encode(prompt).cuda()
         scores = shared.model.get_logits(tokens)[-1][-1]
@@ -43,6 +45,8 @@ def get_next_logits(prompt, state, use_samplers, previous, top_logits=25, return
     else:
         if is_torch_xpu_available():
             tokens = shared.tokenizer.encode(prompt, return_tensors='pt').to("xpu:0")
+        elif is_torch_npu_available():
+            tokens = shared.tokenizer.encode(prompt, return_tensors='pt').to("npu:0")
         else:
             tokens = shared.tokenizer.encode(prompt, return_tensors='pt').cuda()
         output = shared.model(input_ids=tokens)
@@ -10,7 +10,11 @@ from pathlib import Path
 import torch
 import transformers
 from accelerate import infer_auto_device_map, init_empty_weights
-from accelerate.utils import is_ccl_available, is_xpu_available
+from accelerate.utils import (
+    is_ccl_available,
+    is_npu_available,
+    is_xpu_available
+)
 from transformers import (
     AutoConfig,
     AutoModel,
@@ -45,6 +49,9 @@ if shared.args.deepspeed:
     if is_xpu_available() and is_ccl_available():
         torch.xpu.set_device(local_rank)
         deepspeed.init_distributed(backend="ccl")
+    elif is_npu_available():
+        torch.npu.set_device(local_rank)
+        deepspeed.init_distributed(dist_backend="hccl")
     else:
         torch.cuda.set_device(local_rank)
         deepspeed.init_distributed()
@@ -164,6 +171,9 @@ def huggingface_loader(model_name):
         elif is_xpu_available():
             device = torch.device("xpu")
             model = model.to(device)
+        elif is_npu_available():
+            device = torch.device("npu")
+            model = model.to(device)
         else:
             model = model.cuda()
 
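On Ascend hardware, DeepSpeed's process group is initialized with HCCL (Huawei Collective Communication Library), the NPU counterpart of NCCL on CUDA devices and oneCCL on Intel XPUs, which is why the new branch passes dist_backend="hccl".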
@@ -10,7 +10,11 @@ import traceback
 import numpy as np
 import torch
 import transformers
-from transformers import LogitsProcessorList, is_torch_xpu_available
+from transformers import (
+    LogitsProcessorList,
+    is_torch_npu_available,
+    is_torch_xpu_available
+)
 
 import modules.shared as shared
 from modules.cache_utils import process_llamacpp_cache
@@ -24,7 +28,7 @@ from modules.grammar.grammar_utils import initialize_grammar
 from modules.grammar.logits_process import GrammarConstrainedLogitsProcessor
 from modules.html_generator import generate_basic_html
 from modules.logging_colors import logger
-from modules.models import clear_torch_cache, local_rank
+from modules.models import clear_torch_cache
 
 
 def generate_reply(*args, **kwargs):
@@ -131,12 +135,15 @@ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_lengt
     if shared.model.__class__.__name__ in ['LlamaCppModel', 'Exllamav2Model'] or shared.args.cpu:
         return input_ids
     elif shared.args.deepspeed:
-        return input_ids.to(device=local_rank)
+        import deepspeed
+        return input_ids.to(deepspeed.get_accelerator().current_device_name())
     elif torch.backends.mps.is_available():
         device = torch.device('mps')
         return input_ids.to(device)
     elif is_torch_xpu_available():
         return input_ids.to("xpu:0")
+    elif is_torch_npu_available():
+        return input_ids.to("npu:0")
     else:
         return input_ids.cuda()
 
@@ -213,6 +220,8 @@ def set_manual_seed(seed):
         torch.cuda.manual_seed_all(seed)
     elif is_torch_xpu_available():
         torch.xpu.manual_seed_all(seed)
+    elif is_torch_npu_available():
+        torch.npu.manual_seed_all(seed)
 
     return seed
 
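The encode() change drops the dependency on local_rank and instead asks DeepSpeed's accelerator abstraction for the current device, which keeps the function device-agnostic across CUDA, XPU, and NPU. A small sketch of that API, assuming DeepSpeed is installed; which device string comes back depends on the accelerator DeepSpeed detects at import time.

import torch
from deepspeed.accelerator import get_accelerator

input_ids = torch.tensor([[1, 2, 3]])
# current_device_name() resolves to a device string such as
# "cuda:0", "xpu:0", or "npu:0" for the detected accelerator.
input_ids = input_ids.to(get_accelerator().current_device_name())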
@@ -8,7 +8,7 @@ from pathlib import Path
 import gradio as gr
 import psutil
 import torch
-from transformers import is_torch_xpu_available
+from transformers import is_torch_npu_available, is_torch_xpu_available
 
 from modules import loaders, shared, ui, utils
 from modules.logging_colors import logger
@@ -32,6 +32,9 @@ def create_ui():
     if is_torch_xpu_available():
         for i in range(torch.xpu.device_count()):
             total_mem.append(math.floor(torch.xpu.get_device_properties(i).total_memory / (1024 * 1024)))
+    elif is_torch_npu_available():
+        for i in range(torch.npu.device_count()):
+            total_mem.append(math.floor(torch.npu.get_device_properties(i).total_memory / (1024 * 1024)))
     else:
         for i in range(torch.cuda.device_count()):
             total_mem.append(math.floor(torch.cuda.get_device_properties(i).total_memory / (1024 * 1024)))
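To confirm the new NPU code paths can actually be taken, it may help to verify that PyTorch sees the device before launching the UI. A minimal check, assuming Ascend's torch_npu extension (which registers the torch.npu namespace) is installed:

import torch
import torch_npu  # noqa: F401 - registers the torch.npu namespace

print(torch.npu.is_available())   # True if an Ascend NPU is visible
print(torch.npu.device_count())   # number of visible NPUs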