Mirror of https://github.com/oobabooga/text-generation-webui.git
Do not expose alpha_value to llama.cpp & rope_freq_base to transformers
To avoid confusion
This commit is contained in:
parent b48ab482f8
commit 536f8d58d4
@@ -1,18 +0,0 @@
-def get_alpha_value(alpha, base):
-    '''
-    Gets alpha_value from alpha_value and rope_freq_base
-    '''
-    if base > 0:
-        return (base / 10000.) ** (63 / 64.)
-    else:
-        return alpha
-
-
-def get_rope_freq_base(alpha, base):
-    '''
-    Gets rope_freq_base from alpha_value and rope_freq_base
-    '''
-    if base > 0:
-        return base
-    else:
-        return 10000 * alpha ** (64 / 63.)
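
For reference, the two helpers deleted above are (approximately) inverse mappings between alpha_value and rope_freq_base. A minimal sketch of that round trip, using the same constants as the removed code:

# Sketch only: the conversion the deleted RoPE helpers performed.
alpha = 2.5
base = 10000 * alpha ** (64 / 63.)            # get_rope_freq_base(2.5, 0)  -> ~25366
alpha_back = (base / 10000.) ** (63 / 64.)    # get_alpha_value(2.5, base)  -> ~2.5
print(base, alpha_back)
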
@@ -7,7 +7,7 @@ from torch.nn import CrossEntropyLoss
 from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
 from transformers.modeling_outputs import CausalLMOutputWithPast

-from modules import RoPE, llama_cpp_python_hijack, shared
+from modules import llama_cpp_python_hijack, shared
 from modules.logging_colors import logger

 try:
@@ -212,7 +212,7 @@ class LlamacppHF(PreTrainedModel):
 'mul_mat_q': not shared.args.no_mul_mat_q,
 'numa': shared.args.numa,
 'n_gpu_layers': shared.args.n_gpu_layers,
-'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
+'rope_freq_base': shared.args.rope_freq_base,
 'tensor_split': tensor_split_list,
 'rope_freq_scale': 1.0 / shared.args.compress_pos_emb,
 'logits_all': shared.args.logits_all,
@@ -4,7 +4,7 @@ from functools import partial
 import numpy as np
 import torch

-from modules import RoPE, llama_cpp_python_hijack, shared
+from modules import llama_cpp_python_hijack, shared
 from modules.callbacks import Iteratorize
 from modules.logging_colors import logger
 from modules.text_generation import get_max_prompt_length
@@ -92,7 +92,7 @@ class LlamaCppModel:
 'mul_mat_q': not shared.args.no_mul_mat_q,
 'numa': shared.args.numa,
 'n_gpu_layers': shared.args.n_gpu_layers,
-'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
+'rope_freq_base': shared.args.rope_freq_base,
 'tensor_split': tensor_split_list,
 'rope_freq_scale': 1.0 / shared.args.compress_pos_emb,
 'offload_kqv': not shared.args.no_offload_kqv,
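
Both llama.cpp hunks above now pass shared.args.rope_freq_base through unchanged, so a user who previously tuned alpha_value for these loaders would convert it manually. A hedged sketch of that conversion, assuming the usual argparse mapping of shared.args names to command-line flags:

# Assumed workflow, not part of the commit: turn a former alpha_value setting
# into the rope_freq_base the llama.cpp loaders now expect.
alpha_value = 2.0
rope_freq_base = 10000 * alpha_value ** (64 / 63.)    # ~20221
print(f"--rope_freq_base {rope_freq_base:.0f}")       # instead of --alpha_value 2.0
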
@@ -22,7 +22,6 @@ loaders_and_params = OrderedDict({
 'no_use_fast',
 'use_flash_attention_2',
 'alpha_value',
-'rope_freq_base',
 'compress_pos_emb',
 'disable_exllama',
 'disable_exllamav2',
@@ -38,7 +37,6 @@ loaders_and_params = OrderedDict({
 'no_mmap',
 'mlock',
 'no_mul_mat_q',
-'alpha_value',
 'rope_freq_base',
 'compress_pos_emb',
 'cpu',
@@ -60,7 +58,6 @@ loaders_and_params = OrderedDict({
 'no_mmap',
 'mlock',
 'no_mul_mat_q',
-'alpha_value',
 'rope_freq_base',
 'compress_pos_emb',
 'cpu',
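
The three loaders_and_params hunks above trim the per-loader UI option lists so each backend only exposes the knob it actually consumes. A purely illustrative sketch of that structure (loader names and the surrounding entries are assumptions; only the removed keys come from the diff):

# Illustrative only: an OrderedDict mapping a loader name to the options its UI exposes.
from collections import OrderedDict

loaders_and_params = OrderedDict({
    'Transformers': [
        'alpha_value',        # kept: transformers receives alpha_value directly
        # 'rope_freq_base' removed from this list by the commit
        'compress_pos_emb',
    ],
    'llama.cpp': [
        # 'alpha_value' removed from this list by the commit
        'rope_freq_base',     # kept: llama.cpp receives rope_freq_base directly
        'compress_pos_emb',
    ],
})
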
@@ -25,7 +25,7 @@ from transformers import (
 )

 import modules.shared as shared
-from modules import RoPE, sampler_hijack
+from modules import sampler_hijack
 from modules.logging_colors import logger
 from modules.models_settings import get_model_metadata

@@ -248,7 +248,7 @@ def huggingface_loader(model_name):
 if shared.args.compress_pos_emb > 1:
     params['rope_scaling'] = {'type': 'linear', 'factor': shared.args.compress_pos_emb}
 elif shared.args.alpha_value > 1:
-    params['rope_scaling'] = {'type': 'dynamic', 'factor': RoPE.get_alpha_value(shared.args.alpha_value, shared.args.rope_freq_base)}
+    params['rope_scaling'] = {'type': 'dynamic', 'factor': shared.args.alpha_value}

 logger.info("TRANSFORMERS_PARAMS=")
 pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(params)
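
On the transformers side, the dynamic rope_scaling factor is now simply shared.args.alpha_value. A rough sketch of the resulting selection logic, with build_rope_scaling as a hypothetical helper standing in for the code shown in the hunk:

# Sketch of the post-commit behaviour; function name and defaults are illustrative.
def build_rope_scaling(compress_pos_emb=1, alpha_value=1):
    if compress_pos_emb > 1:
        return {'type': 'linear', 'factor': compress_pos_emb}
    elif alpha_value > 1:
        # previously RoPE.get_alpha_value(alpha_value, rope_freq_base); now the raw value
        return {'type': 'dynamic', 'factor': alpha_value}
    return None

print(build_rope_scaling(alpha_value=2.5))   # {'type': 'dynamic', 'factor': 2.5}
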