mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2025-01-12 21:37:35 +01:00
Do not expose alpha_value to llama.cpp & rope_freq_base to transformers
To avoid confusion
This commit is contained in:
parent
b48ab482f8
commit
536f8d58d4
@ -1,18 +0,0 @@
|
||||
def get_alpha_value(alpha, base):
    '''
    Gets alpha_value from alpha_value and rope_freq_base.

    When `base` (rope_freq_base) is positive it takes precedence and the
    result is derived from it as (base / 10000) ** (63 / 64), i.e. the
    inverse of the relation base = 10000 * alpha ** (64 / 63). Otherwise
    `alpha` is returned unchanged.
    '''
    if base > 0:
        return (base / 10000.) ** (63 / 64.)
    else:
        return alpha
|
||||
|
||||
|
||||
def get_rope_freq_base(alpha, base):
    '''
    Gets rope_freq_base from alpha_value and rope_freq_base.

    When `base` (rope_freq_base) is positive it is returned unchanged.
    Otherwise the result is derived from `alpha` (alpha_value) as
    10000 * alpha ** (64 / 63).
    '''
    if base > 0:
        return base
    else:
        return 10000 * alpha ** (64 / 63.)
|
@ -7,7 +7,7 @@ from torch.nn import CrossEntropyLoss
|
||||
from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
|
||||
from transformers.modeling_outputs import CausalLMOutputWithPast
|
||||
|
||||
from modules import RoPE, llama_cpp_python_hijack, shared
|
||||
from modules import llama_cpp_python_hijack, shared
|
||||
from modules.logging_colors import logger
|
||||
|
||||
try:
|
||||
@ -212,7 +212,7 @@ class LlamacppHF(PreTrainedModel):
|
||||
'mul_mat_q': not shared.args.no_mul_mat_q,
|
||||
'numa': shared.args.numa,
|
||||
'n_gpu_layers': shared.args.n_gpu_layers,
|
||||
'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
|
||||
'rope_freq_base': shared.args.rope_freq_base,
|
||||
'tensor_split': tensor_split_list,
|
||||
'rope_freq_scale': 1.0 / shared.args.compress_pos_emb,
|
||||
'logits_all': shared.args.logits_all,
|
||||
|
@ -4,7 +4,7 @@ from functools import partial
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from modules import RoPE, llama_cpp_python_hijack, shared
|
||||
from modules import llama_cpp_python_hijack, shared
|
||||
from modules.callbacks import Iteratorize
|
||||
from modules.logging_colors import logger
|
||||
from modules.text_generation import get_max_prompt_length
|
||||
@ -92,7 +92,7 @@ class LlamaCppModel:
|
||||
'mul_mat_q': not shared.args.no_mul_mat_q,
|
||||
'numa': shared.args.numa,
|
||||
'n_gpu_layers': shared.args.n_gpu_layers,
|
||||
'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
|
||||
'rope_freq_base': shared.args.rope_freq_base,
|
||||
'tensor_split': tensor_split_list,
|
||||
'rope_freq_scale': 1.0 / shared.args.compress_pos_emb,
|
||||
'offload_kqv': not shared.args.no_offload_kqv,
|
||||
|
@ -22,7 +22,6 @@ loaders_and_params = OrderedDict({
|
||||
'no_use_fast',
|
||||
'use_flash_attention_2',
|
||||
'alpha_value',
|
||||
'rope_freq_base',
|
||||
'compress_pos_emb',
|
||||
'disable_exllama',
|
||||
'disable_exllamav2',
|
||||
@ -38,7 +37,6 @@ loaders_and_params = OrderedDict({
|
||||
'no_mmap',
|
||||
'mlock',
|
||||
'no_mul_mat_q',
|
||||
'alpha_value',
|
||||
'rope_freq_base',
|
||||
'compress_pos_emb',
|
||||
'cpu',
|
||||
@ -60,7 +58,6 @@ loaders_and_params = OrderedDict({
|
||||
'no_mmap',
|
||||
'mlock',
|
||||
'no_mul_mat_q',
|
||||
'alpha_value',
|
||||
'rope_freq_base',
|
||||
'compress_pos_emb',
|
||||
'cpu',
|
||||
|
@ -25,7 +25,7 @@ from transformers import (
|
||||
)
|
||||
|
||||
import modules.shared as shared
|
||||
from modules import RoPE, sampler_hijack
|
||||
from modules import sampler_hijack
|
||||
from modules.logging_colors import logger
|
||||
from modules.models_settings import get_model_metadata
|
||||
|
||||
@ -248,7 +248,7 @@ def huggingface_loader(model_name):
|
||||
if shared.args.compress_pos_emb > 1:
|
||||
params['rope_scaling'] = {'type': 'linear', 'factor': shared.args.compress_pos_emb}
|
||||
elif shared.args.alpha_value > 1:
|
||||
params['rope_scaling'] = {'type': 'dynamic', 'factor': RoPE.get_alpha_value(shared.args.alpha_value, shared.args.rope_freq_base)}
|
||||
params['rope_scaling'] = {'type': 'dynamic', 'factor': shared.args.alpha_value}
|
||||
|
||||
logger.info("TRANSFORMERS_PARAMS=")
|
||||
pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(params)
|
||||
|
Loading…
x
Reference in New Issue
Block a user