2024-12-11 17:29:23 +01:00
|
|
|
#include "llama-model.h"
|
2024-12-22 15:20:20 +01:00
|
|
|
|
|
|
|
// Build the display label for a llama_ftype value.
//
// If the LLAMA_FTYPE_GUESSED bit is set, the label of the value with that bit
// cleared is computed first and " (guessed)" is appended to it. Values that
// have no entry in the table below yield "unknown, may not work".
std::string llama_model_ftype_name(llama_ftype ftype) {
    // Handle the "guessed" marker first: clear the bit, resolve the remaining
    // value through this same function, then tag the result.
    if (ftype & LLAMA_FTYPE_GUESSED) {
        const llama_ftype base_type = static_cast<llama_ftype>(ftype & ~LLAMA_FTYPE_GUESSED);

        std::string label = llama_model_ftype_name(base_type);
        label += " (guessed)";
        return label;
    }

    // One row per recognized file type; the text is what gets returned verbatim.
    struct ftype_label {
        llama_ftype  type;
        const char * text;
    };

    static const ftype_label known_labels[] = {
        { LLAMA_FTYPE_ALL_F32,        "all F32"                  },
        { LLAMA_FTYPE_MOSTLY_F16,     "F16"                      },
        { LLAMA_FTYPE_MOSTLY_BF16,    "BF16"                     },
        { LLAMA_FTYPE_MOSTLY_Q4_0,    "Q4_0"                     },
        { LLAMA_FTYPE_MOSTLY_Q4_1,    "Q4_1"                     },
        { LLAMA_FTYPE_MOSTLY_Q5_0,    "Q5_0"                     },
        { LLAMA_FTYPE_MOSTLY_Q5_1,    "Q5_1"                     },
        { LLAMA_FTYPE_MOSTLY_Q8_0,    "Q8_0"                     },
        { LLAMA_FTYPE_MOSTLY_Q2_K,    "Q2_K - Medium"            },
        { LLAMA_FTYPE_MOSTLY_Q2_K_S,  "Q2_K - Small"             },
        { LLAMA_FTYPE_MOSTLY_Q3_K_S,  "Q3_K - Small"             },
        { LLAMA_FTYPE_MOSTLY_Q3_K_M,  "Q3_K - Medium"            },
        { LLAMA_FTYPE_MOSTLY_Q3_K_L,  "Q3_K - Large"             },
        { LLAMA_FTYPE_MOSTLY_Q4_K_S,  "Q4_K - Small"             },
        { LLAMA_FTYPE_MOSTLY_Q4_K_M,  "Q4_K - Medium"            },
        { LLAMA_FTYPE_MOSTLY_Q5_K_S,  "Q5_K - Small"             },
        { LLAMA_FTYPE_MOSTLY_Q5_K_M,  "Q5_K - Medium"            },
        { LLAMA_FTYPE_MOSTLY_Q6_K,    "Q6_K"                     },
        { LLAMA_FTYPE_MOSTLY_TQ1_0,   "TQ1_0 - 1.69 bpw ternary" },
        { LLAMA_FTYPE_MOSTLY_TQ2_0,   "TQ2_0 - 2.06 bpw ternary" },
        { LLAMA_FTYPE_MOSTLY_IQ2_XXS, "IQ2_XXS - 2.0625 bpw"     },
        { LLAMA_FTYPE_MOSTLY_IQ2_XS,  "IQ2_XS - 2.3125 bpw"      },
        { LLAMA_FTYPE_MOSTLY_IQ2_S,   "IQ2_S - 2.5 bpw"          },
        { LLAMA_FTYPE_MOSTLY_IQ2_M,   "IQ2_M - 2.7 bpw"          },
        { LLAMA_FTYPE_MOSTLY_IQ3_XS,  "IQ3_XS - 3.3 bpw"         },
        { LLAMA_FTYPE_MOSTLY_IQ3_XXS, "IQ3_XXS - 3.0625 bpw"     },
        { LLAMA_FTYPE_MOSTLY_IQ1_S,   "IQ1_S - 1.5625 bpw"       },
        { LLAMA_FTYPE_MOSTLY_IQ1_M,   "IQ1_M - 1.75 bpw"         },
        { LLAMA_FTYPE_MOSTLY_IQ4_NL,  "IQ4_NL - 4.5 bpw"         },
        { LLAMA_FTYPE_MOSTLY_IQ4_XS,  "IQ4_XS - 4.25 bpw"        },
        { LLAMA_FTYPE_MOSTLY_IQ3_S,   "IQ3_S - 3.4375 bpw"       },
        { LLAMA_FTYPE_MOSTLY_IQ3_M,   "IQ3_S mix - 3.66 bpw"     },
    };

    for (const ftype_label & entry : known_labels) {
        if (entry.type == ftype) {
            return entry.text;
        }
    }

    // No table row matched.
    return "unknown, may not work";
}
|