diff --git a/scripts/compare-llama-bench.py b/scripts/compare-llama-bench.py index 4ac6b5fc0..6125a080a 100755 --- a/scripts/compare-llama-bench.py +++ b/scripts/compare-llama-bench.py @@ -19,22 +19,22 @@ logger = logging.getLogger("compare-llama-bench") # Properties by which to differentiate results per commit: KEY_PROPERTIES = [ - "cpu_info", "gpu_info", "n_gpu_layers", "cuda", "vulkan", "kompute", "metal", "sycl", "rpc", "gpu_blas", - "blas", "model_filename", "model_type", "n_batch", "n_ubatch", "embeddings", "n_threads", - "type_k", "type_v", "use_mmap", "no_kv_offload", "split_mode", "main_gpu", "tensor_split", "flash_attn", "n_prompt", "n_gen" + "cpu_info", "gpu_info", "backends", "n_gpu_layers", "model_filename", "model_type", "n_batch", "n_ubatch", + "embeddings", "cpu_mask", "cpu_strict", "poll", "n_threads", "type_k", "type_v", "use_mmap", "no_kv_offload", + "split_mode", "main_gpu", "tensor_split", "flash_attn", "n_prompt", "n_gen" ] # Properties that are boolean and are converted to Yes/No for the table: -BOOL_PROPERTIES = ["cuda", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas", "embeddings", "use_mmap", "no_kv_offload", "flash_attn"] +BOOL_PROPERTIES = ["embeddings", "cpu_strict", "use_mmap", "no_kv_offload", "flash_attn"] # Header names for the table: PRETTY_NAMES = { - "cuda": "CUDA", "vulkan": "Vulkan", "kompute": "Kompute", "metal": "Metal", "sycl": "SYCL", "rpc": "RPC", - "gpu_blas": "GPU BLAS", "blas": "BLAS", "cpu_info": "CPU", "gpu_info": "GPU", "model_filename": "File", "model_type": "Model", - "model_size": "Model Size [GiB]", "model_n_params": "Num. of Par.", "n_batch": "Batch size", "n_ubatch": "Microbatch size", - "n_threads": "Threads", "type_k": "K type", "type_v": "V type", "n_gpu_layers": "GPU layers", "split_mode": "Split mode", - "main_gpu": "Main GPU", "no_kv_offload": "NKVO", "flash_attn": "FlashAttention", "tensor_split": "Tensor split", - "use_mmap": "Use mmap", "embeddings": "Embeddings", + "cpu_info": "CPU", "gpu_info": "GPU", "backends": "Backends", "n_gpu_layers": "GPU layers", + "model_filename": "File", "model_type": "Model", "model_size": "Model size [GiB]", + "model_n_params": "Num. of par.", "n_batch": "Batch size", "n_ubatch": "Microbatch size", + "embeddings": "Embeddings", "cpu_mask": "CPU mask", "cpu_strict": "CPU strict", "poll": "Poll", + "n_threads": "Threads", "type_k": "K type", "type_v": "V type", "split_mode": "Split mode", "main_gpu": "Main GPU", + "no_kv_offload": "NKVO", "flash_attn": "FlashAttention", "tensor_split": "Tensor split", "use_mmap": "Use mmap", } DEFAULT_SHOW = ["model_type"] # Always show these properties by default.