From ed8b35efd2c6304d9debf84c494ba69139c2f20e Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 4 Mar 2023 01:04:02 -0300
Subject: [PATCH] Add --pin-weight parameter for FlexGen

---
Reviewer notes (this section sits after the "---" separator, so `git am`
does not record it in the commit message):

* modules/models.py: inside load_model, the hard-coded `pin_weight=True`
  keyword argument is replaced by `shared.args.pin_weight`.
* modules/shared.py: adds the `str2bool` argparse type helper. It returns
  bool inputs unchanged, maps yes/true/t/y/1 to True and no/false/f/n/0 to
  False (case-insensitive), and raises argparse.ArgumentTypeError for
  anything else.
* modules/shared.py: adds the `--pin-weight` option using
  `type=str2bool, nargs="?", const=True, default=True`, so omitting the
  option or passing it bare yields True and `--pin-weight False` yields
  False.
* NOTE(review): this copy of the patch had lost its line breaks and leading
  whitespace. The line structure below was restored from the hunk headers;
  the indentation of hunk lines is a reconstruction. Confirm against the
  target tree, or apply with `git apply --ignore-whitespace`.

 modules/models.py |  2 +-
 modules/shared.py | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/modules/models.py b/modules/models.py
index 36589044..904d8ae2 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -58,7 +58,7 @@ def load_model(model_name):
                         shared.args.percent[0], shared.args.percent[1],
                         shared.args.percent[2], shared.args.percent[3],
                         shared.args.percent[4], shared.args.percent[5],
-                        overlap=True, sep_layer=True, pin_weight=True,
+                        overlap=True, sep_layer=True, pin_weight=shared.args.pin_weight,
                         cpu_cache_compute=False, attn_sparsity=1.0,
                         compress_weight=shared.args.compress_weight,
                         comp_weight_config=CompressionConfig(
diff --git a/modules/shared.py b/modules/shared.py
index 462d637c..e9dfdaa2 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -53,6 +53,16 @@ settings = {
     }
 }
 
+def str2bool(v):
+    if isinstance(v, bool):
+        return v
+    if v.lower() in ('yes', 'true', 't', 'y', '1'):
+        return True
+    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
+        return False
+    else:
+        raise argparse.ArgumentTypeError('Boolean value expected.')
+
 parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog,max_help_position=54))
 parser.add_argument('--model', type=str, help='Name of the model to load by default.')
 parser.add_argument('--notebook', action='store_true', help='Launch the web UI in notebook mode, where the output is written to the same text box as the input.')
@@ -69,6 +79,7 @@ parser.add_argument('--cpu-memory', type=int, help='Maximum CPU memory in GiB to
 parser.add_argument('--flexgen', action='store_true', help='Enable the use of FlexGen offloading.')
 parser.add_argument('--percent', type=int, nargs="+", default=[0, 100, 100, 0, 100, 0], help='FlexGen: allocation percentages. Must be 6 numbers separated by spaces (default: 0, 100, 100, 0, 100, 0).')
 parser.add_argument("--compress-weight", action="store_true", help="FlexGen: activate weight compression.")
+parser.add_argument("--pin-weight", type=str2bool, nargs="?", const=True, default=True, help="FlexGen: whether to pin weights (setting this to False reduces CPU memory by 20%%).")
 parser.add_argument('--deepspeed', action='store_true', help='Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.')
 parser.add_argument('--nvme-offload-dir', type=str, help='DeepSpeed: Directory to use for ZeRO-3 NVME offloading.')
 parser.add_argument('--local_rank', type=int, default=0, help='DeepSpeed: Optional argument for distributed setups.')