mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-11-22 16:17:57 +01:00
Add --pin-weight parameter for FlexGen
This commit is contained in:
parent
05e703b4a4
commit
ed8b35efd2
@ -58,7 +58,7 @@ def load_model(model_name):
|
|||||||
shared.args.percent[0], shared.args.percent[1],
|
shared.args.percent[0], shared.args.percent[1],
|
||||||
shared.args.percent[2], shared.args.percent[3],
|
shared.args.percent[2], shared.args.percent[3],
|
||||||
shared.args.percent[4], shared.args.percent[5],
|
shared.args.percent[4], shared.args.percent[5],
|
||||||
overlap=True, sep_layer=True, pin_weight=True,
|
overlap=True, sep_layer=True, pin_weight=shared.args.pin_weight,
|
||||||
cpu_cache_compute=False, attn_sparsity=1.0,
|
cpu_cache_compute=False, attn_sparsity=1.0,
|
||||||
compress_weight=shared.args.compress_weight,
|
compress_weight=shared.args.compress_weight,
|
||||||
comp_weight_config=CompressionConfig(
|
comp_weight_config=CompressionConfig(
|
||||||
|
@ -53,6 +53,16 @@ settings = {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def str2bool(v):
    """Convert a command-line token into a bool, for use as an argparse ``type=``.

    Args:
        v: Either an actual ``bool`` (returned unchanged) or a string.
            Matching is case-insensitive and ignores surrounding whitespace.
            'yes', 'true', 't', 'y', '1' map to True;
            'no', 'false', 'f', 'n', '0' map to False.

    Returns:
        The boolean the token stands for.

    Raises:
        argparse.ArgumentTypeError: If ``v`` is not a bool, not a string, or
            is a string outside the recognised spellings.
    """
    if isinstance(v, bool):
        return v
    # Reject non-strings with ArgumentTypeError so callers see one consistent
    # error type instead of an AttributeError from the .lower() call.
    if not isinstance(v, str):
        raise argparse.ArgumentTypeError('Boolean value expected.')

    # Normalise once rather than lowercasing separately for each comparison.
    token = v.strip().lower()
    if token in ('yes', 'true', 't', 'y', '1'):
        return True
    if token in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog,max_help_position=54))
|
parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog,max_help_position=54))
|
||||||
parser.add_argument('--model', type=str, help='Name of the model to load by default.')
|
parser.add_argument('--model', type=str, help='Name of the model to load by default.')
|
||||||
parser.add_argument('--notebook', action='store_true', help='Launch the web UI in notebook mode, where the output is written to the same text box as the input.')
|
parser.add_argument('--notebook', action='store_true', help='Launch the web UI in notebook mode, where the output is written to the same text box as the input.')
|
||||||
@ -69,6 +79,7 @@ parser.add_argument('--cpu-memory', type=int, help='Maximum CPU memory in GiB to
|
|||||||
parser.add_argument('--flexgen', action='store_true', help='Enable the use of FlexGen offloading.')
|
parser.add_argument('--flexgen', action='store_true', help='Enable the use of FlexGen offloading.')
|
||||||
parser.add_argument('--percent', type=int, nargs="+", default=[0, 100, 100, 0, 100, 0], help='FlexGen: allocation percentages. Must be 6 numbers separated by spaces (default: 0, 100, 100, 0, 100, 0).')
|
parser.add_argument('--percent', type=int, nargs="+", default=[0, 100, 100, 0, 100, 0], help='FlexGen: allocation percentages. Must be 6 numbers separated by spaces (default: 0, 100, 100, 0, 100, 0).')
|
||||||
parser.add_argument("--compress-weight", action="store_true", help="FlexGen: activate weight compression.")
|
parser.add_argument("--compress-weight", action="store_true", help="FlexGen: activate weight compression.")
|
||||||
|
# --pin-weight takes an optional value: nargs="?" with const=True makes a bare
# `--pin-weight` mean True, an explicit value (e.g. `--pin-weight False`) is
# parsed by str2bool, and omitting the flag entirely falls back to default=True.
# The `%%` in the help text is an escaped literal percent sign for argparse's
# help formatting.
parser.add_argument("--pin-weight", type=str2bool, nargs="?", const=True, default=True, help="FlexGen: whether to pin weights (setting this to False reduces CPU memory by 20%%).")
|
||||||
parser.add_argument('--deepspeed', action='store_true', help='Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.')
|
parser.add_argument('--deepspeed', action='store_true', help='Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.')
|
||||||
parser.add_argument('--nvme-offload-dir', type=str, help='DeepSpeed: Directory to use for ZeRO-3 NVME offloading.')
|
parser.add_argument('--nvme-offload-dir', type=str, help='DeepSpeed: Directory to use for ZeRO-3 NVME offloading.')
|
||||||
parser.add_argument('--local_rank', type=int, default=0, help='DeepSpeed: Optional argument for distributed setups.')
|
parser.add_argument('--local_rank', type=int, default=0, help='DeepSpeed: Optional argument for distributed setups.')
|
||||||
|
Loading…
Reference in New Issue
Block a user