From 498fec2c7c7df376007f264261acfcfcb76168ea Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:09:31 -0700 Subject: [PATCH 01/41] UI: fix saving characters --- modules/ui_file_saving.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_file_saving.py b/modules/ui_file_saving.py index 51fac69f..ac72c623 100644 --- a/modules/ui_file_saving.py +++ b/modules/ui_file_saving.py @@ -107,7 +107,7 @@ def handle_save_character_confirm_click(name2, greeting, context, character_pict try: chat.save_character(name2, greeting, context, character_picture, filename) available_characters = utils.get_available_characters() - output = gr.update(choices=available_characters, value=filename), + output = gr.update(choices=available_characters, value=filename) except Exception: output = gr.update() traceback.print_exc() From ac30b004efd7b9e2511d0baf619e8025c745ba09 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 6 Sep 2024 18:38:39 -0700 Subject: [PATCH 02/41] Pin fastapi/pydantic requirement versions --- requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 08b7d56d..51558a84 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ bitsandbytes==0.43.* colorama datasets einops +fastapi==0.112.4 gradio==4.26.* hqq==0.1.7.post3 jinja2==3.1.4 @@ -17,6 +18,7 @@ pandas peft==0.12.* Pillow>=9.5.0 psutil +pydantic==2.8.2 pyyaml requests rich @@ -69,4 +71,4 @@ https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310 https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" \ No newline at end of file +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" From f98431c7448381bfa4e859ace70e0379f6431018 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 6 Sep 2024 18:47:25 -0700 Subject: [PATCH 03/41] Apply the change to all requirements (oops) --- requirements_amd.txt | 4 +++- requirements_amd_noavx2.txt | 4 +++- requirements_apple_intel.txt | 2 ++ requirements_apple_silicon.txt | 2 ++ requirements_cpu_only.txt | 2 ++ requirements_cpu_only_noavx2.txt | 2 ++ requirements_noavx2.txt | 4 +++- requirements_nowheels.txt | 2 ++ 8 files changed, 19 insertions(+), 3 deletions(-) diff --git a/requirements_amd.txt b/requirements_amd.txt index 52e36510..e9dfb128 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -2,6 +2,7 @@ accelerate==0.33.* colorama datasets einops +fastapi==0.112.4 gradio==4.26.* hqq==0.1.7.post3 jinja2==3.1.4 @@ -14,6 +15,7 @@ pandas peft==0.12.* 
Pillow>=9.5.0 psutil +pydantic==2.8.2 pyyaml requests rich @@ -46,4 +48,4 @@ https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8- https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" \ No newline at end of file +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 18a81d04..69a350ad 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -2,6 +2,7 @@ accelerate==0.33.* colorama datasets einops +fastapi==0.112.4 gradio==4.26.* hqq==0.1.7.post3 jinja2==3.1.4 @@ -14,6 +15,7 @@ pandas peft==0.12.* Pillow>=9.5.0 psutil +pydantic==2.8.2 pyyaml requests rich @@ -44,4 +46,4 @@ https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8- https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" \ No newline at end of file +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index af02904b..0326916e 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -2,6 +2,7 @@ accelerate==0.33.* colorama datasets einops +fastapi==0.112.4 gradio==4.26.* hqq==0.1.7.post3 jinja2==3.1.4 @@ -14,6 +15,7 @@ pandas peft==0.12.* Pillow>=9.5.0 psutil +pydantic==2.8.2 pyyaml requests rich diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 8cdd8519..70ee3d27 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -2,6 +2,7 @@ accelerate==0.33.* colorama datasets einops +fastapi==0.112.4 gradio==4.26.* hqq==0.1.7.post3 jinja2==3.1.4 @@ -14,6 +15,7 @@ pandas peft==0.12.* Pillow>=9.5.0 psutil 
+pydantic==2.8.2 pyyaml requests rich diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 807c182a..fe2d796c 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -2,6 +2,7 @@ accelerate==0.33.* colorama datasets einops +fastapi==0.112.4 gradio==4.26.* hqq==0.1.7.post3 jinja2==3.1.4 @@ -14,6 +15,7 @@ pandas peft==0.12.* Pillow>=9.5.0 psutil +pydantic==2.8.2 pyyaml requests rich diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index e2a89936..982a12a1 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -2,6 +2,7 @@ accelerate==0.33.* colorama datasets einops +fastapi==0.112.4 gradio==4.26.* hqq==0.1.7.post3 jinja2==3.1.4 @@ -14,6 +15,7 @@ pandas peft==0.12.* Pillow>=9.5.0 psutil +pydantic==2.8.2 pyyaml requests rich diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index d22eb72c..fc253f5c 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -5,6 +5,7 @@ bitsandbytes==0.43.* colorama datasets einops +fastapi==0.112.4 gradio==4.26.* hqq==0.1.7.post3 jinja2==3.1.4 @@ -17,6 +18,7 @@ pandas peft==0.12.* Pillow>=9.5.0 psutil +pydantic==2.8.2 pyyaml requests rich @@ -69,4 +71,4 @@ https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310 https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" \ No newline at end of file +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index ffb45fe3..ec18464d 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -2,6 +2,7 @@ accelerate==0.33.* colorama datasets einops +fastapi==0.112.4 gradio==4.26.* hqq==0.1.7.post3 jinja2==3.1.4 @@ -14,6 +15,7 @@ pandas peft==0.12.* Pillow>=9.5.0 psutil +pydantic==2.8.2 pyyaml requests rich From c497a3237229db0de4a4704e829d75a45973c91f Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 26 Sep 2024 11:55:51 -0700 Subject: [PATCH 04/41] Bump transformers to 4.45 --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7164ac80..913c2463 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,7 +26,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.44.* +transformers==4.45.* tqdm wandb diff --git 
a/requirements_amd.txt b/requirements_amd.txt index 20ba9c79..b9d74b69 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -23,7 +23,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.44.* +transformers==4.45.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index e5c18681..ccacab29 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -23,7 +23,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.44.* +transformers==4.45.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index e60937c7..dc1757e2 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -23,7 +23,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.44.* +transformers==4.45.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index e1b726bb..a913a63c 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -23,7 +23,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.44.* +transformers==4.45.* tqdm wandb diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index b0480c43..b0dbc91a 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -23,7 +23,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.44.* +transformers==4.45.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index a64d68a7..828d0c97 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -23,7 +23,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.44.* +transformers==4.45.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index cae98bb2..458c1410 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -26,7 +26,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.44.* +transformers==4.45.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index ec18464d..e7cbc29c 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -23,7 +23,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.44.* +transformers==4.45.* tqdm wandb From 7424f789bf699c9b2efc874da2b84168bb9e7a6f Mon Sep 17 00:00:00 2001 From: oobabooga Date: Fri, 27 Sep 2024 19:03:25 -0300 Subject: [PATCH 05/41] Fix the sampling monkey patch (and add more options to sampler_priority) (#6411) --- modules/presets.py | 2 +- modules/sampler_hijack.py | 192 +++++++++++++++++++++++--------------- 2 files changed, 116 insertions(+), 78 deletions(-) diff --git a/modules/presets.py b/modules/presets.py index b00e829e..14c5a777 100644 --- a/modules/presets.py +++ b/modules/presets.py @@ -44,7 +44,7 @@ def default_preset(): 'dry_base': 1.75, 'dry_allowed_length': 2, 'dry_sequence_breakers': '"\\n", ":", "\\"", "*"', - 'sampler_priority': 'temperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat' + 'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nencoder_repetition_penalty\nno_repeat_ngram' } diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index 9fb661ae..9e865bb3 100644 --- a/modules/sampler_hijack.py +++ 
b/modules/sampler_hijack.py @@ -323,62 +323,141 @@ class SpyLogitsWarper(LogitsWarper): class RepetitionPenaltyLogitsProcessorWithRange(LogitsProcessor): - ''' - Copied from the transformers library - ''' - - def __init__(self, penalty: float, presence_penalty: float, frequency_penalty: float, _range: int): + def __init__(self, penalty: float, _range: int): if not (penalty > 0): raise ValueError(f"`penalty` has to be strictly positive, but is {penalty}") - self.penalty = penalty - self.presence_penalty = presence_penalty - self.frequency_penalty = frequency_penalty self._range = _range + def apply_repetition_penalty(self, input_ids_row, scores_row): + unique_ids = torch.unique(input_ids_row) + score = torch.gather(scores_row, 0, unique_ids) + + # Apply multiplicative repetition penalty + score = torch.where(score < 0, score * self.penalty, score / self.penalty) + scores_row.scatter_(0, unique_ids, score) + return scores_row + def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor: input_ids = input_ids[:, -self._range:] - - # We loop here because torch.unique() needs to process each row separately in the - # case that batch_size > 1. for input_ids_row, scores_row in zip(input_ids, scores): - unique_ids, counts = torch.unique(input_ids_row, return_counts=True) - score = torch.gather(scores_row, 0, unique_ids) - - # multiplicative repetition penalty - # if score < 0 then repetition penalty has to be multiplied to reduce the previous token probability - score = torch.where(score < 0, score * self.penalty, score / self.penalty) - scores_row.scatter_(0, unique_ids, score) - - # presence_penalty and frequency_penalty - raw_presence_penalty = (counts > 0).to(scores.dtype) - raw_frequency_penalty = counts.to(scores.dtype) - additive_penalty = raw_presence_penalty * self.presence_penalty + raw_frequency_penalty * self.frequency_penalty - scores_row.scatter_add_(0, unique_ids, -additive_penalty) + scores_row = self.apply_repetition_penalty(input_ids_row, scores_row) return scores -def get_logits_warper_patch(self, generation_config, **kwargs): +class PresencePenaltyLogitsProcessor(LogitsProcessor): + def __init__(self, presence_penalty: float, _range: int): + self.presence_penalty = presence_penalty + self._range = _range + + def apply_presence_penalty(self, input_ids_row, scores_row): + unique_ids, counts = torch.unique(input_ids_row, return_counts=True) + + # Apply presence penalty + raw_presence_penalty = (counts > 0).to(scores_row.dtype) + presence_penalty = raw_presence_penalty * self.presence_penalty + scores_row.scatter_add_(0, unique_ids, -presence_penalty) + return scores_row + + def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor: + input_ids = input_ids[:, -self._range:] + for input_ids_row, scores_row in zip(input_ids, scores): + scores_row = self.apply_presence_penalty(input_ids_row, scores_row) + return scores + + +class FrequencyPenaltyLogitsProcessor(LogitsProcessor): + def __init__(self, frequency_penalty: float, _range: int): + self.frequency_penalty = frequency_penalty + self._range = _range + + def apply_frequency_penalty(self, input_ids_row, scores_row): + unique_ids, counts = torch.unique(input_ids_row, return_counts=True) + + # Apply frequency penalty + raw_frequency_penalty = counts.to(scores_row.dtype) + frequency_penalty = raw_frequency_penalty * self.frequency_penalty + scores_row.scatter_add_(0, unique_ids, -frequency_penalty) + return scores_row + + def __call__(self, input_ids: 
torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor: + input_ids = input_ids[:, -self._range:] + for input_ids_row, scores_row in zip(input_ids, scores): + scores_row = self.apply_frequency_penalty(input_ids_row, scores_row) + return scores + + +def get_logits_processor_patch(self, **kwargs): + generation_config = kwargs['generation_config'] # Parameter sanitization if isinstance(generation_config.temperature, int): generation_config.temperature = float(generation_config.temperature) # Must be float # Get the original warpers - warpers = self._get_logits_warper_old(generation_config, **kwargs) + warpers = self._get_logits_processor_old(**kwargs) - # Replace temperature with our modified class. - # Currently, it behaves identically to the original. - for i in range(len(warpers)): + for i in range(len(warpers) - 1, -1, -1): + # Replace temperature with our modified class. if warpers[i].__class__.__name__ == 'TemperatureLogitsWarper': warpers[i] = TemperatureLogitsWarperCustom( generation_config.temperature, ) + # Stuff we don't need + elif warpers[i].__class__.__name__ in ['SuppressTokensLogitsProcessor', 'RepetitionPenaltyLogitsProcessor']: + del warpers[i] + # Add custom warpers warpers_to_add = LogitsProcessorList() min_tokens_to_keep = 2 if generation_config.num_beams > 1 else 1 + + if generation_config.repetition_penalty is not None and generation_config.repetition_penalty != 1.0: + warpers_to_add.append( + RepetitionPenaltyLogitsProcessorWithRange( + penalty=generation_config.repetition_penalty, + _range=generation_config.repetition_penalty_range + ) + ) + + if generation_config.presence_penalty is not None and generation_config.presence_penalty != 0.0: + warpers_to_add.append( + PresencePenaltyLogitsProcessor( + presence_penalty=generation_config.presence_penalty, + _range=generation_config.repetition_penalty_range + ) + ) + + if generation_config.frequency_penalty is not None and generation_config.frequency_penalty != 0.0: + warpers_to_add.append( + FrequencyPenaltyLogitsProcessor( + frequency_penalty=generation_config.frequency_penalty, + _range=generation_config.repetition_penalty_range + ) + ) + + if generation_config.dry_multiplier is not None and generation_config.dry_multiplier > 0.0: + dry_sequence_breakers = generation_config.dry_sequence_breakers + + # Support both JSON array notation and comma-separated strings. + if not dry_sequence_breakers.startswith("["): + dry_sequence_breakers = "[" + dry_sequence_breakers + "]" + + sequence_breaker_strings = json.loads(dry_sequence_breakers) + # Prefix with 'a' to get the correct encoding of the token at the end of a text. 
+ sequence_breakers = {shared.tokenizer.encode(f'a{s}')[-1] for s in sequence_breaker_strings} + + warpers.append( + DRYLogitsProcessor( + multiplier=generation_config.dry_multiplier, + base=generation_config.dry_base, + allowed_length=generation_config.dry_allowed_length, + sequence_breakers=sequence_breakers, + _range=generation_config.repetition_penalty_range, + ) + ) + if generation_config.tfs is not None and 0.0 <= generation_config.tfs < 1.0: warpers_to_add.append( TailFreeLogitsWarper( @@ -454,7 +533,12 @@ def get_logits_warper_patch(self, generation_config, **kwargs): 'TopALogitsWarper': 'top_a', 'TopKLogitsWarper': 'top_k', 'TopPLogitsWarper': 'top_p', - 'TypicalLogitsWarper': 'typical_p' + 'TypicalLogitsWarper': 'typical_p', + 'RepetitionPenaltyLogitsProcessorWithRange': 'repetition_penalty', + 'PresencePenaltyLogitsProcessor': 'presence_penalty', + 'FrequencyPenaltyLogitsProcessor': 'frequency_penalty', + 'EncoderRepetitionPenaltyLogitsProcessor': 'encoder_repetition_penalty', + 'NoRepeatNGramLogitsProcessor': 'no_repeat_ngram', } def custom_sort_key(obj): @@ -482,49 +566,6 @@ def get_logits_warper_patch(self, generation_config, **kwargs): return warpers -def get_logits_processor_patch(self, **kwargs): - generation_config = kwargs['generation_config'] - - do_rep_pen_hijack = (generation_config.repetition_penalty > 1) or (generation_config.presence_penalty != 0) or (generation_config.frequency_penalty != 0) - if do_rep_pen_hijack: - generation_config.repetition_penalty = 1.1 # Set to value > 1 to ensure RepetitionPenaltyLogitsProcessor is created - - result = self._get_logits_processor_old(**kwargs) - - if do_rep_pen_hijack: - for i in range(len(result)): - if result[i].__class__.__name__ == 'RepetitionPenaltyLogitsProcessor': - result[i] = RepetitionPenaltyLogitsProcessorWithRange( - generation_config.repetition_penalty, - generation_config.presence_penalty, - generation_config.frequency_penalty, - generation_config.repetition_penalty_range - ) - - if generation_config.dry_multiplier is not None and generation_config.dry_multiplier > 0.0: - dry_sequence_breakers = generation_config.dry_sequence_breakers - - # Support both JSON array notation and comma-separated strings. - if not dry_sequence_breakers.startswith("["): - dry_sequence_breakers = "[" + dry_sequence_breakers + "]" - - sequence_breaker_strings = json.loads(dry_sequence_breakers) - # Prefix with 'a' to get the correct encoding of the token at the end of a text. 
- sequence_breakers = {shared.tokenizer.encode(f'a{s}')[-1] for s in sequence_breaker_strings} - - result.append( - DRYLogitsProcessor( - multiplier=generation_config.dry_multiplier, - base=generation_config.dry_base, - allowed_length=generation_config.dry_allowed_length, - sequence_breakers=sequence_breakers, - _range=generation_config.repetition_penalty_range, - ) - ) - - return result - - def generation_config_init_patch(self, **kwargs): self.__init___old(**kwargs) self.min_p = kwargs.pop("min_p", 0.0) @@ -547,13 +588,10 @@ def generation_config_init_patch(self, **kwargs): self.dry_allowed_length = kwargs.pop("dry_allowed_length", 2) self.dry_sequence_breakers = kwargs.pop("dry_sequence_breakers", '"\\n", ":", "\\"", "*"') self.temperature_last = kwargs.pop("temperature_last", False) - self.sampler_priority = kwargs.pop("sampler_priority", ['temperature', 'dynamic_temperature', 'quadratic_sampling', 'top_k', 'top_p', 'typical_p', 'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a', 'min_p', 'mirostat']) + self.sampler_priority = kwargs.pop("sampler_priority", ['repetition_penalty', 'presence_penalty', 'frequency_penalty', 'temperature', 'dynamic_temperature', 'quadratic_sampling', 'top_k', 'top_p', 'typical_p', 'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a', 'min_p', 'mirostat', 'encoder_repetition_penalty', 'no_repeat_ngram']) def hijack_samplers(): - transformers.GenerationMixin._get_logits_warper_old = transformers.GenerationMixin._get_logits_warper - transformers.GenerationMixin._get_logits_warper = get_logits_warper_patch - transformers.GenerationMixin._get_logits_processor_old = transformers.GenerationMixin._get_logits_processor transformers.GenerationMixin._get_logits_processor = get_logits_processor_patch From c5f048e912ffbb1c7fd4b72609a67bec8ad07b5c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 27 Sep 2024 15:04:08 -0700 Subject: [PATCH 06/41] Bump ExLlamaV2 to 0.2.2 --- requirements.txt | 10 +++++----- requirements_amd.txt | 6 +++--- requirements_amd_noavx2.txt | 6 +++--- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_noavx2.txt | 10 +++++----- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/requirements.txt b/requirements.txt index 913c2463..f0360d53 100644 --- a/requirements.txt +++ b/requirements.txt @@ -55,11 +55,11 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.90+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" 
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index b9d74b69..d6bf29e6 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -42,9 +42,9 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp # AMD wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.90+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.90+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" 
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index ccacab29..5ce468c3 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -40,9 +40,9 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index dc1757e2..12786ade 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -38,4 +38,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/me https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.90-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < 
"22.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.90-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.90-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0-py3-none-any.whl +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index a913a63c..901c9f10 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -40,4 +40,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/me https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.90-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.90-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.90-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0-py3-none-any.whl +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 458c1410..87e1bd7f 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -55,11 +55,11 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.90+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.0/exllamav2-0.2.0-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" 
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" From 78b87054004a18f788d4a52d72a8ca9358aca0e9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 27 Sep 2024 15:06:31 -0700 Subject: [PATCH 07/41] Bump llama-cpp-python to 0.3.0 (except for AMD) --- requirements.txt | 24 ++++++++++++------------ requirements_amd.txt | 8 ++++---- requirements_amd_noavx2.txt | 8 ++++---- requirements_apple_intel.txt | 8 ++++---- requirements_apple_silicon.txt | 12 ++++++------ requirements_cpu_only.txt | 8 ++++---- requirements_cpu_only_noavx2.txt | 8 ++++---- requirements_noavx2.txt | 24 ++++++++++++------------ 8 files changed, 50 insertions(+), 50 deletions(-) diff --git a/requirements.txt b/requirements.txt index f0360d53..1041025c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -37,22 +37,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == 
"3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.90+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.90+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.90+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.90+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.90+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.90+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.90+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.90+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index d6bf29e6..b224ce14 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -34,10 +34,10 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.90+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 5ce468c3..3cdb8a7a 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ 
-34,10 +34,10 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 12786ade..fd91464d 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -34,8 +34,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.90-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.90-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.90-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.90-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and 
platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 901c9f10..8a50c8b4 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -34,10 +34,10 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.90-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.90-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.90-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.90-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.90-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.90-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < 
"23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index b0dbc91a..4e89b6ad 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -34,7 +34,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 828d0c97..2c00ccf5 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -34,7 +34,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 87e1bd7f..69d880ac 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -37,22 +37,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.90+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.90+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.90+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.90+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.90+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.0+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.0+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.0+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.0+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.90+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.90+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.90+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.90+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine 
== "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" From 5c918c5b2d870f41a5215cfd5c45c652ad6d9b3a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 27 Sep 2024 15:40:48 -0700 Subject: [PATCH 08/41] Make it possible to sort DRY --- modules/presets.py | 2 +- modules/sampler_hijack.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/presets.py b/modules/presets.py index 14c5a777..06e2be70 100644 --- a/modules/presets.py +++ b/modules/presets.py @@ -44,7 +44,7 @@ def default_preset(): 'dry_base': 1.75, 'dry_allowed_length': 2, 'dry_sequence_breakers': '"\\n", ":", "\\"", "*"', - 'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nencoder_repetition_penalty\nno_repeat_ngram' + 'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nencoder_repetition_penalty\nno_repeat_ngram' } diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index 9e865bb3..65503c12 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -537,6 +537,7 @@ def get_logits_processor_patch(self, **kwargs): 'RepetitionPenaltyLogitsProcessorWithRange': 'repetition_penalty', 'PresencePenaltyLogitsProcessor': 'presence_penalty', 'FrequencyPenaltyLogitsProcessor': 'frequency_penalty', + 'DRYLogitsProcessor': 'dry', 'EncoderRepetitionPenaltyLogitsProcessor': 'encoder_repetition_penalty', 'NoRepeatNGramLogitsProcessor': 'no_repeat_ngram', } @@ -588,7 +589,7 @@ def generation_config_init_patch(self, **kwargs): self.dry_allowed_length = kwargs.pop("dry_allowed_length", 2) self.dry_sequence_breakers = kwargs.pop("dry_sequence_breakers", '"\\n", ":", "\\"", "*"') self.temperature_last = kwargs.pop("temperature_last", False) - self.sampler_priority = kwargs.pop("sampler_priority", ['repetition_penalty', 'presence_penalty', 'frequency_penalty', 'temperature', 'dynamic_temperature', 'quadratic_sampling', 'top_k', 'top_p', 'typical_p', 'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a', 'min_p', 'mirostat', 'encoder_repetition_penalty', 'no_repeat_ngram']) + self.sampler_priority = kwargs.pop("sampler_priority", ['repetition_penalty', 'presence_penalty', 'frequency_penalty', 'dry', 'temperature', 'dynamic_temperature', 'quadratic_sampling', 'top_k', 'top_p', 'typical_p', 'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a', 'min_p', 'mirostat', 'encoder_repetition_penalty', 'no_repeat_ngram']) def hijack_samplers(): From 626b0a043702c9586478d9969184718d24ebf935 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thireus=20=E2=98=A0?= Date: Fri, 27 Sep 2024 23:47:04 +0100 Subject: [PATCH 09/41] Force /bin/bash shell for conda (#6386) --- one_click.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index 0a0412ba..d35ecc89 100644 --- a/one_click.py +++ 
b/one_click.py @@ -190,7 +190,7 @@ def run_cmd(cmd, assert_success=False, environment=False, capture_output=False, cmd = f'. "{conda_sh_path}" && conda activate "{conda_env_path}" && {cmd}' # Run shell commands - result = subprocess.run(cmd, shell=True, capture_output=capture_output, env=env) + result = subprocess.run(cmd, shell=True, executable='/bin/bash', capture_output=capture_output, env=env) # Assert the command ran successfully if assert_success and result.returncode != 0: From 3492e33fd50bcc78a81850fdcc995e097a79b8ef Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 27 Sep 2024 16:59:30 -0700 Subject: [PATCH 10/41] Bump bitsandbytes to 0.44 --- requirements.txt | 2 +- requirements_noavx2.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 1041025c..54bd414a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ accelerate==0.33.* aqlm[gpu,cpu]==1.1.6; platform_system == "Linux" auto-gptq==0.7.1 -bitsandbytes==0.43.* +bitsandbytes==0.44.* colorama datasets einops diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 69d880ac..39bfb0bd 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -1,7 +1,7 @@ accelerate==0.33.* aqlm[gpu,cpu]==1.1.6; platform_system == "Linux" auto-gptq==0.7.1 -bitsandbytes==0.43.* +bitsandbytes==0.44.* colorama datasets einops From 301375834e55fcda6fda96332cad187aab2c6f77 Mon Sep 17 00:00:00 2001 From: Philipp Emanuel Weidmann Date: Sat, 28 Sep 2024 07:20:12 +0530 Subject: [PATCH 11/41] =?UTF-8?q?Exclude=20Top=20Choices=20(XTC):=20A=20sa?= =?UTF-8?q?mpler=20that=20boosts=20creativity,=20breaks=20writing=20clich?= =?UTF-8?q?=C3=A9s,=20and=20inhibits=20non-verbatim=20repetition=20(#6335)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- extensions/openai/typing.py | 2 ++ modules/loaders.py | 6 ++++ modules/presets.py | 4 ++- modules/sampler_hijack.py | 61 ++++++++++++++++++++++++++++++++++++- modules/text_generation.py | 2 +- modules/ui.py | 2 ++ modules/ui_parameters.py | 4 +++ 7 files changed, 78 insertions(+), 3 deletions(-) diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index 4015f6a1..f63c1f39 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -37,6 +37,8 @@ class GenerationOptions(BaseModel): dry_base: float = 1.75 dry_allowed_length: int = 2 dry_sequence_breakers: str = '"\\n", ":", "\\"", "*"' + xtc_threshold: float = 0.1 + xtc_probability: float = 0 truncation_length: int = 0 max_tokens_second: int = 0 prompt_lookup_num_tokens: int = 0 diff --git a/modules/loaders.py b/modules/loaders.py index 549de5fb..16a3106e 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -168,6 +168,8 @@ def transformers_samplers(): 'dry_base', 'dry_allowed_length', 'dry_sequence_breakers', + 'xtc_threshold', + 'xtc_probability', 'seed', 'do_sample', 'penalty_alpha', @@ -242,6 +244,8 @@ loaders_samplers = { 'dry_base', 'dry_allowed_length', 'dry_sequence_breakers', + 'xtc_threshold', + 'xtc_probability', 'seed', 'do_sample', 'mirostat_mode', @@ -304,6 +308,8 @@ loaders_samplers = { 'dry_base', 'dry_allowed_length', 'dry_sequence_breakers', + 'xtc_threshold', + 'xtc_probability', 'seed', 'do_sample', 'mirostat_mode', diff --git a/modules/presets.py b/modules/presets.py index 06e2be70..c8118fb3 100644 --- a/modules/presets.py +++ b/modules/presets.py @@ -44,7 +44,9 @@ def default_preset(): 'dry_base': 1.75, 
'dry_allowed_length': 2, 'dry_sequence_breakers': '"\\n", ":", "\\"", "*"', - 'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nencoder_repetition_penalty\nno_repeat_ngram' + 'xtc_threshold': 0.1, + 'xtc_probability': 0, + 'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nxtc\nencoder_repetition_penalty\nno_repeat_ngram' } diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index 65503c12..6d92978e 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -1,6 +1,7 @@ import json import math import pprint +import random import torch import transformers @@ -191,6 +192,53 @@ class TopALogitsWarper(LogitsWarper): return scores +# Exclude Top Choices (XTC) +class XTCLogitsWarper(LogitsWarper): + def __init__(self, threshold: float, probability: float, filter_value: float = -float("Inf")): + self.threshold = threshold + self.probability = probability + self.filter_value = filter_value + self.special_token_ids = [ + shared.tokenizer.encode("\n")[-1], + ] + + if shared.tokenizer.eos_token_id is not None: + self.special_token_ids.append(shared.tokenizer.eos_token_id) + + def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor: + # `random` returns values in the half-open range [0, 1), so setting `probability` + # to 0 means the sampler never takes action, while setting it to 1 means the sampler + # always takes action. + # + # Note that while XTC is most intuitively described as "if multiple tokens meet + # the threshold, then with probability...", reversing the two conditions is logically + # equivalent, and improves performance because processing can immediately be stopped + # if the random check fails. + if random.random() >= self.probability: + return scores + + sorted_logits, sorted_indices = torch.sort(scores, descending=True) + probs = sorted_logits.softmax(dim=-1) + + sorted_indices_to_remove = torch.full_like(probs, False, dtype=torch.bool) + + # This operation sets exactly those indices to `True` for which the next index has + # probability above the threshold. Since `probs` is sorted, those are the indices + # of all tokens that meet the threshold, *except* the least probable one. 
+ sorted_indices_to_remove[..., :-1] = probs[..., 1:] >= self.threshold + + # Convert sorted_indices_to_remove to the original indices + indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove) + + # If newline or EOS tokens would be removed, return the original scores + if indices_to_remove[:, self.special_token_ids].any(): + return scores + + # Otherwise, remove tokens with the mask + scores = scores.masked_fill(indices_to_remove, self.filter_value) + return scores + + class DRYLogitsProcessor(LogitsProcessor): def __init__(self, multiplier: float, base: float, allowed_length: int, sequence_breakers: set[int], _range: int): self.multiplier = multiplier @@ -474,6 +522,14 @@ def get_logits_processor_patch(self, **kwargs): ) ) + if generation_config.xtc_probability is not None and generation_config.xtc_probability > 0: + warpers_to_add.append( + XTCLogitsWarper( + threshold=generation_config.xtc_threshold, + probability=generation_config.xtc_probability, + ) + ) + if generation_config.dynamic_temperature: warpers_to_add.append( DynamicTemperatureLogitsWarper( @@ -534,6 +590,7 @@ def get_logits_processor_patch(self, **kwargs): 'TopKLogitsWarper': 'top_k', 'TopPLogitsWarper': 'top_p', 'TypicalLogitsWarper': 'typical_p', + 'XTCLogitsWarper': 'xtc', 'RepetitionPenaltyLogitsProcessorWithRange': 'repetition_penalty', 'PresencePenaltyLogitsProcessor': 'presence_penalty', 'FrequencyPenaltyLogitsProcessor': 'frequency_penalty', @@ -588,8 +645,10 @@ def generation_config_init_patch(self, **kwargs): self.dry_base = kwargs.pop("dry_base", 1.75) self.dry_allowed_length = kwargs.pop("dry_allowed_length", 2) self.dry_sequence_breakers = kwargs.pop("dry_sequence_breakers", '"\\n", ":", "\\"", "*"') + self.xtc_threshold = kwargs.pop("xtc_threshold", 0.1) + self.xtc_probability = kwargs.pop("xtc_probability", 0) self.temperature_last = kwargs.pop("temperature_last", False) - self.sampler_priority = kwargs.pop("sampler_priority", ['repetition_penalty', 'presence_penalty', 'frequency_penalty', 'dry', 'temperature', 'dynamic_temperature', 'quadratic_sampling', 'top_k', 'top_p', 'typical_p', 'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a', 'min_p', 'mirostat', 'encoder_repetition_penalty', 'no_repeat_ngram']) + self.sampler_priority = kwargs.pop("sampler_priority", ['repetition_penalty', 'presence_penalty', 'frequency_penalty', 'dry', 'temperature', 'dynamic_temperature', 'quadratic_sampling', 'top_k', 'top_p', 'typical_p', 'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a', 'min_p', 'mirostat', 'xtc', 'encoder_repetition_penalty', 'no_repeat_ngram']) def hijack_samplers(): diff --git a/modules/text_generation.py b/modules/text_generation.py index e7a2b43f..654eff89 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -289,7 +289,7 @@ def get_reply_from_output_ids(output_ids, state=None, starting_from=0): def generate_reply_HF(question, original_question, seed, state, stopping_strings=None, is_chat=False): generate_params = {} - for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'dry_multiplier', 'dry_base', 'dry_allowed_length', 
'dry_sequence_breakers']: + for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'dry_multiplier', 'dry_base', 'dry_allowed_length', 'dry_sequence_breakers', 'xtc_threshold', 'xtc_probability']: if k in state: generate_params[k] = state[k] diff --git a/modules/ui.py b/modules/ui.py index 47f92cf0..76b1c009 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -158,6 +158,8 @@ def list_interface_input_elements(): 'dry_base', 'dry_allowed_length', 'dry_sequence_breakers', + 'xtc_threshold', + 'xtc_probability', 'do_sample', 'penalty_alpha', 'mirostat_mode', diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index eff62c20..a2665e0d 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -45,6 +45,10 @@ def create_ui(default_preset): shared.gradio['dry_base'] = gr.Slider(1, 4, value=generate_params['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.') shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=generate_params['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.') + with gr.Blocks(): + shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, value=generate_params['xtc_threshold'], step=0.01, label='xtc_threshold', info='If 2 or more tokens have probability above this threshold, consider removing all but the last one.') + shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=generate_params['xtc_probability'], step=0.01, label='xtc_probability', info='Probability that the removal will actually happen. 0 disables the sampler. 
1 makes it always happen.') + gr.Markdown("[Learn more](https://github.com/oobabooga/text-generation-webui/wiki/03-%E2%80%90-Parameters-Tab)") with gr.Column(): From 46996f6519d483841a49c8a857104a6bf6e2ac7a Mon Sep 17 00:00:00 2001 From: RandoInternetPreson Date: Fri, 27 Sep 2024 23:26:03 -0400 Subject: [PATCH 12/41] ExllamaV2 tensor parallelism to increase multi gpu inference speeds (#6356) --- modules/exllamav2.py | 26 ++++++++++++++++++-------- modules/exllamav2_hf.py | 28 +++++++++++++++++++--------- modules/shared.py | 1 + 3 files changed, 38 insertions(+), 17 deletions(-) diff --git a/modules/exllamav2.py b/modules/exllamav2.py index a770e342..42b9ade1 100644 --- a/modules/exllamav2.py +++ b/modules/exllamav2.py @@ -7,6 +7,7 @@ from exllamav2 import ( ExLlamaV2Cache, ExLlamaV2Cache_8bit, ExLlamaV2Cache_Q4, + ExLlamaV2Cache_TP, ExLlamaV2Config, ExLlamaV2Tokenizer ) @@ -54,21 +55,30 @@ class Exllamav2Model: model = ExLlamaV2(config) - if not shared.args.autosplit: - split = None - if shared.args.gpu_split: - split = [float(alloc) for alloc in shared.args.gpu_split.split(",")] + split = None + if shared.args.gpu_split: + split = [float(alloc) for alloc in shared.args.gpu_split.split(",")] + if shared.args.enable_tp: + model.load_tp(split) + elif not shared.args.autosplit: model.load(split) + # Determine the correct cache type if shared.args.cache_8bit: - cache = ExLlamaV2Cache_8bit(model, lazy=shared.args.autosplit) + cache_type = ExLlamaV2Cache_8bit elif shared.args.cache_4bit: - cache = ExLlamaV2Cache_Q4(model, lazy=shared.args.autosplit) + cache_type = ExLlamaV2Cache_Q4 else: - cache = ExLlamaV2Cache(model, lazy=shared.args.autosplit) + cache_type = ExLlamaV2Cache - if shared.args.autosplit: + # Use TP if specified + if shared.args.enable_tp: + cache = ExLlamaV2Cache_TP(model, base=cache_type) + else: + cache = cache_type(model, lazy=shared.args.autosplit) + + if shared.args.autosplit and not shared.args.enable_tp: model.load_autosplit(cache) tokenizer = ExLlamaV2Tokenizer(config) diff --git a/modules/exllamav2_hf.py b/modules/exllamav2_hf.py index 53143d9a..febb2c64 100644 --- a/modules/exllamav2_hf.py +++ b/modules/exllamav2_hf.py @@ -9,6 +9,7 @@ from exllamav2 import ( ExLlamaV2Cache, ExLlamaV2Cache_8bit, ExLlamaV2Cache_Q4, + ExLlamaV2Cache_TP, ExLlamaV2Config ) from torch.nn import CrossEntropyLoss @@ -42,21 +43,30 @@ class Exllamav2HF(PreTrainedModel): self.ex_model = ExLlamaV2(config) - if not shared.args.autosplit: - split = None - if shared.args.gpu_split: - split = [float(alloc) for alloc in shared.args.gpu_split.split(",")] + split = None + if shared.args.gpu_split: + split = [float(alloc) for alloc in shared.args.gpu_split.split(",")] - self.ex_model.load(split) + if shared.args.enable_tp: + model.load_tp(split) + elif not shared.args.autosplit: + model.load(split) + # Determine the correct cache type if shared.args.cache_8bit: - self.ex_cache = ExLlamaV2Cache_8bit(self.ex_model, lazy=shared.args.autosplit) + cache_type = ExLlamaV2Cache_8bit elif shared.args.cache_4bit: - self.ex_cache = ExLlamaV2Cache_Q4(self.ex_model, lazy=shared.args.autosplit) + cache_type = ExLlamaV2Cache_Q4 else: - self.ex_cache = ExLlamaV2Cache(self.ex_model, lazy=shared.args.autosplit) + cache_type = ExLlamaV2Cache - if shared.args.autosplit: + # Use TP if specified + if shared.args.enable_tp: + self.ex_cache = ExLlamaV2Cache_TP(self.ex_model, base=cache_type) + else: + self.ex_cache = cache_type(self.ex_model, lazy=shared.args.autosplit) + + if shared.args.autosplit and not shared.args.enable_tp: 
self.ex_model.load_autosplit(self.ex_cache) self.past_seq = None diff --git a/modules/shared.py b/modules/shared.py index 43533a14..894ed6fe 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -146,6 +146,7 @@ group.add_argument('--no_sdpa', action='store_true', help='Force Torch SDPA to n group.add_argument('--cache_8bit', action='store_true', help='Use 8-bit cache to save VRAM.') group.add_argument('--cache_4bit', action='store_true', help='Use Q4 cache to save VRAM.') group.add_argument('--num_experts_per_token', type=int, default=2, help='Number of experts to use for generation. Applies to MoE models like Mixtral.') +group.add_argument('--enable_tp', action='store_true', help='Enable Tensor Parallelism (TP) in ExLlamaV2.') # AutoGPTQ group = parser.add_argument_group('AutoGPTQ') From 7276dca933626eef587ac0adf53ab8ea06c2e9ac Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 27 Sep 2024 20:26:36 -0700 Subject: [PATCH 13/41] Fix a typo --- modules/exllamav2_hf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/exllamav2_hf.py b/modules/exllamav2_hf.py index febb2c64..96a89429 100644 --- a/modules/exllamav2_hf.py +++ b/modules/exllamav2_hf.py @@ -48,9 +48,9 @@ class Exllamav2HF(PreTrainedModel): split = [float(alloc) for alloc in shared.args.gpu_split.split(",")] if shared.args.enable_tp: - model.load_tp(split) + self.ex_model.load_tp(split) elif not shared.args.autosplit: - model.load(split) + self.ex_model.load(split) # Determine the correct cache type if shared.args.cache_8bit: From ca5a2dba72817def91b616e9571d8490b9b64a13 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 28 Sep 2024 09:39:37 -0700 Subject: [PATCH 14/41] Bump rocm to 6.1.2 --- README.md | 2 +- one_click.py | 6 +++--- requirements_amd.txt | 8 ++++---- requirements_amd_noavx2.txt | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 22ceeeb8..4449639d 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ conda activate textgen |--------|---------|---------| | Linux/WSL | NVIDIA | `pip3 install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/cu121` | | Linux/WSL | CPU only | `pip3 install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/cpu` | -| Linux | AMD | `pip3 install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/rocm5.6` | +| Linux | AMD | `pip3 install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/rocm6.1` | | MacOS + MPS | Any | `pip3 install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2` | | Windows | NVIDIA | `pip3 install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/cu121` | | Windows | CPU only | `pip3 install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2` | diff --git a/one_click.py b/one_click.py index d35ecc89..9b813873 100644 --- a/one_click.py +++ b/one_click.py @@ -117,7 +117,7 @@ def update_pytorch(): elif is_cuda: install_pytorch += "--index-url https://download.pytorch.org/whl/cu121" elif is_rocm: - install_pytorch += "--index-url https://download.pytorch.org/whl/rocm5.6" + install_pytorch += "--index-url https://download.pytorch.org/whl/rocm6.1" elif is_cpu: install_pytorch += "--index-url https://download.pytorch.org/whl/cpu" elif is_intel: @@ -239,7 +239,7 @@ def 
install_webui(): "What is your GPU?", { 'A': 'NVIDIA', - 'B': 'AMD (Linux/MacOS only. Requires ROCm SDK 5.6 on Linux)', + 'B': 'AMD (Linux/MacOS only. Requires ROCm SDK 6.1 on Linux)', 'C': 'Apple M Series', 'D': 'Intel Arc (IPEX)', 'N': 'None (I want to run models in CPU mode)' @@ -294,7 +294,7 @@ def install_webui(): else: install_pytorch += "--index-url https://download.pytorch.org/whl/cu121" elif selected_gpu == "AMD": - install_pytorch += "--index-url https://download.pytorch.org/whl/rocm5.6" + install_pytorch += "--index-url https://download.pytorch.org/whl/rocm6.1" elif selected_gpu in ["APPLE", "NONE"]: install_pytorch += "--index-url https://download.pytorch.org/whl/cpu" elif selected_gpu == "INTEL": diff --git a/requirements_amd.txt b/requirements_amd.txt index b224ce14..9f46ed4f 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -40,10 +40,10 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.90+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.90+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.0+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.0+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version 
== "3.10" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 3cdb8a7a..9c5fb5f2 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -40,8 +40,8 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" From 85994e3ef0f75b408ef5b9a6995902eb600be970 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 28 Sep 2024 09:44:08 -0700 Subject: [PATCH 15/41] Bump pytorch to 2.4.1 --- README.md | 14 +++++++------- one_click.py | 6 +++--- requirements.txt | 16 ++++++++-------- requirements_amd.txt | 4 ++-- requirements_amd_noavx2.txt | 4 ++-- requirements_noavx2.txt | 16 ++++++++-------- 6 files changed, 30 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 4449639d..350dd090 100644 --- a/README.md +++ b/README.md @@ -80,12 +80,12 @@ conda activate textgen | System | GPU | Command | |--------|---------|---------| -| Linux/WSL | NVIDIA | `pip3 install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/cu121` | -| Linux/WSL | CPU only | `pip3 install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/cpu` | -| Linux | AMD | `pip3 install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/rocm6.1` | -| MacOS + MPS | Any | `pip3 install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2` | -| Windows | NVIDIA | `pip3 install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/cu121` | -| Windows | CPU only | `pip3 install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2` | +| Linux/WSL | NVIDIA | `pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cu121` | +| Linux/WSL | CPU only | `pip3 install torch==2.4.1 torchvision==0.19.1 
torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cpu` | +| Linux | AMD | `pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/rocm6.1` | +| MacOS + MPS | Any | `pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1` | +| Windows | NVIDIA | `pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cu121` | +| Windows | CPU only | `pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1` | The up-to-date commands can be found here: https://pytorch.org/get-started/locally/. @@ -150,7 +150,7 @@ Then browse to 1) For Kepler GPUs and older, you will need to install CUDA 11.8 instead of 12: ``` -pip3 install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/cu118 +pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cu118 conda install -y -c "nvidia/label/cuda-11.8.0" cuda-runtime ``` diff --git a/one_click.py b/one_click.py index 9b813873..0f9c703d 100644 --- a/one_click.py +++ b/one_click.py @@ -16,9 +16,9 @@ import sys # Define the required PyTorch version -TORCH_VERSION = "2.2.2" -TORCHVISION_VERSION = "0.17.2" -TORCHAUDIO_VERSION = "2.2.2" +TORCH_VERSION = "2.4.1" +TORCHVISION_VERSION = "0.19.1" +TORCHAUDIO_VERSION = "2.4.1" # Environment script_dir = os.getcwd() diff --git a/requirements.txt b/requirements.txt index 54bd414a..25b4b49d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -55,15 +55,15 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" 
https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 9f46ed4f..cd17a600 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -42,8 +42,8 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp # AMD wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.0+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.0+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == 
"x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 9c5fb5f2..dd9bd893 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -40,8 +40,8 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 39bfb0bd..ec0bbb1d 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -55,15 +55,15 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels 
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" 
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" From 1a870b3ea7f0b3337a0c0202fe4b4dddb4e559a0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 28 Sep 2024 19:38:56 -0700 Subject: [PATCH 16/41] Remove AutoAWQ and AutoGPTQ from requirements (no wheels available) --- requirements.txt | 9 --------- requirements_amd.txt | 4 ---- requirements_amd_noavx2.txt | 4 ---- requirements_noavx2.txt | 9 --------- 4 files changed, 26 deletions(-) diff --git a/requirements.txt b/requirements.txt index 25b4b49d..f42247a6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ accelerate==0.33.* aqlm[gpu,cpu]==1.1.6; platform_system == "Linux" -auto-gptq==0.7.1 bitsandbytes==0.44.* colorama datasets @@ -64,11 +63,3 @@ https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd.txt b/requirements_amd.txt index cd17a600..88fdcad7 
100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -45,7 +45,3 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/ro https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" -https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index dd9bd893..0ad440e3 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -43,7 +43,3 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" -https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index ec0bbb1d..61981ca5 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -1,6 +1,5 @@ accelerate==0.33.* aqlm[gpu,cpu]==1.1.6; platform_system == "Linux" -auto-gptq==0.7.1 bitsandbytes==0.44.* 
colorama datasets @@ -64,11 +63,3 @@ https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" From 65e58640846d65200364b4a65a7975ce48eb4618 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 28 Sep 2024 20:25:26 -0700 Subject: [PATCH 17/41] Update README --- README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 350dd090..fdd9c46c 100644 --- a/README.md +++ b/README.md @@ -10,17 +10,17 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Features -* Multiple backends for text generation in a single UI and API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp) (through [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [HQQ](https://github.com/mobiusml/hqq), and [AQLM](https://github.com/Vahe1994/AQLM) are also supported through the Transformers loader. 
-* OpenAI-compatible API server with Chat and Completions endpoints – see the [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). -* Automatic prompt formatting for each model using the Jinja2 template in its metadata. -* Three chat modes: `instruct`, `chat-instruct`, and `chat`, allowing for both instruction-following and casual conversations with characters. `chat-instruct` mode automatically applies the model's template to the chat prompt, ensuring high-quality outputs without manual setup. -* "Past chats" menu to quickly switch between conversations and start new ones. -* Free-form generation in the Default/Notebook tabs without being limited to chat turns. Send formatted chat conversations from the Chat tab to these tabs. -* Multiple sampling parameters and generation options for sophisticated text generation control. -* Easy switching between different models through the UI without restarting, using the "Model" tab. -* Simple LoRA fine-tuning tool to customize models with your data. -* All in one folder. The requirements are installed in a self-contained `installer_files` folder that doesn't interfere with the system's environment. -* Extensions support, including numerous built-in and user-contributed extensions. See [the wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [the extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details. +- Supports multiple text generation backends in one UI/API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). Also supports [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [HQQ](https://github.com/mobiusml/hqq), and [AQLM](https://github.com/Vahe1994/AQLM) through the Transformers loader. +- OpenAI-compatible API with Chat and Completions endpoints – see [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). +- Automatic prompt formatting using Jinja2 templates. +- Three chat modes: `instruct`, `chat-instruct`, and `chat`, with auto-prompt templates in `chat-instruct`. +- "Past chats" menu for easy conversation switching. +- Free-form text generation in the Default/Notebook tabs without being limited to chat turns. You can send formatted conversations from the Chat tab to these. +- Multiple sampling parameters and generation options for sophisticated text generation control. +- Switch models easily in the UI without restarting. +- Simple LoRA fine-tuning tool. +- Requirements installed in a self-contained `installer_files` directory that doesn't interfere with the system environment. +- Extension support, with numerous built-in and user-contributed extensions available. See the [wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details. 
## How to install From b92d7fd43e6f39410d4fc486e6168160b449aacc Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 28 Sep 2024 20:30:24 -0700 Subject: [PATCH 18/41] Add warnings for when AutoGPTQ, TensorRT-LLM, or HQQ are missing --- modules/models.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/modules/models.py b/modules/models.py index b0e2346e..3e8ae3f2 100644 --- a/modules/models.py +++ b/modules/models.py @@ -70,11 +70,11 @@ def load_model(model_name, loader=None): shared.model_name = model_name load_func_map = { 'Transformers': huggingface_loader, - 'AutoGPTQ': AutoGPTQ_loader, 'llama.cpp': llamacpp_loader, 'llamacpp_HF': llamacpp_HF_loader, 'ExLlamav2': ExLlamav2_loader, 'ExLlamav2_HF': ExLlamav2_HF_loader, + 'AutoGPTQ': AutoGPTQ_loader, 'HQQ': HQQ_loader, 'TensorRT-LLM': TensorRT_LLM_loader, } @@ -302,12 +302,6 @@ def llamacpp_HF_loader(model_name): return model -def AutoGPTQ_loader(model_name): - import modules.AutoGPTQ_loader - - return modules.AutoGPTQ_loader.load_quantized(model_name) - - def ExLlamav2_loader(model_name): from modules.exllamav2 import Exllamav2Model @@ -321,9 +315,21 @@ def ExLlamav2_HF_loader(model_name): return Exllamav2HF.from_pretrained(model_name) +def AutoGPTQ_loader(model_name): + try: + import modules.AutoGPTQ_loader + except ModuleNotFoundError: + raise ModuleNotFoundError("Failed to import 'autogptq'. Please install it manually following the instructions in the AutoGPTQ GitHub repository.") + + return modules.AutoGPTQ_loader.load_quantized(model_name) + + def HQQ_loader(model_name): - from hqq.core.quantize import HQQBackend, HQQLinear - from hqq.models.hf.base import AutoHQQHFModel + try: + from hqq.core.quantize import HQQBackend, HQQLinear + from hqq.models.hf.base import AutoHQQHFModel + except ModuleNotFoundError: + raise ModuleNotFoundError("Failed to import 'hqq'. Please install it manually following the instructions in the HQQ GitHub repository.") logger.info(f"Loading HQQ model with backend: \"{shared.args.hqq_backend}\"") @@ -334,7 +340,10 @@ def HQQ_loader(model_name): def TensorRT_LLM_loader(model_name): - from modules.tensorrt_llm import TensorRTLLMModel + try: + from modules.tensorrt_llm import TensorRTLLMModel + except ModuleNotFoundError: + raise ModuleNotFoundError("Failed to import 'tensorrt_llm'. Please install it manually following the instructions in the TensorRT-LLM GitHub repository.") model = TensorRTLLMModel.from_pretrained(model_name) return model From c61b29b9ce143e778fc717accd70e0bbabede720 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 28 Sep 2024 20:33:17 -0700 Subject: [PATCH 19/41] Simplify the warning when flash-attn fails to import --- modules/exllamav2.py | 8 -------- modules/exllamav2_hf.py | 8 -------- 2 files changed, 16 deletions(-) diff --git a/modules/exllamav2.py b/modules/exllamav2.py index 42b9ade1..0498c488 100644 --- a/modules/exllamav2.py +++ b/modules/exllamav2.py @@ -19,14 +19,6 @@ from modules.text_generation import get_max_prompt_length try: import flash_attn -except ModuleNotFoundError: - logger.warning( - 'You are running ExLlamaV2 without flash-attention. 
This will cause the VRAM usage ' - 'to be a lot higher than it could be.\n' - 'Try installing flash-attention following the instructions here: ' - 'https://github.com/Dao-AILab/flash-attention#installation-and-features' - ) - pass except Exception: logger.warning('Failed to load flash-attention due to the following error:\n') traceback.print_exc() diff --git a/modules/exllamav2_hf.py b/modules/exllamav2_hf.py index 96a89429..320a8d24 100644 --- a/modules/exllamav2_hf.py +++ b/modules/exllamav2_hf.py @@ -21,14 +21,6 @@ from modules.logging_colors import logger try: import flash_attn -except ModuleNotFoundError: - logger.warning( - 'You are running ExLlamaV2 without flash-attention. This will cause the VRAM usage ' - 'to be a lot higher than it could be.\n' - 'Try installing flash-attention following the instructions here: ' - 'https://github.com/Dao-AILab/flash-attention#installation-and-features' - ) - pass except Exception: logger.warning('Failed to load flash-attention due to the following error:\n') traceback.print_exc() From 3b99532e0268e7c516e58c1c83931c6843114cc2 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 28 Sep 2024 20:34:59 -0700 Subject: [PATCH 20/41] Remove HQQ and AQLM from requirements --- requirements.txt | 2 -- requirements_amd.txt | 1 - requirements_amd_noavx2.txt | 1 - requirements_apple_intel.txt | 1 - requirements_apple_silicon.txt | 1 - requirements_cpu_only.txt | 1 - requirements_cpu_only_noavx2.txt | 1 - requirements_noavx2.txt | 2 -- requirements_nowheels.txt | 1 - 9 files changed, 11 deletions(-) diff --git a/requirements.txt b/requirements.txt index f42247a6..d3531b9c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,10 @@ accelerate==0.33.* -aqlm[gpu,cpu]==1.1.6; platform_system == "Linux" bitsandbytes==0.44.* colorama datasets einops fastapi==0.112.4 gradio==4.26.* -hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_amd.txt b/requirements_amd.txt index 88fdcad7..1be85205 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -4,7 +4,6 @@ datasets einops fastapi==0.112.4 gradio==4.26.* -hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 0ad440e3..5026fa6d 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -4,7 +4,6 @@ datasets einops fastapi==0.112.4 gradio==4.26.* -hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index fd91464d..eee8112d 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -4,7 +4,6 @@ datasets einops fastapi==0.112.4 gradio==4.26.* -hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 8a50c8b4..fa244a93 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -4,7 +4,6 @@ datasets einops fastapi==0.112.4 gradio==4.26.* -hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 4e89b6ad..4c970b91 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -4,7 +4,6 @@ datasets einops fastapi==0.112.4 gradio==4.26.* -hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 2c00ccf5..4b22cb86 100644 --- a/requirements_cpu_only_noavx2.txt +++ 
b/requirements_cpu_only_noavx2.txt @@ -4,7 +4,6 @@ datasets einops fastapi==0.112.4 gradio==4.26.* -hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 61981ca5..fe6adb46 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -1,12 +1,10 @@ accelerate==0.33.* -aqlm[gpu,cpu]==1.1.6; platform_system == "Linux" bitsandbytes==0.44.* colorama datasets einops fastapi==0.112.4 gradio==4.26.* -hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index e7cbc29c..5c840f56 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -4,7 +4,6 @@ datasets einops fastapi==0.112.4 gradio==4.26.* -hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown From 3fb02f43f648c7735f7ed8b340c3d36d344aa7b6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 28 Sep 2024 20:38:43 -0700 Subject: [PATCH 21/41] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fdd9c46c..59cfb5fc 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Features -- Supports multiple text generation backends in one UI/API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). Also supports [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [HQQ](https://github.com/mobiusml/hqq), and [AQLM](https://github.com/Vahe1994/AQLM) through the Transformers loader. +- Supports multiple text generation backends in one UI/API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp), and [ExLlamaV2](https://github.com/turboderp/exllamav2).[TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [HQQ](https://github.com/mobiusml/hqq), and [AQLM](https://github.com/Vahe1994/AQLM) are also supported but you need to install them manually. - OpenAI-compatible API with Chat and Completions endpoints – see [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). - Automatic prompt formatting using Jinja2 templates. - Three chat modes: `instruct`, `chat-instruct`, and `chat`, with auto-prompt templates in `chat-instruct`. From 3f0571b62bb020e19c122d0cd30d5c5df4aa2917 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 28 Sep 2024 20:48:30 -0700 Subject: [PATCH 22/41] Update README --- README.md | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 59cfb5fc..954d7a7d 100644 --- a/README.md +++ b/README.md @@ -10,33 +10,29 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. 
## Features -- Supports multiple text generation backends in one UI/API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp), and [ExLlamaV2](https://github.com/turboderp/exllamav2).[TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [HQQ](https://github.com/mobiusml/hqq), and [AQLM](https://github.com/Vahe1994/AQLM) are also supported but you need to install them manually. +- Supports multiple text generation backends in one UI/API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp), and [ExLlamaV2](https://github.com/turboderp/exllamav2). [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [HQQ](https://github.com/mobiusml/hqq), and [AQLM](https://github.com/Vahe1994/AQLM) are also supported but you need to install them manually. - OpenAI-compatible API with Chat and Completions endpoints – see [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). - Automatic prompt formatting using Jinja2 templates. -- Three chat modes: `instruct`, `chat-instruct`, and `chat`, with auto-prompt templates in `chat-instruct`. -- "Past chats" menu for easy conversation switching. +- Three chat modes: `instruct`, `chat-instruct`, and `chat`, with automatic prompt templates in `chat-instruct`. +- "Past chats" menu to easily switch between conversations. - Free-form text generation in the Default/Notebook tabs without being limited to chat turns. You can send formatted conversations from the Chat tab to these. - Multiple sampling parameters and generation options for sophisticated text generation control. -- Switch models easily in the UI without restarting. +- Switch between different models easily in the UI without restarting. - Simple LoRA fine-tuning tool. - Requirements installed in a self-contained `installer_files` directory that doesn't interfere with the system environment. - Extension support, with numerous built-in and user-contributed extensions available. See the [wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details. ## How to install -1) Clone or [download](https://github.com/oobabooga/text-generation-webui/archive/refs/heads/main.zip) the repository. -2) Run the `start_linux.sh`, `start_windows.bat`, `start_macos.sh`, or `start_wsl.bat` script depending on your OS. +1) Clone or [download the repository](https://github.com/oobabooga/text-generation-webui/archive/refs/heads/main.zip). +2) Run the script that matches your OS: `start_linux.sh`, `start_windows.bat`, `start_macos.sh`, or `start_wsl.bat`. 3) Select your GPU vendor when asked. 4) Once the installation ends, browse to `http://localhost:7860`. 5) Have fun! -To restart the web UI in the future, run the `start_` script again. +To restart the web UI later, just run the same `start_` script. If you need to reinstall, delete the `installer_files` folder created during setup and run the script again. -This script creates an `installer_files` folder where it sets up the project's requirements. If you need to reinstall the requirements, just delete that folder and start the web UI again. 
- -The script accepts command-line flags, such as `./start_linux.sh --help`. Alternatively, you can edit the `CMD_FLAGS.txt` file with a text editor and add your flags there, such as `--api` in case you need to use the API. - -To get updates in the future, run `update_wizard_linux.sh`, `update_wizard_windows.bat`, `update_wizard_macos.sh`, or `update_wizard_wsl.bat`. +You can use command-line flags, like `./start_linux.sh --help`, or add them to `CMD_FLAGS.txt` (such as `--api` to enable API use). To update the project, run `update_wizard_linux.sh`, `update_wizard_windows.bat`, `update_wizard_macos.sh`, or `update_wizard_wsl.bat`.
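A minimal sketch of using the OpenAI-compatible API referenced in the features and install sections above: it assumes the web UI was started with the `--api` flag (for example via `CMD_FLAGS.txt`) and that the Chat Completions endpoint is served locally on port 5000, as in the project's wiki examples; the prompt, `mode`, and `max_tokens` values are illustrative placeholders rather than part of these patches.

```python
import requests

# Assumes the web UI was launched with --api and that the OpenAI-compatible
# API is listening on the default local port 5000.
URL = "http://127.0.0.1:5000/v1/chat/completions"

payload = {
    "messages": [
        {"role": "user", "content": "Summarize what a LoRA adapter is in one sentence."}
    ],
    # "mode" mirrors the chat modes mentioned in the README; "instruct" applies
    # the model's own prompt template.
    "mode": "instruct",
    "max_tokens": 200,
}

response = requests.post(URL, json=payload, timeout=120)
response.raise_for_status()

# The reply follows the OpenAI Chat Completions response schema.
print(response.json()["choices"][0]["message"]["content"])
```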
From 57160cd6fa37c8367df3c674256af970b31e0add Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 28 Sep 2024 20:50:41 -0700 Subject: [PATCH 23/41] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 954d7a7d..fb22d099 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. - OpenAI-compatible API with Chat and Completions endpoints – see [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). - Automatic prompt formatting using Jinja2 templates. - Three chat modes: `instruct`, `chat-instruct`, and `chat`, with automatic prompt templates in `chat-instruct`. -- "Past chats" menu to easily switch between conversations. +- "Past chats" menu to quickly switch between conversations. - Free-form text generation in the Default/Notebook tabs without being limited to chat turns. You can send formatted conversations from the Chat tab to these. - Multiple sampling parameters and generation options for sophisticated text generation control. - Switch between different models easily in the UI without restarting. From 055f3f5632f99f99409fd85d4e06c4c70acf8b33 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 28 Sep 2024 20:55:26 -0700 Subject: [PATCH 24/41] Fix after #6386 (thanks @Touch-Night) --- one_click.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index 0f9c703d..9bad25d1 100644 --- a/one_click.py +++ b/one_click.py @@ -189,8 +189,11 @@ def run_cmd(cmd, assert_success=False, environment=False, capture_output=False, conda_sh_path = os.path.join(script_dir, "installer_files", "conda", "etc", "profile.d", "conda.sh") cmd = f'. 
"{conda_sh_path}" && conda activate "{conda_env_path}" && {cmd}' + # Set executable to None for Windows, /bin/bash for everything else + executable = None if is_windows() else '/bin/bash' + # Run shell commands - result = subprocess.run(cmd, shell=True, executable='/bin/bash', capture_output=capture_output, env=env) + result = subprocess.run(cmd, shell=True, capture_output=capture_output, env=env, executable=executable) # Assert the command ran successfully if assert_success and result.returncode != 0: From 0f90a1b50fde3bc102bef1a034942ebb4b155845 Mon Sep 17 00:00:00 2001 From: Manuel Schmid <9307310+mashb1t@users.noreply.github.com> Date: Sun, 29 Sep 2024 06:08:55 +0200 Subject: [PATCH 25/41] Do not set value for histories in chat when --multi-user is used (#6317) --- modules/chat.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 00c4ffa9..b81cfea6 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -1059,7 +1059,12 @@ def handle_start_new_chat_click(state): convert_to_markdown.cache_clear() - return [history, html, gr.update(choices=histories, value=histories[0][1])] + if len(histories) > 0: + past_chats_update = gr.update(choices=histories, value=histories[0][1]) + else: + past_chats_update = gr.update(choices=histories) + + return [history, html, past_chats_update] def handle_delete_chat_confirm_click(state): @@ -1110,10 +1115,15 @@ def handle_upload_chat_history(load_chat_history, state): convert_to_markdown.cache_clear() + if len(histories) > 0: + past_chats_update = gr.update(choices=histories, value=histories[0][1]) + else: + past_chats_update = gr.update(choices=histories) + return [ history, html, - gr.update(choices=histories, value=histories[0][1]) + past_chats_update ] @@ -1132,6 +1142,11 @@ def handle_character_menu_change(state): convert_to_markdown.cache_clear() + if len(histories) > 0: + past_chats_update = gr.update(choices=histories, value=histories[0][1]) + else: + past_chats_update = gr.update(choices=histories) + return [ history, html, @@ -1140,7 +1155,7 @@ def handle_character_menu_change(state): picture, greeting, context, - gr.update(choices=histories, value=histories[0][1]), + past_chats_update, ] @@ -1151,12 +1166,17 @@ def handle_mode_change(state): convert_to_markdown.cache_clear() + if len(histories) > 0: + past_chats_update = gr.update(choices=histories, value=histories[0][1]) + else: + past_chats_update = gr.update(choices=histories) + return [ history, html, gr.update(visible=state['mode'] != 'instruct'), gr.update(visible=state['mode'] == 'chat-instruct'), - gr.update(choices=histories, value=histories[0][1]) + past_chats_update ] From e4b0467f9f4b054723528c93c4cda56a266f8687 Mon Sep 17 00:00:00 2001 From: Hanusz Leszek Date: Sun, 29 Sep 2024 06:14:19 +0200 Subject: [PATCH 26/41] Add beforeunload event to add confirmation dialog when leaving page (#6279) --- js/main.js | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/js/main.js b/js/main.js index 899bd8f0..71489f14 100644 --- a/js/main.js +++ b/js/main.js @@ -600,4 +600,15 @@ headerBar.addEventListener("click", (e) => { } }); +//------------------------------------------------ +// Add a confirmation dialog when leaving the page +// Useful to avoid data loss +//------------------------------------------------ +window.addEventListener('beforeunload', function (event) { + // Cancel the event + event.preventDefault(); + // Chrome requires returnValue to be set + event.returnValue = ''; +}); + 
moveToChatTab(); From 01362681f2f9e17b6e28ee99b5cf8551a5e62161 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 29 Sep 2024 07:42:44 -0700 Subject: [PATCH 27/41] Bump exllamav2 to 0.2.3 --- requirements.txt | 10 +++++----- requirements_amd.txt | 6 +++--- requirements_amd_noavx2.txt | 6 +++--- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_noavx2.txt | 10 +++++----- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/requirements.txt b/requirements.txt index d3531b9c..c7ba213f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -52,11 +52,11 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == 
"3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 1be85205..6ce5eb62 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -41,6 +41,6 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp # AMD wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.0+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.0+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 5026fa6d..81ad6b23 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -39,6 +39,6 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl; 
platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index eee8112d..6c3a696a 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -37,4 +37,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/me https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index fa244a93..86ef7197 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -39,4 +39,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/me https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index fe6adb46..516b0639 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -52,11 +52,11 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" 
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.2/exllamav2-0.2.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" From bbdeed3cf44d413eadb09a8f443b9d43ef0ce261 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 29 Sep 2024 20:45:27 -0700 Subject: [PATCH 28/41] Make sampler priority high if unspecified --- modules/sampler_hijack.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index 6d92978e..87f0b25e 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -602,11 +602,10 @@ def get_logits_processor_patch(self, **kwargs): def custom_sort_key(obj): class_name = obj.__class__.__name__ - # Return a large value if class name is not mapped or if the mapped nickname is not in priority + # Return -1 if class_name is not mapped if class_name not in class_name_to_nickname or class_name_to_nickname[class_name] not in sampler_priority: - return float('inf') + return -1 - # Return the index of the nickname in the priority list for sorting return sampler_priority.index(class_name_to_nickname[class_name]) # Sort the list using the custom key function From 9ca0cd7749f734bd5e7083d672dee8d400229257 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 29 Sep 2024 20:47:04 -0700 Subject: [PATCH 29/41] Bump llama-cpp-python to 0.3.1 --- requirements.txt | 24 ++++++++++++------------ 
requirements_amd.txt | 12 ++++++------ requirements_amd_noavx2.txt | 8 ++++---- requirements_apple_intel.txt | 8 ++++---- requirements_apple_silicon.txt | 12 ++++++------ requirements_cpu_only.txt | 8 ++++---- requirements_cpu_only_noavx2.txt | 8 ++++---- requirements_noavx2.txt | 24 ++++++++++++------------ 8 files changed, 52 insertions(+), 52 deletions(-) diff --git a/requirements.txt b/requirements.txt index c7ba213f..5623f1f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,22 +34,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 6ce5eb62..d203c157 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -33,14 +33,14 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version 
== "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.0+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.0+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.1+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.1+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 81ad6b23..3a7a950d 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -33,10 +33,10 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and 
python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 6c3a696a..61c5367f 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -33,8 +33,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and 
platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 86ef7197..494c127a 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -33,10 +33,10 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.0-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" 
and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 4c970b91..5838ac47 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -33,7 +33,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 4b22cb86..97d4eb90 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -33,7 +33,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 516b0639..d34a598d 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -34,22 +34,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.0+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.0+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.0+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.0+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.0+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.0+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" From 4d9ce586d31450d2ed692b1e7a72dbcdfd56e670 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 30 Sep 2024 14:04:21 -0700 Subject: [PATCH 
30/41] Update llama_cpp_python_hijack.py, fix llamacpp_hf --- modules/llama_cpp_python_hijack.py | 19 ++++++++++++------- modules/llamacpp_hf.py | 3 ++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/modules/llama_cpp_python_hijack.py b/modules/llama_cpp_python_hijack.py index 3d42b2d7..2a9c10da 100644 --- a/modules/llama_cpp_python_hijack.py +++ b/modules/llama_cpp_python_hijack.py @@ -2,12 +2,12 @@ import importlib import platform from typing import Sequence +import numpy as np from tqdm import tqdm from modules import shared from modules.cache_utils import process_llamacpp_cache - imported_module = None @@ -57,8 +57,6 @@ def eval_with_progress(self, tokens: Sequence[int]): with tqdm to show prompt processing progress. """ - assert self._ctx.ctx is not None - assert self._batch.batch is not None self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1) if len(tokens) > self.n_batch: @@ -80,13 +78,20 @@ def eval_with_progress(self, tokens: Sequence[int]): if self.context_params.logits_all: rows = n_tokens cols = self._n_vocab - logits = self._ctx.get_logits()[: rows * cols] - self.scores[n_past : n_past + n_tokens, :].reshape(-1)[: :] = logits + logits = np.ctypeslib.as_array( + self._ctx.get_logits(), shape=(rows * cols,) + ) + self.scores[n_past : n_past + n_tokens, :].reshape(-1)[::] = logits + self.last_updated_index = n_past + n_tokens - 1 else: rows = 1 cols = self._n_vocab - logits = self._ctx.get_logits()[: rows * cols] - self.scores[n_past + n_tokens - 1, :].reshape(-1)[: :] = logits + logits = np.ctypeslib.as_array( + self._ctx.get_logits(), shape=(rows * cols,) + ) + last_token_index = min(n_past + n_tokens - 1, self.scores.shape[0] - 1) + self.scores[last_token_index, :] = logits.reshape(-1) + self.last_updated_index = last_token_index # Update n_tokens self.n_tokens += n_tokens diff --git a/modules/llamacpp_hf.py b/modules/llamacpp_hf.py index 327e3a7b..6611a7c1 100644 --- a/modules/llamacpp_hf.py +++ b/modules/llamacpp_hf.py @@ -127,7 +127,7 @@ class LlamacppHF(PreTrainedModel): self.model.reset() self.model.eval(seq) - logits = torch.tensor(self.model.scores[self.model.n_tokens - 1, :]).view(1, 1, -1).to(input_ids.device) + logits = torch.tensor(self.model.scores[self.model.last_updated_index, :]).view(1, 1, -1).to(input_ids.device) else: self.model.reset() self.model.eval(seq) @@ -205,5 +205,6 @@ class LlamacppHF(PreTrainedModel): Llama = llama_cpp_lib().Llama model = Llama(**params) + model.last_updated_index = -1 return LlamacppHF(model, model_file) From 6063a6641444da58dc6f38626bbd639cd114eeb8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 30 Sep 2024 18:50:38 -0300 Subject: [PATCH 31/41] Update accelerate requirement from ==0.33.* to ==0.34.* (#6416) --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5623f1f7..f623372f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -accelerate==0.33.* +accelerate==0.34.* bitsandbytes==0.44.* colorama datasets diff --git a/requirements_amd.txt b/requirements_amd.txt index d203c157..67e63632 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -1,4 +1,4 @@ -accelerate==0.33.* 
+accelerate==0.34.* colorama datasets einops diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 3a7a950d..9d1828d6 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==0.33.* +accelerate==0.34.* colorama datasets einops diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 61c5367f..7e279163 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -1,4 +1,4 @@ -accelerate==0.33.* +accelerate==0.34.* colorama datasets einops diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 494c127a..1f476657 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -1,4 +1,4 @@ -accelerate==0.33.* +accelerate==0.34.* colorama datasets einops diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 5838ac47..fe671920 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -1,4 +1,4 @@ -accelerate==0.33.* +accelerate==0.34.* colorama datasets einops diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 97d4eb90..32bfe9c5 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==0.33.* +accelerate==0.34.* colorama datasets einops diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index d34a598d..85f94eef 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==0.33.* +accelerate==0.34.* bitsandbytes==0.44.* colorama datasets diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 5c840f56..5e566798 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -1,4 +1,4 @@ -accelerate==0.33.* +accelerate==0.34.* colorama datasets einops From 617cd7b70514e97a96c365bb5eda481594d48028 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 1 Oct 2024 09:06:25 -0700 Subject: [PATCH 32/41] Revert "Update accelerate requirement from ==0.33.* to ==0.34.* (#6416)" This reverts commit 6063a6641444da58dc6f38626bbd639cd114eeb8. 
--- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index f623372f..5623f1f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -accelerate==0.34.* +accelerate==0.33.* bitsandbytes==0.44.* colorama datasets diff --git a/requirements_amd.txt b/requirements_amd.txt index 67e63632..d203c157 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -1,4 +1,4 @@ -accelerate==0.34.* +accelerate==0.33.* colorama datasets einops diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 9d1828d6..3a7a950d 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==0.34.* +accelerate==0.33.* colorama datasets einops diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 7e279163..61c5367f 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -1,4 +1,4 @@ -accelerate==0.34.* +accelerate==0.33.* colorama datasets einops diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 1f476657..494c127a 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -1,4 +1,4 @@ -accelerate==0.34.* +accelerate==0.33.* colorama datasets einops diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index fe671920..5838ac47 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -1,4 +1,4 @@ -accelerate==0.34.* +accelerate==0.33.* colorama datasets einops diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 32bfe9c5..97d4eb90 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==0.34.* +accelerate==0.33.* colorama datasets einops diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 85f94eef..d34a598d 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==0.34.* +accelerate==0.33.* bitsandbytes==0.44.* colorama datasets diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 5e566798..5c840f56 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -1,4 +1,4 @@ -accelerate==0.34.* +accelerate==0.33.* colorama datasets einops From c6b50f88dab6435a0c27e5836b33f5b790c072f0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 1 Oct 2024 10:19:28 -0700 Subject: [PATCH 33/41] Lint --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6d939275..5623f1f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -60,4 +60,4 @@ https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3- https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and 
python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" \ No newline at end of file +https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd.txt b/requirements_amd.txt index 7741849b..d203c157 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -43,4 +43,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/ro https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.1+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" \ No newline at end of file +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index e4765488..3a7a950d 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -41,4 +41,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp # AMD wheels https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" \ No newline at end of file +https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index edeae78d..d34a598d 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -60,4 +60,4 @@ https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3- https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" \ No newline at end of file +https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" From cca9d6e22d6cb3e2909121f6d6077af36b2116f5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 1 Oct 2024 10:21:06 -0700 Subject: [PATCH 34/41] Lint --- modules/text_generation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/text_generation.py b/modules/text_generation.py index 654eff89..86245098 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -274,10 +274,10 @@ def get_reply_from_output_ids(output_ids, state=None, starting_from=0): if (hasattr(shared.tokenizer, 'convert_ids_to_tokens') and len(output_ids) > starting_from) and not reply.startswith(' '): first_token = shared.tokenizer.convert_ids_to_tokens(int(output_ids[starting_from])) if isinstance(first_token, (bytes,)): - #try to decode the bytes to a string + # try to decode the bytes to a string + # if it fails, which means it's not a string in this turn, just ignore it try: first_token = first_token.decode('utf8') - #if it fails, which means it's not a string in this turn, just ignore it except UnicodeDecodeError: first_token = '' From d364aa0a3c0993b09d53966b53eae862f4deaae0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 1 Oct 2024 10:22:57 -0700 Subject: [PATCH 35/41] Lint --- js/main.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/js/main.js b/js/main.js index 71489f14..3028afac 100644 --- a/js/main.js +++ b/js/main.js @@ -604,11 +604,11 @@ headerBar.addEventListener("click", (e) => { // Add a confirmation dialog when leaving the page // Useful to avoid data loss //------------------------------------------------ -window.addEventListener('beforeunload', function (event) { +window.addEventListener("beforeunload", function (event) { // Cancel the event event.preventDefault(); // Chrome requires returnValue to be set - event.returnValue = ''; + event.returnValue = ""; }); moveToChatTab(); From 93c250b9b6b7d625c9cd698f99400d4fe650779d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 1 Oct 2024 11:16:15 -0700 Subject: [PATCH 36/41] Add a UI element for enable_tp --- modules/loaders.py | 2 ++ modules/ui.py | 1 + modules/ui_model_menu.py | 1 + 3 files changed, 4 insertions(+) diff --git a/modules/loaders.py b/modules/loaders.py index 16a3106e..deee00a7 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -90,6 +90,7 @@ loaders_and_params = OrderedDict({ 'cache_8bit', 'cache_4bit', 'autosplit', + 'enable_tp', 'alpha_value', 'compress_pos_emb', 'trust_remote_code', @@ -105,6 +106,7 @@ 
loaders_and_params = OrderedDict({ 'cache_8bit', 'cache_4bit', 'autosplit', + 'enable_tp', 'alpha_value', 'compress_pos_emb', 'exllamav2_info', diff --git a/modules/ui.py b/modules/ui.py index 76b1c009..c07beeb4 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -90,6 +90,7 @@ def list_model_elements(): 'cache_8bit', 'cache_4bit', 'autosplit', + 'enable_tp', 'threads', 'threads_batch', 'n_batch', diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 1883fdca..f87b680a 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -136,6 +136,7 @@ def create_ui(): shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk) shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16) shared.gradio['autosplit'] = gr.Checkbox(label="autosplit", value=shared.args.autosplit, info='Automatically split the model tensors across the available GPUs.') + shared.gradio['enable_tp'] = gr.Checkbox(label="enable_tp", value=shared.args.enable_tp, info='Enable Tensor Parallelism (TP).') shared.gradio['no_flash_attn'] = gr.Checkbox(label="no_flash_attn", value=shared.args.no_flash_attn) shared.gradio['no_xformers'] = gr.Checkbox(label="no_xformers", value=shared.args.no_xformers) shared.gradio['no_sdpa'] = gr.Checkbox(label="no_sdpa", value=shared.args.no_sdpa) From 49dfa0adaf52bcff76b3ab9c33326812b281c2cf Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 1 Oct 2024 11:20:48 -0700 Subject: [PATCH 37/41] Fix the "save preset" event --- modules/ui_file_saving.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_file_saving.py b/modules/ui_file_saving.py index ac72c623..3a27e1b9 100644 --- a/modules/ui_file_saving.py +++ b/modules/ui_file_saving.py @@ -74,7 +74,7 @@ def handle_save_preset_confirm_click(filename, contents): try: utils.save_file(f"presets/{filename}.yaml", contents) available_presets = utils.get_available_presets() - output = gr.update(choices=available_presets, value=filename), + output = gr.update(choices=available_presets, value=filename) except Exception: output = gr.update() traceback.print_exc() From e1338a1804d51d4e74146f2490586985ba499ce0 Mon Sep 17 00:00:00 2001 From: SeanScripts <64337075+SeanScripts@users.noreply.github.com> Date: Tue, 1 Oct 2024 13:49:35 -0700 Subject: [PATCH 38/41] Add whisper turbo (#6423) --- extensions/whisper_stt/script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py index e45c8b1e..d949e93f 100644 --- a/extensions/whisper_stt/script.py +++ b/extensions/whisper_stt/script.py @@ -96,7 +96,7 @@ def ui(): with gr.Accordion("Settings", open=False): auto_submit = gr.Checkbox(label='Submit the transcribed audio automatically', value=params['auto_submit']) device_dropd = gr.Dropdown(label='Device', value=str(startup_device), choices=["cuda", "cpu", "none"]) - whisper_model_dropd = gr.Dropdown(label='Whisper Model', value=params['whipser_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large"]) + whisper_model_dropd = gr.Dropdown(label='Whisper Model', value=params['whipser_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large", "turbo"]) whisper_language = gr.Dropdown(label='Whisper Language', value=params['whipser_language'], choices=["english", "chinese", "german", "spanish", "russian", "korean", "french", "japanese", "portuguese", "turkish", "polish", 
"catalan", "dutch", "arabic", "swedish", "italian", "indonesian", "hindi", "finnish", "vietnamese", "hebrew", "ukrainian", "greek", "malay", "czech", "romanian", "danish", "hungarian", "tamil", "norwegian", "thai", "urdu", "croatian", "bulgarian", "lithuanian", "latin", "maori", "malayalam", "welsh", "slovak", "telugu", "persian", "latvian", "bengali", "serbian", "azerbaijani", "slovenian", "kannada", "estonian", "macedonian", "breton", "basque", "icelandic", "armenian", "nepali", "mongolian", "bosnian", "kazakh", "albanian", "swahili", "galician", "marathi", "punjabi", "sinhala", "khmer", "shona", "yoruba", "somali", "afrikaans", "occitan", "georgian", "belarusian", "tajik", "sindhi", "gujarati", "amharic", "yiddish", "lao", "uzbek", "faroese", "haitian creole", "pashto", "turkmen", "nynorsk", "maltese", "sanskrit", "luxembourgish", "myanmar", "tibetan", "tagalog", "malagasy", "assamese", "tatar", "hawaiian", "lingala", "hausa", "bashkir", "javanese", "sundanese"]) audio.change( From 22baa5378f27de6427640a4ce3ba3a54bb3a9722 Mon Sep 17 00:00:00 2001 From: Luana Date: Thu, 3 Oct 2024 00:35:13 -0300 Subject: [PATCH 39/41] Fix for systems that have bash in a non-standard directory (#6428) --- one_click.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/one_click.py b/one_click.py index 9bad25d1..f1c91f47 100644 --- a/one_click.py +++ b/one_click.py @@ -189,8 +189,8 @@ def run_cmd(cmd, assert_success=False, environment=False, capture_output=False, conda_sh_path = os.path.join(script_dir, "installer_files", "conda", "etc", "profile.d", "conda.sh") cmd = f'. "{conda_sh_path}" && conda activate "{conda_env_path}" && {cmd}' - # Set executable to None for Windows, /bin/bash for everything else - executable = None if is_windows() else '/bin/bash' + # Set executable to None for Windows, bash for everything else + executable = None if is_windows() else 'bash' # Run shell commands result = subprocess.run(cmd, shell=True, capture_output=capture_output, env=env, executable=executable) From 9d8b1c5fd94d360da4ab55334442cfa64aaf40d0 Mon Sep 17 00:00:00 2001 From: Grzegorz Lippe Date: Sat, 5 Oct 2024 16:58:17 +0200 Subject: [PATCH 40/41] Fix intel bug described in #6253 (#6433) --- one_click.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/one_click.py b/one_click.py index f1c91f47..8fc1edf0 100644 --- a/one_click.py +++ b/one_click.py @@ -313,7 +313,7 @@ def install_webui(): if selected_gpu == "INTEL": # Install oneAPI dependencies via conda print_big_message("Installing Intel oneAPI runtime libraries.") - run_cmd("conda install -y -c intel dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0") + run_cmd("conda install -y -c https://software.repos.intel.com/python/conda/ -c conda-forge dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0") # Install libuv required by Intel-patched torch run_cmd("conda install -y libuv") @@ -329,7 +329,7 @@ def install_extensions_requirements(): print_big_message("Installing extensions requirements.\nSome of these may fail on Windows.\nDon\'t worry if you see error messages, as they will not affect the main program.") extensions = get_extensions_names() for i, extension in enumerate(extensions): - print(f"\n\n--- [{i+1}/{len(extensions)}]: {extension}\n\n") + print(f"\n\n--- [{i + 1}/{len(extensions)}]: {extension}\n\n") extension_req_path = os.path.join("extensions", extension, "requirements.txt") run_cmd(f"python -m pip install -r {extension_req_path} --upgrade", assert_success=False, environment=True) From 03a2e7005412efb7765ce3f0bfccd6b94e79a843 Mon Sep 17 
00:00:00 2001 From: PIRI <34787507+ThisIsPIRI@users.noreply.github.com> Date: Wed, 9 Oct 2024 14:25:14 +0000 Subject: [PATCH 41/41] Fix temperature_last when temperature not in sampler priority (#6439) --- modules/sampler_hijack.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index 87f0b25e..97951dd2 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -571,11 +571,10 @@ def get_logits_processor_patch(self, **kwargs): if generation_config.temperature_last: for param_name in ['temperature', 'dynamic_temperature', 'quadratic_sampling']: if param_name in sampler_priority: - if param_name in sampler_priority: - index = sampler_priority.index(param_name) - sampler_priority.append(sampler_priority.pop(index)) - else: - sampler_priority.append(param_name) + index = sampler_priority.index(param_name) + sampler_priority.append(sampler_priority.pop(index)) + else: + sampler_priority.append(param_name) class_name_to_nickname = { 'DynamicTemperatureLogitsWarper': 'dynamic_temperature',