Downloader: Make progress bars not jump around

Adapted from: https://gist.github.com/NiklasBeierl/13096bfdd8b2084da8c1163dd06f91d3
This commit is contained in:
oobabooga 2025-01-25 07:28:31 -08:00
parent 71a551a622
commit 3d4f3e423c

View File

@ -14,6 +14,7 @@ import json
import os
import re
import sys
from multiprocessing import Array
from pathlib import Path
from time import sleep
@ -27,9 +28,10 @@ base = os.environ.get("HF_ENDPOINT") or "https://huggingface.co"
class ModelDownloader:
def __init__(self, max_retries=5):
def __init__(self, max_retries=7):
self.max_retries = max_retries
self.session = self.get_session()
self._progress_bar_slots = None
def get_session(self):
session = requests.Session()
@ -186,12 +188,37 @@ class ModelDownloader:
output_folder = Path(base_folder) / output_folder
return output_folder
@property
def progress_bar_slots(self):
    """Return the shared slot array stored in ``self._progress_bar_slots``.

    Raises:
        RuntimeError: if ``self._progress_bar_slots`` is still ``None``,
            i.e. no slot array has been assigned yet.
    """
    slots = self._progress_bar_slots
    if slots is not None:
        return slots
    raise RuntimeError("Progress bar slots not initialized. Start download threads first.")
def initialize_progress_bar_slots(self, num_threads):
    """Create a fresh slot array with one zeroed entry per thread.

    Args:
        num_threads: number of entries to allocate in the array.

    The array uses typecode ``"B"`` (unsigned byte) and is stored on
    ``self._progress_bar_slots``, replacing any previous value.
    """
    typecode = "B"
    all_free = [0] * num_threads  # 0 is the value the slot scan treats as free
    self._progress_bar_slots = Array(typecode, all_free)
def get_progress_bar_position(self):
    """Claim the lowest-index free slot and return its index.

    While holding the slot array's lock, scans for the first entry equal
    to 0, sets it to 1 and returns its index. If no entry is 0, nothing
    is written and 0 is returned as a fallback.

    NOTE(review): the fallback index 0 is not exclusively owned; a caller
    that later passes it to release_progress_bar_position would zero a
    slot that some other holder may still be using -- confirm callers
    never exceed the number of slots.
    """
    slots = self.progress_bar_slots
    with slots.get_lock():
        # First index whose flag is still 0, or None when every slot is taken.
        free_index = next(
            (index for index, flag in enumerate(slots) if flag == 0),
            None,
        )
        if free_index is None:
            return 0  # fallback
        slots[free_index] = 1
        return free_index
def release_progress_bar_position(self, slot):
    """Mark ``slot`` as free again by writing 0 into the slot array.

    Args:
        slot: index into the slot array to clear.

    The write happens while holding the slot array's lock.
    """
    slots = self.progress_bar_slots
    guard = slots.get_lock()
    with guard:
        slots[slot] = 0
def get_single_file(self, url, output_folder, start_from_scratch=False):
filename = Path(url.rsplit('/', 1)[1])
output_path = output_folder / filename
progress_bar_position = self.get_progress_bar_position()
max_retries = 7
max_retries = self.max_retries
attempt = 0
try:
while attempt < max_retries:
attempt += 1
session = self.session
@ -222,7 +249,9 @@ class ModelDownloader:
'unit_scale': True,
'unit_divisor': 1024,
'bar_format': '{desc}{percentage:3.0f}%|{bar:50}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]',
'desc': f"{filename_str}: "
'desc': f"{filename_str}: ",
'position': progress_bar_position,
'leave': False
}
if 'COLAB_GPU' in os.environ:
@ -250,9 +279,21 @@ class ModelDownloader:
sleep(2 ** attempt)
else:
print("Failed to download after the maximum number of attempts.")
finally:
self.release_progress_bar_position(progress_bar_position)
def start_download_threads(self, file_list, output_folder, start_from_scratch=False, threads=4):
thread_map(lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch), file_list, max_workers=threads, disable=True)
self.initialize_progress_bar_slots(threads)
tqdm.tqdm.set_lock(tqdm.tqdm.get_lock())
try:
thread_map(
lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch),
file_list,
max_workers=threads,
disable=True
)
finally:
print(f"\nDownload of {len(file_list)} files to {output_folder} completed.")
def download_model_files(self, model, branch, links, sha256, output_folder, progress_bar=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False):
self.progress_bar = progress_bar
@ -318,7 +359,7 @@ if __name__ == '__main__':
parser.add_argument('--model-dir', type=str, default=None, help='Save the model files to a subfolder of this folder instead of the default one (text-generation-webui/models).')
parser.add_argument('--clean', action='store_true', help='Does not resume the previous download.')
parser.add_argument('--check', action='store_true', help='Validates the checksums of model files.')
parser.add_argument('--max-retries', type=int, default=5, help='Max retries count when get error in download time.')
parser.add_argument('--max-retries', type=int, default=7, help='Max retries count when get error in download time.')
args = parser.parse_args()
branch = args.branch