import gc
import traceback
from queue import Queue
from threading import Thread

import torch
import transformers

import modules.shared as shared


class _SentinelTokenStoppingCriteria(transformers.StoppingCriteria):
    """Stop generation once a sample's generated tail ends with a sentinel.

    Only the tokens from ``starting_idx`` onward are inspected in each sample.
    """

    def __init__(self, sentinel_token_ids: list, starting_idx: int):
        """Store the sentinel sequences and the index where inspection starts.

        Args:
            sentinel_token_ids: Sentinel token sequences. Each element must
                expose ``.shape`` and be comparable with ``torch.eq``
                (presumably token-id tensors; confirm against the caller).
            starting_idx: Index into each sample from which tokens are
                inspected; earlier tokens are ignored.
        """
        transformers.StoppingCriteria.__init__(self)
        self.sentinel_token_ids = sentinel_token_ids
        self.starting_idx = starting_idx
        # Length of the shortest sentinel, used as a cheap early-out in
        # __call__. ``default=0`` keeps construction from failing when no
        # sentinels are given; __call__ then simply never matches.
        self.shortest = min(
            (x.shape[-1] for x in sentinel_token_ids),
            default=0,
        )

    def __call__(self, input_ids: torch.LongTensor, _scores: torch.FloatTensor) -> bool:
        """Return True if any sample currently ends with any sentinel.

        Args:
            input_ids: Iterable of per-sample token tensors.
            _scores: Unused.

        Returns:
            True as soon as one sample's trailing tokens equal a sentinel,
            otherwise False.
        """
        for sample in input_ids:
            trimmed_sample = sample[self.starting_idx:]
            trimmed_len = trimmed_sample.shape[-1]
            if trimmed_len < self.shortest:
                # Not enough tokens yet for even the shortest sentinel.
                continue

            for sentinel in self.sentinel_token_ids:
                sentinel_len = sentinel.shape[-1]
                if trimmed_len < sentinel_len:
                    continue

                # Compare the sentinel with the last ``sentinel_len`` tokens.
                window = trimmed_sample[-sentinel_len:]
                if torch.all(torch.eq(sentinel, window)):
                    return True

        return False


class Stream(transformers.StoppingCriteria):
    """Stopping criterion that never stops; it only reports progress.

    On every call it hands the first sample of ``input_ids`` to the
    configured callback.
    """

    def __init__(self, callback_func=None):
        """Remember the optional callback invoked on every call.

        Args:
            callback_func: Callable receiving ``input_ids[0]``, or None to
                disable reporting.
        """
        super().__init__()
        self.callback_func = callback_func

    def __call__(self, input_ids, scores) -> bool:
        """Forward ``input_ids[0]`` to the callback and return False."""
        if self.callback_func is not None:
            self.callback_func(input_ids[0])
        return False


class Iteratorize:
    """
    Transforms a function that takes a callback
    into a lazy iterator (generator).

    The wrapped function runs in a background thread that is started in the
    constructor. Every value it passes to its ``callback`` argument is queued
    and yielded by this iterator.

    Adapted from: https://stackoverflow.com/a/9969000
    """

    def __init__(self, func, kwargs=None, callback=None):
        """Start ``func`` in a background thread.

        Args:
            func: Callable invoked as ``func(callback=..., **kwargs)``.
            kwargs: Extra keyword arguments for ``func``; None means none.
            callback: Optional callable invoked with the return value of
                ``func`` once it has finished. It receives None when
                ``func`` was stopped or raised an exception.
        """
        self.mfunc = func
        self.c_callback = callback
        self.q = Queue()
        # Unique end-of-stream marker; compared by identity in __next__.
        self.sentinel = object()
        self.kwargs = kwargs or {}
        self.stop_now = False

        def _callback(val):
            # Raising inside the callback is how the wrapped function is
            # interrupted when a stop has been requested.
            if self.stop_now or shared.stop_everything:
                raise ValueError
            self.q.put(val)

        def gentask():
            # Pre-bind the result so the completion callback can still be
            # called when the wrapped function raises or is stopped.
            ret = None
            try:
                ret = self.mfunc(callback=_callback, **self.kwargs)
            except ValueError:
                # Stop signal raised by _callback. A ValueError raised by the
                # wrapped function itself is swallowed here as well.
                pass
            except Exception:
                traceback.print_exc()
            finally:
                # Always release the consumer blocked in __next__.
                clear_torch_cache()
                self.q.put(self.sentinel)

            if self.c_callback:
                self.c_callback(ret)

        self.thread = Thread(target=gentask)
        self.thread.start()

    def __iter__(self):
        return self

    def __next__(self):
        """Block until the next value is available and return it.

        Raises:
            StopIteration: When the end-of-stream marker is received.
        """
        obj = self.q.get(True, None)
        if obj is self.sentinel:
            raise StopIteration
        return obj

    def __del__(self):
        clear_torch_cache()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Ask the worker to stop; it notices on its next callback invocation.
        self.stop_now = True
        clear_torch_cache()


def clear_torch_cache():
    """Run the garbage collector and, unless in CPU mode, empty the CUDA cache."""
    gc.collect()
    if not shared.args.cpu:
        torch.cuda.empty_cache()