Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2025-01-09 12:09:04 +01:00)
Added UnicodeDecodeError workaround for modules/llamacpp_model.py (#6040)
Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
Parent: 91a8a87887
Commit: b9e2ded6d4
@@ -122,7 +122,14 @@ class LlamaCppModel:
         return self.model.tokenize(string)

     def decode(self, ids, **kwargs):
-        return self.model.detokenize(ids).decode('utf-8')
+        detokenized = self.model.detokenize(ids)
+        try:
+            # Attempt strict UTF-8 decoding first
+            return detokenized.decode('utf-8', 'strict')
+        except UnicodeDecodeError as e:
+            # Log the error and fall back to UTF-8 with replacement
+            logger.warning(f"Invalid UTF-8 in detokenized output. Using replacement characters.\n{e}")
+            return detokenized.decode('utf-8', 'replace')

     def get_logits(self, tokens):
         self.model.reset()
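As a standalone sketch of why the fallback above matters (not part of the commit itself): per-token detokenization can return bytes that cut a multi-byte UTF-8 character short, which raises under 'strict' decoding but yields the replacement character under 'replace'.

# Sketch (not from the commit): truncated multi-byte UTF-8 from detokenization.
# Take only the first two bytes of a four-byte emoji to simulate the failure.
partial = "🙂".encode("utf-8")[:2]  # b'\xf0\x9f', an incomplete sequence

try:
    partial.decode("utf-8", "strict")   # the call the new code attempts first
except UnicodeDecodeError as e:
    print(f"strict decode failed: {e}")

# The fallback path: invalid bytes become U+FFFD instead of raising.
print(partial.decode("utf-8", "replace"))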