Add proper warning message for decode failure

This commit is contained in:
nclok1405 2024-05-28 00:54:54 +09:00
parent f5ca611569
commit 47c774e8a3

View File

@@ -118,12 +118,12 @@ class LlamaCppModel:
s = "" s = ""
try: try:
s = detokenized.decode('utf-8', 'strict') s = detokenized.decode('utf-8', 'strict')
except UnicodeDecodeError: except UnicodeDecodeError as e1:
try: try:
logger.warning("Detokenized result can't be decoded in utf-8 charset. Fallback to latin.") logger.warning(f"Detokenized result can't be decoded in utf-8 charset. Fallback to latin.\n {e1}")
s = detokenized.decode('latin', 'strict') s = detokenized.decode('latin', 'strict')
except UnicodeDecodeError as e: except UnicodeDecodeError as e2:
logger.warning("Detokenized result can't be decoded in latin charset. Will attempt to decode as utf-8 with invalid characters replaced with '?' (U+FFFD).") logger.warning(f"Detokenized result can't be decoded in latin charset. Will attempt to decode as utf-8 with invalid characters replaced with '?' (U+FFFD).\n{e2}")
s = detokenized.decode('utf-8', 'replace') s = detokenized.decode('utf-8', 'replace')
return s return s