Attempt at evaluating falcon perplexity (failed)

This commit is contained in:
oobabooga 2023-05-29 13:28:25 -03:00
parent 204731952a
commit 983eef1e29

View File

@@ -82,7 +82,12 @@ def calculate_perplexity(models, input_dataset, stride, _max_length):
yield cumulative_log + "Tokenizing the input dataset...\n\n" yield cumulative_log + "Tokenizing the input dataset...\n\n"
encodings = encode(text, add_special_tokens=False) encodings = encode(text, add_special_tokens=False)
seq_len = encodings.shape[1] seq_len = encodings.shape[1]
max_length = _max_length or shared.model.config.max_position_embeddings if not _max_length:
if hasattr(shared.model.config, 'max_position_embeddings'):
max_length = shared.model.config.max_position_embeddings
else:
max_length = 2048
nlls = [] nlls = []
prev_end_loc = 0 prev_end_loc = 0
for begin_loc in tqdm(range(0, seq_len, stride)): for begin_loc in tqdm(range(0, seq_len, stride)):