
Commit 79d7024

ikawrakow (Kawrakow) authored and committed
Fix HellaSwag (ggml-org#2805)
Co-authored-by: Iwan Kawrakow <[email protected]>
1 parent f090d8d commit 79d7024

File tree

1 file changed: +17 −3 lines changed


examples/perplexity/perplexity.cpp

Lines changed: 17 additions & 3 deletions
@@ -351,6 +351,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
     fprintf(stderr, "%s : loaded %zu tasks from prompt.\n", __func__, hs_task_count);
 
     const bool is_spm = llama_vocab_type(ctx) == LLAMA_VOCAB_TYPE_SPM;
+    fprintf(stderr, "================================= is_spm = %d\n", is_spm);
 
     // This is needed as usual for LLaMA models
     const bool add_bos = is_spm;
@@ -406,18 +407,30 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
     double acc = 0.0f;
     const int n_vocab = llama_n_vocab(ctx);
 
+    std::vector<std::vector<int>> ending_tokens(4);
+
     std::vector<float> tok_logits(n_vocab);
 
     for (size_t task_idx = 0; task_idx < hs_task_count; task_idx++) {
         // Tokenize the context to count tokens
         std::vector<int> context_embd = ::llama_tokenize(ctx, hs_data[task_idx].context, add_bos);
         size_t context_size = context_embd.size();
 
+        for (int i = 0; i < 4; ++i) {
+            ending_tokens[i] = ::llama_tokenize(ctx, hs_data[task_idx].context + hs_data[task_idx].ending[i], add_bos);
+            for (int k = 0; k < int(context_size); ++k) {
+                if (ending_tokens[i][k] != context_embd[k]) {
+                    fprintf(stderr, "Oops: ending %d of task %d differs from context at position %d\n",i,int(task_idx),k);
+                    break;
+                }
+            }
+        }
+
         // Do the 1st ending
         // In this case we include the context when evaluating
-        auto query_embd = ::llama_tokenize(ctx, hs_data[task_idx].context + hs_data[task_idx].ending[0], add_bos);
+        //auto query_embd = ::llama_tokenize(ctx, hs_data[task_idx].context + hs_data[task_idx].ending[0], add_bos);
+        auto query_embd = ending_tokens[0];
         auto query_size = query_embd.size();
-        //printf("First query: %d\n",(int)query_size);
 
         // Stop if query wont fit the ctx window
         if (query_size > (size_t)params.n_ctx) {
@@ -462,7 +475,8 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
         for (size_t ending_idx = 1; ending_idx < 4; ending_idx++) {
 
             // Tokenize the query
-            query_embd = ::llama_tokenize(ctx, hs_data[task_idx].ending[ending_idx], false);
+            query_embd.resize(ending_tokens[ending_idx].size() - context_size);
+            std::memcpy(query_embd.data(), ending_tokens[ending_idx].data() + context_size, query_embd.size()*sizeof(int));
             query_size = query_embd.size();
 
             // Stop if query wont fit the ctx window
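For readers skimming the change: the fix stops tokenizing each ending on its own (which, with a SentencePiece vocabulary, can yield different tokens than when the ending follows the context) and instead tokenizes the full context + ending string once, then slices off the tokens that come after the context prefix. Below is a minimal, self-contained sketch of that slicing idea; it is not part of the commit, and toy_tokenize() is a made-up stand-in for ::llama_tokenize.

// Sketch only: tokenize "context + ending" once and slice the ending tokens
// off after the context prefix, mirroring the resize + memcpy pattern in the
// diff above. toy_tokenize() is hypothetical, not llama.cpp API.
#include <cstdio>
#include <cstring>
#include <functional>
#include <sstream>
#include <string>
#include <vector>

// Made-up tokenizer: splits on whitespace and hashes each word to an id.
static std::vector<int> toy_tokenize(const std::string & text) {
    std::vector<int> tokens;
    std::istringstream ss(text);
    std::string word;
    while (ss >> word) {
        tokens.push_back((int) (std::hash<std::string>{}(word) & 0x7fffffff));
    }
    return tokens;
}

// Take the tokens that follow the shared context prefix, the same way the
// patch copies ending_tokens[ending_idx] beyond context_size into query_embd.
static std::vector<int> ending_tokens_after_context(const std::vector<int> & full_tokens, size_t context_size) {
    std::vector<int> ending(full_tokens.size() - context_size);
    std::memcpy(ending.data(), full_tokens.data() + context_size, ending.size()*sizeof(int));
    return ending;
}

int main() {
    const std::string context = "A man is sitting on a roof. He";
    const std::string ending  = " starts pulling up roofing on a roof.";

    const std::vector<int> context_tokens = toy_tokenize(context);
    const std::vector<int> full_tokens    = toy_tokenize(context + ending);
    const std::vector<int> ending_tokens  = ending_tokens_after_context(full_tokens, context_tokens.size());

    printf("context tokens: %zu, ending tokens: %zu\n", context_tokens.size(), ending_tokens.size());
    return 0;
}

The resize/memcpy in the patch plays the role of ending_tokens_after_context() here; the extra loop over the four endings additionally warns ("Oops: ...") if the shared context prefix ever tokenizes differently from the context on its own.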
0 commit comments
