common : apply ignore_eos as logit bias

ggerganov · ggerganov · commit 869ec41e7821 · 2024-12-12T21:22:33.000+02:00
ggml-ci
diff --git a/common/common.cpp b/common/common.cpp
@@ -940,6 +940,11 @@ struct common_init_result common_init_from_params(common_params & params) {
         params.sampling.ignore_eos = false;
     }
 
+    if (params.sampling.ignore_eos) {
+        LOG_INF("%s: added EOS logit bias = %f\n", __func__, -INFINITY);
+        params.sampling.logit_bias.push_back({llama_token_eos(model), -INFINITY});
+    }
+
     if (params.warmup) {
         LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__);
 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
@@ -1467,7 +1467,7 @@ struct server_context {
         n_ctx = llama_n_ctx(ctx);
 
         add_bos_token = llama_add_bos_token(model);
-        has_eos_token = !llama_add_eos_token(model);
+        has_eos_token = llama_token_eos(model) != LLAMA_TOKEN_NULL;
 
         if (!params_base.speculative.model.empty()) {
             SRV_INF("loading draft model '%s'\n", params_base.speculative.model.c_str());