Commit c14ee72

context : encode() clears embd_seq

ggml-ci

1 parent a21ff6c commit c14ee72

File tree

2 files changed: 8 additions & 6 deletions

src/llama-context.cpp

Lines changed: 7 additions & 5 deletions
@@ -702,6 +702,8 @@ int llama_context::encode(llama_batch & inp_batch) {
         t_compute_start_us = ggml_time_us();
     }

+    embd_seq.clear();
+
     n_queued_tokens += n_tokens;

     const int64_t n_embd = hparams.n_embd;
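
Note: the added embd_seq.clear() resets the per-sequence embedding output at the start of every encode() call, so embeddings left over from a previous batch cannot leak into the results of the current one. Below is a minimal C++ sketch of that pattern; the toy_context type and its members are illustrative stand-ins, not llama.cpp's actual definitions.

#include <cstdio>
#include <map>
#include <vector>

// Hypothetical stand-in for a context that caches per-sequence outputs.
struct toy_context {
    // seq_id -> embedding, filled during encode(); mirrors the idea of embd_seq.
    std::map<int, std::vector<float>> embd_seq;

    void encode(const std::vector<int> & seq_ids) {
        // Clear the results of any previous call first, as the commit does,
        // so stale entries never survive into this batch's output.
        embd_seq.clear();

        for (int s : seq_ids) {
            embd_seq[s] = std::vector<float>(4, 1.0f * s); // dummy embedding
        }
    }
};

int main() {
    toy_context ctx;

    ctx.encode({0, 1}); // first batch touches sequences 0 and 1
    ctx.encode({2});    // second batch touches only sequence 2

    // Without the clear(), sequences 0 and 1 would still be reported here.
    printf("cached sequences after 2nd call: %zu\n", ctx.embd_seq.size());
    return 0;
}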
@@ -842,13 +844,13 @@ int llama_context::encode(llama_batch & inp_batch) {
 }

 int llama_context::decode(llama_batch & inp_batch) {
-    if (inp_batch.n_tokens == 0) {
-        LLAMA_LOG_ERROR("%s: n_tokens == 0\n", __func__);
-        return -1;
-    }
-
     if (!memory) {
         LLAMA_LOG_WARN("%s: cannot decode batches with this context\n", __func__);
+        return encode(inp_batch);
+    }
+
+    if (inp_batch.n_tokens == 0) {
+        LLAMA_LOG_ERROR("%s: n_tokens == 0\n", __func__);
         return -1;
     }

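Note: this hunk also reorders decode()'s early checks: a context without memory now forwards the batch to encode() instead of returning an error, and the empty-batch check runs only once the call is known to be a real decode. A rough sketch of the resulting control flow, with placeholder types standing in for llama.cpp's real ones:

#include <cstdio>

// Placeholder batch and context; only the dispatch logic mirrors the diff.
struct toy_batch { int n_tokens; };

struct toy_llama_context {
    bool has_memory; // stands in for the `memory` member

    int encode(const toy_batch & batch) {
        printf("encode: %d tokens\n", batch.n_tokens);
        return 0;
    }

    int decode(const toy_batch & batch) {
        // No memory: this context cannot decode, so route the batch to
        // encode() instead of failing (mirrors the new fallback).
        if (!has_memory) {
            printf("decode: no memory, falling back to encode()\n");
            return encode(batch);
        }

        // Reject empty batches only on the real decode path.
        if (batch.n_tokens == 0) {
            printf("decode: n_tokens == 0\n");
            return -1;
        }

        printf("decode: %d tokens\n", batch.n_tokens);
        return 0;
    }
};

int main() {
    toy_llama_context ctx = { /*has_memory=*/false };
    ctx.decode({8}); // routed to encode() by the fallback
    return 0;
}
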
tools/server/server.cpp

Lines changed: 1 addition & 1 deletion
@@ -3941,7 +3941,7 @@ int main(int argc, char ** argv) {
     const auto handle_completions_impl = [&ctx_server, &res_error, &res_ok](
             server_task_type type,
             json & data,
-            std::function<bool()> is_connection_closed,
+            const std::function<bool()> & is_connection_closed,
             httplib::Response & res,
             oaicompat_type oaicompat) {
         GGML_ASSERT(type == SERVER_TASK_TYPE_COMPLETION || type == SERVER_TASK_TYPE_INFILL);
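
Note: taking the std::function parameter by const reference avoids copying the callable, together with any state it captures (potentially heap-allocated), on every invocation of the handler. A small self-contained illustration of the difference (not llama.cpp code; the copy_counter type is made up for the demo):

#include <cstdio>
#include <functional>

// A callable that counts how often it is copied, to make the cost visible.
struct copy_counter {
    static inline int copies = 0;
    copy_counter() = default;
    copy_counter(const copy_counter &) { ++copies; }
    bool operator()() const { return false; }
};

// By value: the std::function (and its stored callable) is copied per call.
void by_value(std::function<bool()> is_connection_closed) {
    is_connection_closed();
}

// By const reference: same usage, no copy (mirrors the diff).
void by_cref(const std::function<bool()> & is_connection_closed) {
    is_connection_closed();
}

int main() {
    std::function<bool()> f = copy_counter{};

    copy_counter::copies = 0;
    by_value(f);
    printf("copies when passed by value: %d\n", copy_counter::copies);

    copy_counter::copies = 0;
    by_cref(f);
    printf("copies when passed by const ref: %d\n", copy_counter::copies);
    return 0;
}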
