2 files changed: +8 −6 lines changed

File 1 of 2:

@@ -702,6 +702,8 @@ int llama_context::encode(llama_batch & inp_batch) {
         t_compute_start_us = ggml_time_us();
     }
 
+    embd_seq.clear();
+
     n_queued_tokens += n_tokens;
 
     const int64_t n_embd = hparams.n_embd;
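The first hunk clears embd_seq at the start of encode(). A minimal sketch of that pattern, assuming embd_seq is a per-sequence cache of pooled embeddings (the exact type lives in the llama.cpp context headers; the container and names below are illustrative stand-ins only): clearing it up front ensures a caller never reads results left over from a previous batch.

// Illustrative stand-ins, not the real llama.cpp types (assumption).
#include <cstdint>
#include <map>
#include <vector>

using toy_seq_id = int32_t;

struct toy_context {
    // Pooled embedding per sequence id from the most recent encode call.
    std::map<toy_seq_id, std::vector<float>> embd_seq;

    void encode(const std::vector<toy_seq_id> & seqs_in_batch, int n_embd) {
        // Drop results from any earlier call first, so sequences that are not
        // part of this batch do not appear to have (stale) embeddings.
        embd_seq.clear();

        for (toy_seq_id s : seqs_in_batch) {
            embd_seq[s] = std::vector<float>(n_embd, 0.0f); // placeholder for real encoder output
        }
    }
};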
@@ -842,13 +844,13 @@ int llama_context::encode(llama_batch & inp_batch) {
 }
 
 int llama_context::decode(llama_batch & inp_batch) {
-    if (inp_batch.n_tokens == 0) {
-        LLAMA_LOG_ERROR("%s: n_tokens == 0\n", __func__);
-        return -1;
-    }
-
     if (!memory) {
         LLAMA_LOG_WARN("%s: cannot decode batches with this context\n", __func__);
+        return encode(inp_batch);
+    }
+
+    if (inp_batch.n_tokens == 0) {
+        LLAMA_LOG_ERROR("%s: n_tokens == 0\n", __func__);
         return -1;
     }
 
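The second hunk reorders decode()'s early checks so that a context without a memory module falls back to encode() instead of returning an error. A minimal sketch of that dispatch, assuming (as the diff suggests) that encode() performs its own empty-batch check, which is why the n_tokens == 0 test can safely move below the !memory branch; all names here are simplified stand-ins, not the real llama.cpp API.

#include <cstdio>
#include <memory>

struct toy_batch  { int n_tokens = 0; };
struct toy_memory { /* KV-cache state would live here */ };

struct toy_context {
    std::unique_ptr<toy_memory> memory; // null for encoder-only / embedding contexts (assumption)

    int encode(toy_batch & batch) {
        if (batch.n_tokens == 0) {
            std::fprintf(stderr, "%s: n_tokens == 0\n", __func__);
            return -1;
        }
        // ... run the encoder ...
        return 0;
    }

    int decode(toy_batch & batch) {
        if (!memory) {
            // No KV cache to decode into: warn and delegate to encode(),
            // which performs its own validation of the batch.
            std::fprintf(stderr, "%s: cannot decode batches with this context\n", __func__);
            return encode(batch);
        }
        if (batch.n_tokens == 0) {
            std::fprintf(stderr, "%s: n_tokens == 0\n", __func__);
            return -1;
        }
        // ... run the decoder against the KV cache ...
        return 0;
    }
};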
File 2 of 2:

@@ -3941,7 +3941,7 @@ int main(int argc, char ** argv) {
     const auto handle_completions_impl = [&ctx_server, &res_error, &res_ok](
             server_task_type type,
             json & data,
-            std::function<bool()> is_connection_closed,
+            const std::function<bool()> & is_connection_closed,
             httplib::Response & res,
             oaicompat_type oaicompat) {
         GGML_ASSERT(type == SERVER_TASK_TYPE_COMPLETION || type == SERVER_TASK_TYPE_INFILL);
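The third hunk changes the is_connection_closed parameter from a by-value std::function to a const reference. A small self-contained sketch of why that matters, assuming the callback is only invoked (not stored) by the handler; the function and variable names below are illustrative, not the server code itself: copying a std::function can allocate, while binding a const reference does not.

#include <cstdio>
#include <functional>

static bool run_until_closed(const std::function<bool()> & is_connection_closed) {
    // The callable is only invoked here, never stored, so a reference is enough
    // and no copy of the (possibly capturing) closure is made per call.
    for (int step = 0; step < 3; ++step) {
        if (is_connection_closed()) {
            return false;
        }
        std::printf("step %d done\n", step);
    }
    return true;
}

int main() {
    int polls = 0;
    // A capturing lambda converts to std::function once at the call site;
    // passing by const reference avoids a second copy inside the callee.
    std::function<bool()> closed = [&polls]() { return ++polls > 10; };
    return run_until_closed(closed) ? 0 : 1;
}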