Skip to content

Commit 66a54bf

Browse files
committed
tmp
1 parent b4ad03b commit 66a54bf

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

llama.cpp

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3142,6 +3142,7 @@ static struct ggml_cgraph * llm_build_llama(
31423142
if (batch.token) {
31433143
struct ggml_tensor * inp_tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens);
31443144
ggml_set_name(inp_tokens, "inp_tokens");
3145+
ggml_allocr_alloc(lctx.alloc, inp_tokens);
31453146

31463147
inpL = ggml_get_rows(ctx0, model.tok_embeddings, inp_tokens);
31473148
} else {
@@ -3156,19 +3157,23 @@ static struct ggml_cgraph * llm_build_llama(
31563157
// KQ_scale
31573158
struct ggml_tensor * KQ_scale = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
31583159
ggml_set_name(KQ_scale, "KQ_scale");
3160+
ggml_allocr_alloc(lctx.alloc, KQ_scale);
31593161

31603162
// KQ_mask (mask for 1 head, it will be broadcasted to all heads)
31613163
struct ggml_tensor * KQ_mask = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_kv, n_tokens, 1);
31623164
ggml_set_name(KQ_mask, "KQ_mask");
3165+
ggml_allocr_alloc(lctx.alloc, KQ_mask);
31633166

31643167
// KQ_pos - contains the positions
31653168
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens);
31663169
ggml_set_name(KQ_pos, "KQ_pos");
3170+
ggml_allocr_alloc(lctx.alloc, KQ_pos);
31673171

31683172
// shift the entire K-cache if needed
31693173
if (do_rope_shift) {
31703174
struct ggml_tensor * K_shift = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_ctx);
31713175
ggml_set_name(K_shift, "K_shift");
3176+
ggml_allocr_alloc(lctx.alloc, K_shift);
31723177

31733178
for (int il = 0; il < n_layer; ++il) {
31743179
struct ggml_tensor * tmp =
@@ -5523,7 +5528,7 @@ static struct ggml_cgraph * llama_build_graph(
55235528
}
55245529

55255530
// allocate memory and set the values for the input tensors of the graph
5526-
llama_build_graph_input(lctx, batch, result);
5531+
//llama_build_graph_input(lctx, batch, result);
55275532

55285533
//auto t_start = std::chrono::high_resolution_clock::now();
55295534

0 commit comments

Comments
 (0)