Skip to content

Commit dcccbe3

Browse files
committed
Force measure to allocate more memory for 70Bs
1 parent 1410813 commit dcccbe3

File tree

1 file changed

+5
-8
lines changed

1 file changed

+5
-8
lines changed

llama.cpp

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3248,14 +3248,11 @@ static struct ggml_cgraph * llm_build_llama(
32483248
} else {
32493249
run_layer = NULL;
32503250
}
3251-
} else if (ggml_allocr_is_measure(lctx.alloc) && il == n_layer - 1) {
3252-
// No idea why this is needed, but otherwise we run out of space
3253-
// when skipping attn or mlp (but not both) on the last layer
3254-
run_mlp = false;
3255-
} else if (ggml_allocr_is_measure(lctx.alloc) && il == n_layer - 2) {
3256-
// No idea why this is needed, but otherwise we run out of space
3257-
// when skipping attn or mlp (but not both) on the last layer
3258-
run_attn = false;
3251+
} else if (ggml_allocr_is_measure(lctx.alloc)) {
3252+
if (il == 0 || il == n_layer - 1) run_mlp = false;
3253+
else if (il == 1 || il == n_layer - 2) run_attn = false;
3254+
else if (il & 1) run_mlp = false;
3255+
else run_attn = false;
32593256
}
32603257
if (!run_attn && !run_mlp) continue;
32613258

0 commit comments

Comments (0)