Skip to content

Commit 2589726

Browse files
Alexei Starovoitovborkmann
Alexei Starovoitov
authored and committed
bpf: introduce bounded loops
Allow the verifier to validate the loops by simulating their execution. Existing programs have used '#pragma unroll' to unroll the loops by the compiler. Instead let the verifier simulate all iterations of the loop. In order to do that introduce parentage chain of bpf_verifier_state and 'branches' counter for the number of branches left to explore. See more detailed algorithm description in bpf_verifier.h This algorithm borrows the key idea from Edward Cree approach: https://patchwork.ozlabs.org/patch/877222/ Additional state pruning heuristics make such brute force loop walk practical even for large loops. Signed-off-by: Alexei Starovoitov <[email protected]> Acked-by: Andrii Nakryiko <[email protected]> Signed-off-by: Daniel Borkmann <[email protected]>
1 parent fb8d251 commit 2589726

File tree

2 files changed

+181
-13
lines changed

2 files changed

+181
-13
lines changed

include/linux/bpf_verifier.h

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,53 @@ struct bpf_func_state {
194194
struct bpf_verifier_state {
195195
/* call stack tracking */
196196
struct bpf_func_state *frame[MAX_CALL_FRAMES];
197+
struct bpf_verifier_state *parent;
198+
/*
199+
* 'branches' field is the number of branches left to explore:
200+
* 0 - all possible paths from this state reached bpf_exit or
201+
* were safely pruned
202+
* 1 - at least one path is being explored.
203+
* This state hasn't reached bpf_exit
204+
* 2 - at least two paths are being explored.
205+
* This state is an immediate parent of two children.
206+
* One is fallthrough branch with branches==1 and another
207+
* state is pushed into stack (to be explored later) also with
208+
* branches==1. The parent of this state has branches==1.
209+
* The verifier state tree connected via 'parent' pointer looks like:
210+
* 1
211+
* 1
212+
* 2 -> 1 (first 'if' pushed into stack)
213+
* 1
214+
* 2 -> 1 (second 'if' pushed into stack)
215+
* 1
216+
* 1
217+
* 1 bpf_exit.
218+
*
219+
* Once do_check() reaches bpf_exit, it calls update_branch_counts()
220+
* and the verifier state tree will look:
221+
* 1
222+
* 1
223+
* 2 -> 1 (first 'if' pushed into stack)
224+
* 1
225+
* 1 -> 1 (second 'if' pushed into stack)
226+
* 0
227+
* 0
228+
* 0 bpf_exit.
229+
* After pop_stack() the do_check() will resume at second 'if'.
230+
*
231+
* If is_state_visited() sees a state with branches > 0 it means
232+
* there is a loop. If such state is exactly equal to the current state
233+
* it's an infinite loop. Note states_equal() checks for states
234+
* equivalency, so two states being 'states_equal' does not mean
235+
* infinite loop. The exact comparison is provided by
236+
* states_maybe_looping() function. It's a stronger pre-check and
237+
* much faster than states_equal().
238+
*
239+
* This algorithm may not find all possible infinite loops or
240+
* loop iteration count may be too high.
241+
* In such cases BPF_COMPLEXITY_LIMIT_INSNS limit kicks in.
242+
*/
243+
u32 branches;
197244
u32 insn_idx;
198245
u32 curframe;
199246
u32 active_spin_lock;
@@ -312,7 +359,9 @@ struct bpf_verifier_env {
312359
} cfg;
313360
u32 subprog_cnt;
314361
/* number of instructions analyzed by the verifier */
315-
u32 insn_processed;
362+
u32 prev_insn_processed, insn_processed;
363+
/* number of jmps, calls, exits analyzed so far */
364+
u32 prev_jmps_processed, jmps_processed;
316365
/* total verification time */
317366
u64 verification_time;
318367
/* maximum number of verifier states kept in 'branching' instructions */

kernel/bpf/verifier.c

Lines changed: 131 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -721,6 +721,8 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
721721
dst_state->speculative = src->speculative;
722722
dst_state->curframe = src->curframe;
723723
dst_state->active_spin_lock = src->active_spin_lock;
724+
dst_state->branches = src->branches;
725+
dst_state->parent = src->parent;
724726
for (i = 0; i <= src->curframe; i++) {
725727
dst = dst_state->frame[i];
726728
if (!dst) {
@@ -736,6 +738,23 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
736738
return 0;
737739
}
738740

741+
static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
742+
{
743+
while (st) {
744+
u32 br = --st->branches;
745+
746+
/* WARN_ON(br > 1) technically makes sense here,
747+
* but see comment in push_stack(), hence:
748+
*/
749+
WARN_ONCE((int)br < 0,
750+
"BUG update_branch_counts:branches_to_explore=%d\n",
751+
br);
752+
if (br)
753+
break;
754+
st = st->parent;
755+
}
756+
}
757+
739758
static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
740759
int *insn_idx)
741760
{
@@ -789,6 +808,18 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
789808
env->stack_size);
790809
goto err;
791810
}
811+
if (elem->st.parent) {
812+
++elem->st.parent->branches;
813+
/* WARN_ON(branches > 2) technically makes sense here,
814+
* but
815+
* 1. speculative states will bump 'branches' for non-branch
816+
* instructions
817+
* 2. is_state_visited() heuristics may decide not to create
818+
* a new state for a sequence of branches and all such current
819+
* and cloned states will be pointing to a single parent state
820+
* which might have large 'branches' count.
821+
*/
822+
}
792823
return &elem->st;
793824
err:
794825
free_verifier_state(env->cur_state, true);
@@ -5682,7 +5713,8 @@ static void init_explored_state(struct bpf_verifier_env *env, int idx)
56825713
* w - next instruction
56835714
* e - edge
56845715
*/
5685-
static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
5716+
static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
5717+
bool loop_ok)
56865718
{
56875719
int *insn_stack = env->cfg.insn_stack;
56885720
int *insn_state = env->cfg.insn_state;
@@ -5712,6 +5744,8 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
57125744
insn_stack[env->cfg.cur_stack++] = w;
57135745
return 1;
57145746
} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
5747+
if (loop_ok && env->allow_ptr_leaks)
5748+
return 0;
57155749
verbose_linfo(env, t, "%d: ", t);
57165750
verbose_linfo(env, w, "%d: ", w);
57175751
verbose(env, "back-edge from insn %d to %d\n", t, w);
@@ -5763,7 +5797,7 @@ static int check_cfg(struct bpf_verifier_env *env)
57635797
if (opcode == BPF_EXIT) {
57645798
goto mark_explored;
57655799
} else if (opcode == BPF_CALL) {
5766-
ret = push_insn(t, t + 1, FALLTHROUGH, env);
5800+
ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
57675801
if (ret == 1)
57685802
goto peek_stack;
57695803
else if (ret < 0)
@@ -5772,7 +5806,8 @@ static int check_cfg(struct bpf_verifier_env *env)
57725806
init_explored_state(env, t + 1);
57735807
if (insns[t].src_reg == BPF_PSEUDO_CALL) {
57745808
init_explored_state(env, t);
5775-
ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
5809+
ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
5810+
env, false);
57765811
if (ret == 1)
57775812
goto peek_stack;
57785813
else if (ret < 0)
@@ -5785,7 +5820,7 @@ static int check_cfg(struct bpf_verifier_env *env)
57855820
}
57865821
/* unconditional jump with single edge */
57875822
ret = push_insn(t, t + insns[t].off + 1,
5788-
FALLTHROUGH, env);
5823+
FALLTHROUGH, env, true);
57895824
if (ret == 1)
57905825
goto peek_stack;
57915826
else if (ret < 0)
@@ -5798,13 +5833,13 @@ static int check_cfg(struct bpf_verifier_env *env)
57985833
} else {
57995834
/* conditional jump with two edges */
58005835
init_explored_state(env, t);
5801-
ret = push_insn(t, t + 1, FALLTHROUGH, env);
5836+
ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
58025837
if (ret == 1)
58035838
goto peek_stack;
58045839
else if (ret < 0)
58055840
goto err_free;
58065841

5807-
ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
5842+
ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
58085843
if (ret == 1)
58095844
goto peek_stack;
58105845
else if (ret < 0)
@@ -5814,7 +5849,7 @@ static int check_cfg(struct bpf_verifier_env *env)
58145849
/* all other non-branch instructions with single
58155850
* fall-through edge
58165851
*/
5817-
ret = push_insn(t, t + 1, FALLTHROUGH, env);
5852+
ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
58185853
if (ret == 1)
58195854
goto peek_stack;
58205855
else if (ret < 0)
@@ -6247,6 +6282,8 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
62476282

62486283
sl = *explored_state(env, insn);
62496284
while (sl) {
6285+
if (sl->state.branches)
6286+
goto next;
62506287
if (sl->state.insn_idx != insn ||
62516288
sl->state.curframe != cur->curframe)
62526289
goto next;
@@ -6611,19 +6648,51 @@ static int propagate_liveness(struct bpf_verifier_env *env,
66116648
return 0;
66126649
}
66136650

6651+
static bool states_maybe_looping(struct bpf_verifier_state *old,
6652+
struct bpf_verifier_state *cur)
6653+
{
6654+
struct bpf_func_state *fold, *fcur;
6655+
int i, fr = cur->curframe;
6656+
6657+
if (old->curframe != fr)
6658+
return false;
6659+
6660+
fold = old->frame[fr];
6661+
fcur = cur->frame[fr];
6662+
for (i = 0; i < MAX_BPF_REG; i++)
6663+
if (memcmp(&fold->regs[i], &fcur->regs[i],
6664+
offsetof(struct bpf_reg_state, parent)))
6665+
return false;
6666+
return true;
6667+
}
6668+
6669+
66146670
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
66156671
{
66166672
struct bpf_verifier_state_list *new_sl;
66176673
struct bpf_verifier_state_list *sl, **pprev;
66186674
struct bpf_verifier_state *cur = env->cur_state, *new;
66196675
int i, j, err, states_cnt = 0;
6676+
bool add_new_state = false;
66206677

66216678
if (!env->insn_aux_data[insn_idx].prune_point)
66226679
/* this 'insn_idx' instruction wasn't marked, so we will not
66236680
* be doing state search here
66246681
*/
66256682
return 0;
66266683

6684+
/* bpf progs typically have pruning point every 4 instructions
6685+
* http://vger.kernel.org/bpfconf2019.html#session-1
6686+
* Do not add new state for future pruning if the verifier hasn't seen
6687+
* at least 2 jumps and at least 8 instructions.
6688+
* This heuristics helps decrease 'total_states' and 'peak_states' metric.
6689+
* In tests that amounts to up to 50% reduction into total verifier
6690+
* memory consumption and 20% verifier time speedup.
6691+
*/
6692+
if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
6693+
env->insn_processed - env->prev_insn_processed >= 8)
6694+
add_new_state = true;
6695+
66276696
pprev = explored_state(env, insn_idx);
66286697
sl = *pprev;
66296698

@@ -6633,6 +6702,30 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
66336702
states_cnt++;
66346703
if (sl->state.insn_idx != insn_idx)
66356704
goto next;
6705+
if (sl->state.branches) {
6706+
if (states_maybe_looping(&sl->state, cur) &&
6707+
states_equal(env, &sl->state, cur)) {
6708+
verbose_linfo(env, insn_idx, "; ");
6709+
verbose(env, "infinite loop detected at insn %d\n", insn_idx);
6710+
return -EINVAL;
6711+
}
6712+
/* if the verifier is processing a loop, avoid adding new state
6713+
* too often, since different loop iterations have distinct
6714+
* states and may not help future pruning.
6715+
* This threshold shouldn't be too low to make sure that
6716+
* a loop with large bound will be rejected quickly.
6717+
* The most abusive loop will be:
6718+
* r1 += 1
6719+
* if r1 < 1000000 goto pc-2
6720+
* 1M insn_processed limit / 100 == 10k peak states.
6721+
* This threshold shouldn't be too high either, since states
6722+
* at the end of the loop are likely to be useful in pruning.
6723+
*/
6724+
if (env->jmps_processed - env->prev_jmps_processed < 20 &&
6725+
env->insn_processed - env->prev_insn_processed < 100)
6726+
add_new_state = false;
6727+
goto miss;
6728+
}
66366729
if (states_equal(env, &sl->state, cur)) {
66376730
sl->hit_cnt++;
66386731
/* reached equivalent register/stack state,
@@ -6650,7 +6743,15 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
66506743
return err;
66516744
return 1;
66526745
}
6653-
sl->miss_cnt++;
6746+
miss:
6747+
/* when new state is not going to be added do not increase miss count.
6748+
* Otherwise several loop iterations will remove the state
6749+
* recorded earlier. The goal of these heuristics is to have
6750+
* states from some iterations of the loop (some in the beginning
6751+
* and some at the end) to help pruning.
6752+
*/
6753+
if (add_new_state)
6754+
sl->miss_cnt++;
66546755
/* heuristic to determine whether this state is beneficial
66556756
* to keep checking from state equivalence point of view.
66566757
* Higher numbers increase max_states_per_insn and verification time,
@@ -6662,6 +6763,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
66626763
*/
66636764
*pprev = sl->next;
66646765
if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
6766+
u32 br = sl->state.branches;
6767+
6768+
WARN_ONCE(br,
6769+
"BUG live_done but branches_to_explore %d\n",
6770+
br);
66656771
free_verifier_state(&sl->state, false);
66666772
kfree(sl);
66676773
env->peak_states--;
@@ -6687,18 +6793,25 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
66876793
if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
66886794
return 0;
66896795

6690-
/* there were no equivalent states, remember current one.
6691-
* technically the current state is not proven to be safe yet,
6796+
if (!add_new_state)
6797+
return 0;
6798+
6799+
/* There were no equivalent states, remember the current one.
6800+
* Technically the current state is not proven to be safe yet,
66926801
* but it will either reach outer most bpf_exit (which means it's safe)
6693-
* or it will be rejected. Since there are no loops, we won't be
6802+
* or it will be rejected. When there are no loops the verifier won't be
66946803
* seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
6695-
* again on the way to bpf_exit
6804+
* again on the way to bpf_exit.
6805+
* When looping the sl->state.branches will be > 0 and this state
6806+
* will not be considered for equivalence until branches == 0.
66966807
*/
66976808
new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
66986809
if (!new_sl)
66996810
return -ENOMEM;
67006811
env->total_states++;
67016812
env->peak_states++;
6813+
env->prev_jmps_processed = env->jmps_processed;
6814+
env->prev_insn_processed = env->insn_processed;
67026815

67036816
/* add new state to the head of linked list */
67046817
new = &new_sl->state;
@@ -6709,6 +6822,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
67096822
return err;
67106823
}
67116824
new->insn_idx = insn_idx;
6825+
WARN_ONCE(new->branches != 1,
6826+
"BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
6827+
cur->parent = new;
67126828
new_sl->next = *explored_state(env, insn_idx);
67136829
*explored_state(env, insn_idx) = new_sl;
67146830
/* connect new state to parentage chain. Current frame needs all
@@ -6795,6 +6911,7 @@ static int do_check(struct bpf_verifier_env *env)
67956911
return -ENOMEM;
67966912
state->curframe = 0;
67976913
state->speculative = false;
6914+
state->branches = 1;
67986915
state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
67996916
if (!state->frame[0]) {
68006917
kfree(state);
@@ -7001,6 +7118,7 @@ static int do_check(struct bpf_verifier_env *env)
70017118
} else if (class == BPF_JMP || class == BPF_JMP32) {
70027119
u8 opcode = BPF_OP(insn->code);
70037120

7121+
env->jmps_processed++;
70047122
if (opcode == BPF_CALL) {
70057123
if (BPF_SRC(insn->code) != BPF_K ||
70067124
insn->off != 0 ||
@@ -7086,6 +7204,7 @@ static int do_check(struct bpf_verifier_env *env)
70867204
if (err)
70877205
return err;
70887206
process_bpf_exit:
7207+
update_branch_counts(env, env->cur_state);
70897208
err = pop_stack(env, &env->prev_insn_idx,
70907209
&env->insn_idx);
70917210
if (err < 0) {

0 commit comments

Comments
 (0)