Description
Our continuous integration began seeing the following warning (promoted to an error by `-Werror`) with Linux 6.9-rc1 and LLVM tip of tree (19.0.0):
$ make -skj"$(nproc)" ARCH=loongarch LLVM=1 allmodconfig drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.o
drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c:1526:12: error: stack frame size (2400) exceeds limit (2048) in 'vdec_vp9_slice_update_prob' [-Werror,-Wframe-larger-than]
1526 | static int vdec_vp9_slice_update_prob(struct vdec_vp9_slice_instance *instance,
| ^
1 error generated.
The Linux commit that "causes" this is 918327e9b7ff ("ubsan: Remove CONFIG_UBSAN_SANITIZE_ALL"), which explains one of the factors for triggering this (it is specifically `-fsanitize=array-bounds`). `cvise` helped deduce that this is also exacerbated by `-mstrict-align`. I decided to bisect LLVM to find out why this is not visible with LLVM 18 and landed on llvm/llvm-project@90ba330 (that commit just keeps on giving :/), which somewhat makes sense I suppose.
For a "trivial" reproducer, `cvise` spits out:
enum { false, true } __read_overflow2_field(long, long); /* One declaration: defines the enumerators false/true and declares __read_overflow2_field(long, long) returning that anonymous enum type; no definition is given in this reproducer. */
struct v4l2_vp9_frame_context { /* Three multi-dimensional char tables; the map functions below copy rows into and out of them. */
char coef[4][2][2][6][6][3];
char inter_mode[7][3][2]; /* 7 entries, each a [3][2] char block */
char partition[16][3]; /* 16 rows of 3 chars */
};
struct mtk_vcodec_mem { /* Wrapper holding a single untyped pointer. */
void *va;
};
struct vdec_vp9_slice_frame_ctx { /* Second frame-context layout; its row sizes differ from struct v4l2_vp9_frame_context. */
struct {
char probs[6][3];
} coef_probs[4][2][2][16]; /* fourth dimension is 16 here versus 6 in v4l2_vp9_frame_context.coef */
char partition_prob[16][4]; /* 4-char rows versus 3-char rows in v4l2_vp9_frame_context.partition */
char inter_mode_probs[][4]; /* last member, declared with an unspecified first dimension */
} *vdec_vp9_slice_framectx_map_helper_frame_ctx; /* file-scope pointer read by vdec_vp9_slice_framectx_map_helper */
struct { /* Anonymous struct type; only the pointer declared on the closing line uses it. */
char intra_only; /* read by vdec_vp9_slice_update_prob when computing frame_is_intra */
} *vdec_vp9_slice_update_prob_vsi;
struct { /* Anonymous struct type; only the pointer declared on the closing line uses it. */
struct mtk_vcodec_mem prob;
struct mtk_vcodec_mem counts;
struct vdec_vp9_slice_frame_ctx frame_ctx[4]; /* indexed by vdec_vp9_slice_update_prob_uh_4 in vdec_vp9_slice_update_prob */
struct v4l2_vp9_frame_context frame_ctx_helper; /* its address is passed to both map functions */
char dirty[4]; /* indexed by vdec_vp9_slice_update_prob_uh_4 in vdec_vp9_slice_update_prob */
int counts_helper;
} *vdec_vp9_slice_update_prob_instance;
long vdec_vp9_slice_framectx_map_helper___p_size_field; /* initializes p_size_field in vdec_vp9_slice_framectx_map_helper */
_Bool vdec_vp9_slice_framectx_map_helper___trans_tmp_2, /* initializes __ret_do_once in vdec_vp9_slice_framectx_map_helper */
vdec_vp9_slice_helper_map_framectx___trans_tmp_9; /* initializes __ret_do_once in vdec_vp9_slice_helper_map_framectx */
char vdec_vp9_slice_update_prob_uh_0; /* one of the two operands of frame_is_intra in vdec_vp9_slice_update_prob */
int vdec_vp9_slice_update_prob_uh_4; /* index into frame_ctx[] and dirty[] in vdec_vp9_slice_update_prob */
extern inline __attribute__((__gnu_inline__)) _Bool /* Calls __read_overflow2_field when the argument is nonzero; this body always returns 0. */
fortify_memcpy_chk(unsigned q_size_field) {
long size = 0;
if (q_size_field)
__read_overflow2_field(q_size_field, size); /* return value is discarded */
return 0;
}
static void vdec_vp9_slice_framectx_map_helper( /* Copies inter_mode_probs and partition_prob rows from the global vdec_vp9_slice_framectx_map_helper_frame_ctx into *frame_ctx_helper. */
struct v4l2_vp9_frame_context *frame_ctx_helper) {
_Bool __trans_tmp_1,
__ret_do_once = vdec_vp9_slice_framectx_map_helper___trans_tmp_2;
int i;
for (i = 0; i < sizeof(frame_ctx_helper); i++) { /* bound is the size of the pointer parameter, not of the struct it points to */
long __fortify_size = sizeof(frame_ctx_helper), size = __fortify_size; /* likewise a pointer size */
int p_size_field = vdec_vp9_slice_framectx_map_helper___p_size_field,
q_size_field = __read_overflow2_field(p_size_field, size);
if (__builtin_constant_p(p_size_field))
__read_overflow2_field(q_size_field, size);
__trans_tmp_1 = 0;
_Bool __ret_do_once = __trans_tmp_1, __ret_cond = __ret_do_once; /* shadows the function-scope __ret_do_once; both are 0 here */
static _Bool __already_done;
if (__builtin_expect(__ret_cond && __already_done, 0))
; /* empty statement: the test does not alter control flow */
__builtin_memcpy( /* copies __fortify_size bytes per iteration although an inter_mode_probs row is 4 chars */
frame_ctx_helper->inter_mode[i],
vdec_vp9_slice_framectx_map_helper_frame_ctx->inter_mode_probs[i],
__fortify_size);
}
long __fortify_size = sizeof(frame_ctx_helper); /* pointer size again, used by the partition copy below */
for (i = 0; i; i++) { /* condition is false on entry, so the body never runs; its only effect is resetting i to 0 */
static _Bool __already_done;
_Bool __ret_cond = __ret_do_once;
if (__builtin_expect(__ret_cond && __already_done, 0)) {
}
}
for (; i < sizeof(frame_ctx_helper->partition) / /* continues from i == 0; bound is the row count of partition (16) */
sizeof(frame_ctx_helper->partition)[0];
i++)
__builtin_memcpy( /* partition rows are 3 chars, partition_prob rows 4 chars; the length copied is __fortify_size */
frame_ctx_helper->partition[i],
vdec_vp9_slice_framectx_map_helper_frame_ctx->partition_prob[i],
__fortify_size);
}
static void vdec_vp9_slice_helper_map_framectx( /* Copies coef, inter_mode and partition data from *frame_ctx_helper into *frame_ctx. */
struct v4l2_vp9_frame_context *frame_ctx_helper,
struct vdec_vp9_slice_frame_ctx *frame_ctx) {
int i, j, k;
for (i = 0;
i < sizeof(frame_ctx_helper->coef) / sizeof(frame_ctx_helper->coef)[0]; /* element count of coef's first dimension (4) */
i++)
for (j = 0; j < sizeof(frame_ctx_helper); j++) /* bound is the size of the pointer parameter, not coef's second dimension (2) */
for (k = 0; k < sizeof(frame_ctx_helper->coef[0][0]) / /* element count of coef's third dimension (2) */
sizeof(frame_ctx_helper->coef[0][0])[0];
k++) {
int __trans_tmp_3 = i, __trans_tmp_4 = j, __trans_tmp_5 = k, l, m;
struct vdec_vp9_slice_frame_ctx *__trans_tmp_6 = frame_ctx;
struct v4l2_vp9_frame_context *__trans_tmp_7 = frame_ctx_helper;
frame_ctx = __trans_tmp_6; /* round trip through a temporary; the value is unchanged */
frame_ctx_helper = __trans_tmp_7; /* same: value unchanged */
for (l = 0; l < sizeof(frame_ctx_helper->coef[0][0][0]) / /* element count of coef's fourth dimension (6) */
sizeof(frame_ctx_helper->coef[0][0][0])[0];
l++)
for (m = 0; m < l; m++) { /* bound depends on l, so l == 0 performs no copies */
long __fortify_size = sizeof(frame_ctx_helper), /* pointer size, used as the copy length */
__q_size_field =
__builtin_dynamic_object_size(frame_ctx_helper, 1);
_Bool __ret_do_once = fortify_memcpy_chk(__q_size_field),
__ret_cond = __ret_do_once;
static _Bool __already_done;
if (__builtin_expect(__ret_cond && __already_done, 0))
; /* empty statement: the test does not alter control flow */
__builtin_memcpy( /* destination row probs[m] and source row coef[...][l][m] are each 3 chars; length is __fortify_size */
frame_ctx
->coef_probs[__trans_tmp_3][__trans_tmp_4][__trans_tmp_5][l]
.probs[m],
frame_ctx_helper
->coef[__trans_tmp_3][__trans_tmp_4][__trans_tmp_5][l][m],
__fortify_size);
}
}
for (i = 0; i < sizeof(frame_ctx_helper); i++) { /* bound is again the pointer size */
long __fortify_size = sizeof(frame_ctx_helper), size = __fortify_size;
int p_size_field = 0,
q_size_field = __read_overflow2_field(p_size_field, size);
__read_overflow2_field(q_size_field, size); /* called unconditionally here; result discarded */
__builtin_memcpy(frame_ctx->inter_mode_probs[i],
frame_ctx_helper->inter_mode[i], __fortify_size);
}
long __fortify_size = sizeof(frame_ctx_helper); /* pointer size, used by the partition copy below */
_Bool __ret_do_once = vdec_vp9_slice_helper_map_framectx___trans_tmp_9;
for (i = 0; i < sizeof(frame_ctx_helper->partition) / /* row count of partition (16) */
sizeof(frame_ctx_helper->partition)[0];
i++) {
long __q_size_field = __builtin_dynamic_object_size(frame_ctx_helper, 1);
fortify_memcpy_chk(__q_size_field); /* return value is discarded */
static _Bool __already_done;
_Bool __ret_cond = __ret_do_once;
if (__builtin_expect(__ret_cond && __already_done, 0))
; /* empty statement: the test does not alter control flow */
__builtin_memcpy(frame_ctx->partition_prob[i],
frame_ctx_helper->partition[i], __fortify_size);
}
}
int vdec_vp9_slice_update_prob() { /* Entry point of the reproducer: calls both map functions on the global instance's frame contexts and returns 0. */
struct vdec_vp9_slice_frame_ctx *pre_frame_ctx;
struct v4l2_vp9_frame_context *pre_frame_ctx_helper;
_Bool frame_is_intra;
pre_frame_ctx = &vdec_vp9_slice_update_prob_instance
->frame_ctx[vdec_vp9_slice_update_prob_uh_4]; /* index comes from a global int; frame_ctx has 4 elements */
pre_frame_ctx_helper = &vdec_vp9_slice_update_prob_instance->frame_ctx_helper;
frame_is_intra = vdec_vp9_slice_update_prob_uh_0 ||
vdec_vp9_slice_update_prob_vsi->intra_only;
if (vdec_vp9_slice_update_prob_instance
->dirty[vdec_vp9_slice_update_prob_uh_4]) /* same global index, into the 4-element dirty array */
vdec_vp9_slice_framectx_map_helper(pre_frame_ctx_helper);
else
vdec_vp9_slice_framectx_map_helper(pre_frame_ctx_helper); /* identical call in both branches */
if (frame_is_intra)
vdec_vp9_slice_helper_map_framectx(pre_frame_ctx_helper, pre_frame_ctx);
{
_Bool __ret_do_once = fortify_memcpy_chk(0);
if (({ /* GNU statement expression; its value is the final __builtin_expect(...) */
static _Bool __already_done; /* never assigned in this block */
_Bool __ret_cond = __ret_do_once;
_Bool __ret_once = false;
if (__builtin_expect(__ret_cond && !__already_done, 0)) /* note the negation, unlike the checks in the map functions */
__ret_once = true;
__builtin_expect(__ret_once, 0);
}))
asm(""); /* empty inline asm statement */
}
return 0;
}
which I think confirms this is also related to the fortified string routines that are enabled with `CONFIG_FORTIFY_SOURCE`, since they show up in the reproducer.
With GCC 13.2.0, there is a very small difference with or without `-mstrict-align`:
$ loongarch64-linux-gcc -O2 -Wall -Wframe-larger-than=1 -c -o /dev/null vdec_vp9_req_lat_if.i
vdec_vp9_req_lat_if.i: In function 'vdec_vp9_slice_update_prob':
vdec_vp9_req_lat_if.i:165:1: warning: the frame size of 64 bytes is larger than 1 bytes [-Wframe-larger-than=]
165 | }
| ^
$ loongarch64-linux-gcc -O2 -Wall -Wframe-larger-than=64 -c -o /dev/null vdec_vp9_req_lat_if.i -mstrict-align
vdec_vp9_req_lat_if.i: In function 'vdec_vp9_slice_update_prob':
vdec_vp9_req_lat_if.i:165:1: warning: the frame size of 80 bytes is larger than 64 bytes [-Wframe-larger-than=]
165 | }
| ^
GCC does not have `-fsanitize=array-bounds`, but there is no difference with `-fsanitize=bounds`, which is a close approximation.
$ loongarch64-linux-gcc -O2 -Wall -Wframe-larger-than=64 -c -o /dev/null vdec_vp9_req_lat_if.i -fsanitize=bounds
vdec_vp9_req_lat_if.i: In function 'vdec_vp9_slice_update_prob':
vdec_vp9_req_lat_if.i:165:1: warning: the frame size of 80 bytes is larger than 64 bytes [-Wframe-larger-than=]
165 | }
| ^
$ loongarch64-linux-gcc -O2 -Wall -Wframe-larger-than=64 -c -o /dev/null vdec_vp9_req_lat_if.i -fsanitize=bounds -mstrict-align
vdec_vp9_req_lat_if.i: In function 'vdec_vp9_slice_update_prob':
vdec_vp9_req_lat_if.i:165:1: warning: the frame size of 80 bytes is larger than 64 bytes [-Wframe-larger-than=]
165 | }
| ^
With LLVM @ llvm/llvm-project@98509c7 (the direct parent of the blamed LLVM commit), there is a slight increase in frame size when adding `-mstrict-align`:
$ clang --target=loongarch64-linux-gnusf -O2 -Wall -Wframe-larger-than=64 -c -o /dev/null vdec_vp9_req_lat_if.i
vdec_vp9_req_lat_if.i:136:5: warning: stack frame size (192) exceeds limit (64) in 'vdec_vp9_slice_update_prob' [-Wframe-larger-than]
136 | int vdec_vp9_slice_update_prob() {
| ^
1 warning generated.
$ clang --target=loongarch64-linux-gnusf -O2 -Wall -Wframe-larger-than=64 -c -o /dev/null vdec_vp9_req_lat_if.i -mstrict-align
vdec_vp9_req_lat_if.i:136:5: warning: stack frame size (224) exceeds limit (64) in 'vdec_vp9_slice_update_prob' [-Wframe-larger-than]
136 | int vdec_vp9_slice_update_prob() {
| ^
vdec_vp9_req_lat_if.i:40:13: warning: stack frame size (96) exceeds limit (64) in 'vdec_vp9_slice_framectx_map_helper' [-Wframe-larger-than]
40 | static void vdec_vp9_slice_framectx_map_helper(
| ^
2 warnings generated.
but there is an even larger difference once `-fsanitize=array-bounds` is added to the mix.
$ clang --target=loongarch64-linux-gnusf -O2 -Wall -Wframe-larger-than=64 -c -o /dev/null vdec_vp9_req_lat_if.i -fsanitize=array-bounds
vdec_vp9_req_lat_if.i:136:5: warning: stack frame size (608) exceeds limit (64) in 'vdec_vp9_slice_update_prob' [-Wframe-larger-than]
136 | int vdec_vp9_slice_update_prob() {
| ^
1 warning generated.
$ clang --target=loongarch64-linux-gnusf -O2 -Wall -Wframe-larger-than=64 -c -o /dev/null vdec_vp9_req_lat_if.i -fsanitize=array-bounds -mstrict-align
vdec_vp9_req_lat_if.i:136:5: warning: stack frame size (1536) exceeds limit (64) in 'vdec_vp9_slice_update_prob' [-Wframe-larger-than]
136 | int vdec_vp9_slice_update_prob() {
| ^
1 warning generated.
llvm/llvm-project@90ba330 does not really change much without `-fsanitize=array-bounds` (it actually improves the `-mno-strict-align` case):
$ clang --target=loongarch64-linux-gnusf -O2 -Wall -Wframe-larger-than=64 -c -o /dev/null vdec_vp9_req_lat_if.i
vdec_vp9_req_lat_if.i:136:5: warning: stack frame size (176) exceeds limit (64) in 'vdec_vp9_slice_update_prob' [-Wframe-larger-than]
136 | int vdec_vp9_slice_update_prob() {
| ^
1 warning generated.
$ clang --target=loongarch64-linux-gnusf -O2 -Wall -Wframe-larger-than=64 -c -o /dev/null vdec_vp9_req_lat_if.i -mstrict-align
vdec_vp9_req_lat_if.i:136:5: warning: stack frame size (224) exceeds limit (64) in 'vdec_vp9_slice_update_prob' [-Wframe-larger-than]
136 | int vdec_vp9_slice_update_prob() {
| ^
vdec_vp9_req_lat_if.i:40:13: warning: stack frame size (96) exceeds limit (64) in 'vdec_vp9_slice_framectx_map_helper' [-Wframe-larger-than]
40 | static void vdec_vp9_slice_framectx_map_helper(
| ^
2 warnings generated.
but the difference with `-fsanitize=array-bounds` is made even worse, pushing the frame size above the `2048` limit for 64-bit platforms in Linux.
$ clang --target=loongarch64-linux-gnusf -O2 -Wall -Wframe-larger-than=64 -c -o /dev/null vdec_vp9_req_lat_if.i -fsanitize=array-bounds
vdec_vp9_req_lat_if.i:136:5: warning: stack frame size (576) exceeds limit (64) in 'vdec_vp9_slice_update_prob' [-Wframe-larger-than]
136 | int vdec_vp9_slice_update_prob() {
| ^
1 warning generated.
$ clang --target=loongarch64-linux-gnusf -O2 -Wall -Wframe-larger-than=64 -c -o /dev/null vdec_vp9_req_lat_if.i -fsanitize=array-bounds -mstrict-align
vdec_vp9_req_lat_if.i:136:5: warning: stack frame size (2080) exceeds limit (64) in 'vdec_vp9_slice_update_prob' [-Wframe-larger-than]
136 | int vdec_vp9_slice_update_prob() {
| ^
1 warning generated.
This behavior does not reproduce with AArch64 or ARM on the bad revision, so I suspect this is something up with the LoongArch backend in LLVM.
$ clang --target=aarch64-linux-gnu -O2 -Wall -Wframe-larger-than=64 -c -o /dev/null vdec_vp9_req_lat_if.i
vdec_vp9_req_lat_if.i:136:5: warning: stack frame size (144) exceeds limit (64) in 'vdec_vp9_slice_update_prob' [-Wframe-larger-than]
136 | int vdec_vp9_slice_update_prob() {
| ^
1 warning generated.
$ clang --target=aarch64-linux-gnu -O2 -Wall -Wframe-larger-than=64 -c -o /dev/null vdec_vp9_req_lat_if.i -mstrict-align
vdec_vp9_req_lat_if.i:136:5: warning: stack frame size (160) exceeds limit (64) in 'vdec_vp9_slice_update_prob' [-Wframe-larger-than]
136 | int vdec_vp9_slice_update_prob() {
| ^
1 warning generated.
$ clang --target=aarch64-linux-gnu -O2 -Wall -Wframe-larger-than=64 -c -o /dev/null vdec_vp9_req_lat_if.i -fsanitize=array-bounds
vdec_vp9_req_lat_if.i:136:5: warning: stack frame size (128) exceeds limit (64) in 'vdec_vp9_slice_update_prob' [-Wframe-larger-than]
136 | int vdec_vp9_slice_update_prob() {
| ^
1 warning generated.
$ clang --target=aarch64-linux-gnu -O2 -Wall -Wframe-larger-than=64 -c -o /dev/null vdec_vp9_req_lat_if.i -fsanitize=array-bounds -mstrict-align
vdec_vp9_req_lat_if.i:136:5: warning: stack frame size (128) exceeds limit (64) in 'vdec_vp9_slice_update_prob' [-Wframe-larger-than]
136 | int vdec_vp9_slice_update_prob() {
| ^
1 warning generated.
$ clang --target=arm-linux-gnueabi -O2 -Wall -Wframe-larger-than=32 -c -o /dev/null vdec_vp9_req_lat_if.i
vdec_vp9_req_lat_if.i:136:5: warning: stack frame size (56) exceeds limit (32) in 'vdec_vp9_slice_update_prob' [-Wframe-larger-than]
136 | int vdec_vp9_slice_update_prob() {
| ^
1 warning generated.
$ clang --target=arm-linux-gnueabi -O2 -Wall -Wframe-larger-than=32 -c -o /dev/null vdec_vp9_req_lat_if.i -mstrict-align
vdec_vp9_req_lat_if.i:136:5: warning: stack frame size (56) exceeds limit (32) in 'vdec_vp9_slice_update_prob' [-Wframe-larger-than]
136 | int vdec_vp9_slice_update_prob() {
| ^
1 warning generated.
$ clang --target=arm-linux-gnueabi -O2 -Wall -Wframe-larger-than=32 -c -o /dev/null vdec_vp9_req_lat_if.i -fsanitize=array-bounds
vdec_vp9_req_lat_if.i:136:5: warning: stack frame size (80) exceeds limit (32) in 'vdec_vp9_slice_update_prob' [-Wframe-larger-than]
136 | int vdec_vp9_slice_update_prob() {
| ^
1 warning generated.
$ clang --target=arm-linux-gnueabi -O2 -Wall -Wframe-larger-than=32 -c -o /dev/null vdec_vp9_req_lat_if.i -fsanitize=array-bounds -mstrict-align
vdec_vp9_req_lat_if.i:136:5: warning: stack frame size (80) exceeds limit (32) in 'vdec_vp9_slice_update_prob' [-Wframe-larger-than]
136 | int vdec_vp9_slice_update_prob() {
| ^
1 warning generated.