Open
Description
Saw this while looking at #128441, but this seems like a more general issue. This C code
/*
 * Reproducer: accumulates a[i] ^ b[i] for i = 0 .. size-1 and stops after
 * the first index at which the two elements are equal (diff == 0).
 * Returns the accumulated sum; returns 0 when size <= 0 because the loop
 * body never runs.
 */
int foo(int *a, int *b, int size) {
int ret = 0;
for (int i = 0; i < size; ++i) {
int diff = a[i] ^ b[i];
/* The add happens before the zero check; when diff is 0 it leaves ret unchanged. */
ret += diff;
if (!diff) {
break;
}
}
return ret;
}
compiles at all optimization levels to:
# Compiler output for foo (Intel syntax).
# Assuming the System V AMD64 argument registers: rdi = a, rsi = b, edx = size.
# Result is returned in eax.
foo:
# size <= 0: skip the loop and return 0
test edx, edx
jle .LBB0_1
# rcx = zero-extended size, then size - 1 (the last valid index)
mov ecx, edx
dec rcx
# rdx = i = 0, eax = ret = 0
xor edx, edx
xor eax, eax
.LBB0_3:
# accumulator copied out of eax on every iteration
mov r8d, eax
# r9d = a[i], r10d = b[i]
mov r9d, dword ptr [rdi + 4*rdx]
mov r10d, dword ptr [rsi + 4*rdx]
# eax = a[i] ^ b[i], then eax = ret + diff
mov eax, r10d
xor eax, r9d
add eax, r8d
# the same xor is computed a second time, only to set ZF for the break test
xor r10d, r9d
je .LBB0_5
# r8 = i + 1; compare the last index against the old i, then commit i = i + 1
# (mov does not modify flags, so jne still tests the cmp result)
lea r8, [rdx + 1]
cmp rcx, rdx
mov rdx, r8
jne .LBB0_3
.LBB0_5:
ret
.LBB0_1:
# size <= 0 path: return 0
xor eax, eax
ret
It keeps moving the accumulator from `eax` to `r8d` and back. It also materializes the `xor` result twice when it doesn't need to. Ideally I think it would look more like this:
# Proposed loop body only; the preamble and the .LBB0_5 exit block are not shown here.
.LBB0_3:
# r8d = a[i] ^ b[i]; the xor with a memory operand also sets ZF, so diff is computed once
mov r8d, dword ptr [rdi + 4*rdx]
xor r8d, dword ptr [rsi + 4*rdx]
# diff == 0: break before the add, which would only have added zero
je .LBB0_5
# the accumulator stays in eax; no copy through another register
add eax, r8d
inc edx
# NOTE(review): the index is incremented before this compare, so ecx must hold
# size here, not the size - 1 that `dec rcx` leaves in the current output.
# Confirm the preamble for this version drops that `dec`.
cmp ecx, edx
jne .LBB0_3