Closed
Description
`!it.x && !it.y` produces worse code than `(it.x == 0) && (it.y == 0)` if `it` is a struct type with `bool` members `x` and `y`.
Oddly, using a `uint8_t` for `x` and `y` does not suffer from this problem.
This affects AArch64, x86-64, and RISC-V targets.
Real-world motivation
// The `in_full` and `has_aligned` page flags are put in a union to efficiently
// test if both are false (`full_aligned == 0`) in the `mi_free` routine.
typedef union mi_page_flags_s {
// Whole-byte view that shares storage with the bit-fields in `x`, so a single
// comparison (`full_aligned == 0`) checks that both flags are clear.
uint8_t full_aligned;
struct {
// Individual one-bit flags, accessed by name through `x`.
uint8_t in_full : 1;
uint8_t has_aligned : 1;
} x;
} mi_page_flags_t;
C++ code
https://godbolt.org/z/1387M789s
#include <cstdint>
// Case 1: two ordinary `bool` members. This is the case the report is about:
// in the AArch64 listing, src1 needs more instructions than tgt1.
struct S1 {
bool x;
bool y;
};
// Case 2: two one-bit `bool` bit-fields. In the AArch64 listing, src2 is also
// longer than tgt2.
struct S2 {
bool x : 1;
bool y : 1;
};
// Case 3: two one-bit `uint8_t` bit-fields. In the AArch64 listing, src3 and
// tgt3 compile to the same code.
struct S3 {
uint8_t x : 1;
uint8_t y : 1;
};
// Case 4: two full-width `uint8_t` members. In the AArch64 listing, src4 and
// tgt4 compile to the same code.
struct S4 {
uint8_t x ;
uint8_t y ;
};
// C linkage: presumably chosen so the functions show up under their plain
// names (`src1`, `tgt1`, ...) in the assembly and IR listings.
//
// Each srcN/tgtN pair tests "both members of SN are zero/false":
//   srcN writes the test with logical negation:  !it.x && !it.y
//   tgtN writes the test as comparisons with 0:  (it.x == 0) && (it.y == 0)
// The two spellings are intentionally kept distinct; the difference in the
// code generated for them is what this report demonstrates.
extern "C" {
// S1: plain bool members.
auto src1(S1 it) -> bool { return !it.x && !it.y; }
auto tgt1(S1 it) -> bool { return (it.x == 0) && (it.y == 0); }
// S2: one-bit bool bit-fields.
auto src2(S2 it) -> bool { return !it.x && !it.y; }
auto tgt2(S2 it) -> bool { return (it.x == 0) && (it.y == 0); }
// S3: one-bit uint8_t bit-fields.
auto src3(S3 it) -> bool { return !it.x && !it.y; }
auto tgt3(S3 it) -> bool { return (it.x == 0) && (it.y == 0); }
// S4: full uint8_t members.
auto src4(S4 it) -> bool { return !it.x && !it.y; }
auto tgt4(S4 it) -> bool { return (it.x == 0) && (it.y == 0); }
}
AArch64 assembly
src1:
tst x0, #0x100
eor w9, w0, #0x1
cset w8, eq
and w0, w8, w9
ret
tgt1:
mov w8, #257
tst x0, x8
cset w0, eq
ret
src2:
tst x0, #0x2
eor w9, w0, #0x1
cset w8, eq
and w0, w8, w9
ret
tgt2:
tst x0, #0x3
cset w0, eq
ret
src3:
tst x0, #0x3
cset w0, eq
ret
tgt3:
tst x0, #0x3
cset w0, eq
ret
src4:
tst x0, #0xffff
cset w0, eq
ret
tgt4:
tst x0, #0xffff
cset w0, eq
ret
Alive proof
https://alive2.llvm.org/ce/z/JRwXu7
----------------------------------------
define i1 @src1(i64 %#0) nofree willreturn memory(none) {
#1:
%#2 = trunc i64 %#0 to i1
%#3 = and i64 %#0, 256
%#4 = icmp eq i64 %#3, 0
%#5 = xor i1 %#2, 1
%#6 = and i1 %#4, %#5
ret i1 %#6
}
=>
define i1 @tgt1(i64 %#0) nofree willreturn memory(none) {
#1:
%#2 = and i64 %#0, 257
%#3 = icmp eq i64 %#2, 0
ret i1 %#3
}
Transformation seems to be correct!
----------------------------------------
define i1 @src2(i64 %#0) nofree willreturn memory(none) {
#1:
%#2 = trunc i64 %#0 to i1
%#3 = and i64 %#0, 2
%#4 = icmp eq i64 %#3, 0
%#5 = xor i1 %#2, 1
%#6 = and i1 %#4, %#5
ret i1 %#6
}
=>
define i1 @tgt2(i64 %#0) nofree willreturn memory(none) {
#1:
%#2 = and i64 %#0, 3
%#3 = icmp eq i64 %#2, 0
ret i1 %#3
}
Transformation seems to be correct!
----------------------------------------
define i1 @src3(i64 %#0) nofree willreturn memory(none) {
#1:
%#2 = and i64 %#0, 3
%#3 = icmp eq i64 %#2, 0
ret i1 %#3
}
=>
define i1 @tgt3(i64 %#0) nofree willreturn memory(none) {
#1:
%#2 = and i64 %#0, 3
%#3 = icmp eq i64 %#2, 0
ret i1 %#3
}
Transformation seems to be correct!
----------------------------------------
define i1 @src4(i64 %#0) nofree willreturn memory(none) {
#1:
%#2 = and i64 %#0, 65535
%#3 = icmp eq i64 %#2, 0
ret i1 %#3
}
=>
define i1 @tgt4(i64 %#0) nofree willreturn memory(none) {
#1:
%#2 = and i64 %#0, 65535
%#3 = icmp eq i64 %#2, 0
ret i1 %#3
}
Transformation seems to be correct!
Summary:
4 correct transformations
0 incorrect transformations
0 failed-to-prove transformations
0 Alive2 errors