Skip to content

Commit 627e1f1

Browse files
committed
DAG: Call SimplifyDemandedBits on fcopysign sign value
Math library code has quite a few places with complex bit logic that is ultimately fed into a copysign. This helps avoid some regressions in a future patch. This assumes the sign bit is the most significant bit of the (scalar) float type, which should at least be valid for IEEE types. Not sure if we need to guard against ppc_fp128 or anything else weird. There appears to be some value in simplifying the value operand as well, but I'll address that separately.
1 parent c769dc4 commit 627e1f1

File tree

6 files changed

+47
-62
lines changed

6 files changed

+47
-62
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17565,6 +17565,12 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
1756517565
if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
1756617566
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
1756717567

17568+
// We only take the sign bit from the sign operand.
17569+
EVT SignVT = N1.getValueType();
17570+
if (SimplifyDemandedBits(N1,
17571+
APInt::getSignMask(SignVT.getScalarSizeInBits())))
17572+
return SDValue(N, 0);
17573+
1756817574
return SDValue();
1756917575
}
1757017576

llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,10 @@ define half @test_pown_reduced_fast_f16_known_odd(half %x, i32 %y.arg) #0 {
1111
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1212
; GFX9-NEXT: v_or_b32_e32 v1, 1, v1
1313
; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1
14-
; GFX9-NEXT: v_and_b32_e32 v2, 0xffff8000, v0
1514
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
1615
; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
17-
; GFX9-NEXT: v_mul_f16_e64 v0, |v0|, v1
18-
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v2
16+
; GFX9-NEXT: v_mul_f16_e64 v1, |v0|, v1
17+
; GFX9-NEXT: v_bfi_b32 v0, s4, v1, v0
1918
; GFX9-NEXT: s_setpc_b64 s[30:31]
2019
%y = or i32 %y.arg, 1
2120
%fabs = call half @llvm.fabs.f16(half %x)
@@ -37,10 +36,9 @@ define <2 x half> @test_pown_reduced_fast_v2f16_known_odd(<2 x half> %x, <2 x i3
3736
; GFX9-NEXT: v_cvt_f32_i32_e32 v2, v2
3837
; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1
3938
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0
40-
; GFX9-NEXT: v_and_b32_e32 v0, 0x80008000, v0
39+
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
4140
; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2
4241
; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
43-
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
4442
; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2
4543
; GFX9-NEXT: v_pk_mul_f16 v1, v3, v1
4644
; GFX9-NEXT: v_bfi_b32 v2, s4, v1, v0
@@ -67,10 +65,9 @@ define float @test_pown_reduced_fast_f32_known_odd(float %x, i32 %y.arg) #0 {
6765
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6866
; GFX9-NEXT: v_or_b32_e32 v1, 1, v1
6967
; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1
70-
; GFX9-NEXT: v_and_b32_e32 v2, 0x80000000, v0
7168
; GFX9-NEXT: s_brev_b32 s4, -2
72-
; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, v1
73-
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v2
69+
; GFX9-NEXT: v_mul_f32_e64 v1, |v0|, v1
70+
; GFX9-NEXT: v_bfi_b32 v0, s4, v1, v0
7471
; GFX9-NEXT: s_setpc_b64 s[30:31]
7572
%y = or i32 %y.arg, 1
7673
%fabs = call float @llvm.fabs.f32(float %x)
@@ -94,8 +91,6 @@ define <2 x float> @test_pown_reduced_fast_v2f32_known_odd(<2 x float> %x, <2 x
9491
; GFX9-NEXT: s_brev_b32 s4, -2
9592
; GFX9-NEXT: v_mul_f32_e64 v3, |v1|, v3
9693
; GFX9-NEXT: v_mul_f32_e64 v2, |v0|, v2
97-
; GFX9-NEXT: v_and_b32_e32 v1, 0x80000000, v1
98-
; GFX9-NEXT: v_and_b32_e32 v0, 0x80000000, v0
9994
; GFX9-NEXT: v_bfi_b32 v0, s4, v2, v0
10095
; GFX9-NEXT: v_bfi_b32 v1, s4, v3, v1
10196
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -118,8 +113,7 @@ define double @test_pown_reduced_fast_f64_known_odd(double %x, i32 %y.arg) #0 {
118113
; GFX9-NEXT: v_cvt_f64_i32_e32 v[2:3], v2
119114
; GFX9-NEXT: s_brev_b32 s4, -2
120115
; GFX9-NEXT: v_mul_f64 v[2:3], |v[0:1]|, v[2:3]
121-
; GFX9-NEXT: v_and_b32_e32 v0, 0x80000000, v1
122-
; GFX9-NEXT: v_bfi_b32 v1, s4, v3, v0
116+
; GFX9-NEXT: v_bfi_b32 v1, s4, v3, v1
123117
; GFX9-NEXT: v_mov_b32_e32 v0, v2
124118
; GFX9-NEXT: s_setpc_b64 s[30:31]
125119
%y = or i32 %y.arg, 1
@@ -144,10 +138,8 @@ define <2 x double> @test_pown_reduced_fast_v2f64_known_odd(<2 x double> %x, <2
144138
; GFX9-NEXT: s_brev_b32 s4, -2
145139
; GFX9-NEXT: v_mul_f64 v[4:5], |v[0:1]|, v[4:5]
146140
; GFX9-NEXT: v_mul_f64 v[6:7], |v[2:3]|, v[6:7]
147-
; GFX9-NEXT: v_and_b32_e32 v0, 0x80000000, v3
148-
; GFX9-NEXT: v_and_b32_e32 v1, 0x80000000, v1
149141
; GFX9-NEXT: v_bfi_b32 v1, s4, v5, v1
150-
; GFX9-NEXT: v_bfi_b32 v3, s4, v7, v0
142+
; GFX9-NEXT: v_bfi_b32 v3, s4, v7, v3
151143
; GFX9-NEXT: v_mov_b32_e32 v0, v4
152144
; GFX9-NEXT: v_mov_b32_e32 v2, v6
153145
; GFX9-NEXT: s_setpc_b64 s[30:31]

llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll

Lines changed: 25 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -152,41 +152,36 @@ define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind {
152152
; PPC32-LABEL: test_copysign:
153153
; PPC32: # %bb.0: # %entry
154154
; PPC32-NEXT: mflr 0
155-
; PPC32-NEXT: stwu 1, -96(1)
156-
; PPC32-NEXT: stw 0, 100(1)
157-
; PPC32-NEXT: stfd 1, 40(1)
158-
; PPC32-NEXT: lwz 3, 44(1)
159-
; PPC32-NEXT: stfd 2, 32(1)
160-
; PPC32-NEXT: stw 3, 60(1)
161-
; PPC32-NEXT: lwz 3, 40(1)
162-
; PPC32-NEXT: stfd 3, 72(1)
163-
; PPC32-NEXT: stw 3, 56(1)
155+
; PPC32-NEXT: stwu 1, -80(1)
156+
; PPC32-NEXT: stw 0, 84(1)
157+
; PPC32-NEXT: stfd 1, 32(1)
164158
; PPC32-NEXT: lwz 3, 36(1)
165-
; PPC32-NEXT: stfd 4, 64(1)
159+
; PPC32-NEXT: stfd 2, 24(1)
166160
; PPC32-NEXT: stw 3, 52(1)
167161
; PPC32-NEXT: lwz 3, 32(1)
168-
; PPC32-NEXT: lfd 1, 56(1)
162+
; PPC32-NEXT: stfd 3, 56(1)
169163
; PPC32-NEXT: stw 3, 48(1)
170-
; PPC32-NEXT: lwz 3, 76(1)
171-
; PPC32-NEXT: lfd 2, 48(1)
172-
; PPC32-NEXT: stw 3, 92(1)
173-
; PPC32-NEXT: lwz 3, 72(1)
174-
; PPC32-NEXT: stw 3, 88(1)
175-
; PPC32-NEXT: lwz 3, 68(1)
176-
; PPC32-NEXT: lfd 3, 88(1)
177-
; PPC32-NEXT: stw 3, 84(1)
178-
; PPC32-NEXT: lwz 3, 64(1)
179-
; PPC32-NEXT: stw 3, 80(1)
180-
; PPC32-NEXT: lfd 4, 80(1)
164+
; PPC32-NEXT: lwz 3, 28(1)
165+
; PPC32-NEXT: lfd 4, 64(1)
166+
; PPC32-NEXT: stw 3, 44(1)
167+
; PPC32-NEXT: lwz 3, 24(1)
168+
; PPC32-NEXT: lfd 1, 48(1)
169+
; PPC32-NEXT: stw 3, 40(1)
170+
; PPC32-NEXT: lwz 3, 60(1)
171+
; PPC32-NEXT: lfd 2, 40(1)
172+
; PPC32-NEXT: stw 3, 76(1)
173+
; PPC32-NEXT: lwz 3, 56(1)
174+
; PPC32-NEXT: stw 3, 72(1)
175+
; PPC32-NEXT: lfd 3, 72(1)
181176
; PPC32-NEXT: bl copysignl
182-
; PPC32-NEXT: stfd 1, 16(1)
183-
; PPC32-NEXT: stfd 2, 24(1)
184-
; PPC32-NEXT: lwz 3, 16(1)
185-
; PPC32-NEXT: lwz 4, 20(1)
186-
; PPC32-NEXT: lwz 5, 24(1)
187-
; PPC32-NEXT: lwz 6, 28(1)
188-
; PPC32-NEXT: lwz 0, 100(1)
189-
; PPC32-NEXT: addi 1, 1, 96
177+
; PPC32-NEXT: stfd 1, 8(1)
178+
; PPC32-NEXT: stfd 2, 16(1)
179+
; PPC32-NEXT: lwz 3, 8(1)
180+
; PPC32-NEXT: lwz 4, 12(1)
181+
; PPC32-NEXT: lwz 5, 16(1)
182+
; PPC32-NEXT: lwz 6, 20(1)
183+
; PPC32-NEXT: lwz 0, 84(1)
184+
; PPC32-NEXT: addi 1, 1, 80
190185
; PPC32-NEXT: mtlr 0
191186
; PPC32-NEXT: blr
192187
entry:

llvm/test/CodeGen/RISCV/double-arith.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -320,9 +320,7 @@ define double @fsgnjn_d(double %a, double %b) nounwind {
320320
;
321321
; RV64IZFINXZDINX-LABEL: fsgnjn_d:
322322
; RV64IZFINXZDINX: # %bb.0:
323-
; RV64IZFINXZDINX-NEXT: li a2, -1
324-
; RV64IZFINXZDINX-NEXT: slli a2, a2, 63
325-
; RV64IZFINXZDINX-NEXT: xor a1, a1, a2
323+
; RV64IZFINXZDINX-NEXT: not a1, a1
326324
; RV64IZFINXZDINX-NEXT: fsgnj.d a0, a0, a1
327325
; RV64IZFINXZDINX-NEXT: ret
328326
;

llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -156,20 +156,16 @@ define double @fcopysign_fneg(double %a, double %b) nounwind {
156156
;
157157
; RV64IFD-LABEL: fcopysign_fneg:
158158
; RV64IFD: # %bb.0:
159-
; RV64IFD-NEXT: li a2, -1
160-
; RV64IFD-NEXT: slli a2, a2, 63
161-
; RV64IFD-NEXT: xor a1, a1, a2
162-
; RV64IFD-NEXT: fmv.d.x fa5, a1
159+
; RV64IFD-NEXT: fmv.d.x fa5, a0
160+
; RV64IFD-NEXT: not a0, a1
163161
; RV64IFD-NEXT: fmv.d.x fa4, a0
164-
; RV64IFD-NEXT: fsgnj.d fa5, fa4, fa5
162+
; RV64IFD-NEXT: fsgnj.d fa5, fa5, fa4
165163
; RV64IFD-NEXT: fmv.x.d a0, fa5
166164
; RV64IFD-NEXT: ret
167165
;
168166
; RV64IZFINXZDINX-LABEL: fcopysign_fneg:
169167
; RV64IZFINXZDINX: # %bb.0:
170-
; RV64IZFINXZDINX-NEXT: li a2, -1
171-
; RV64IZFINXZDINX-NEXT: slli a2, a2, 63
172-
; RV64IZFINXZDINX-NEXT: xor a1, a1, a2
168+
; RV64IZFINXZDINX-NEXT: not a1, a1
173169
; RV64IZFINXZDINX-NEXT: fsgnj.d a0, a0, a1
174170
; RV64IZFINXZDINX-NEXT: ret
175171
%1 = fneg double %b

llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -119,18 +119,16 @@ define float @fcopysign_fneg(float %a, float %b) nounwind {
119119
;
120120
; RV32IF-LABEL: fcopysign_fneg:
121121
; RV32IF: # %bb.0:
122-
; RV32IF-NEXT: lui a2, 524288
123-
; RV32IF-NEXT: xor a1, a1, a2
124-
; RV32IF-NEXT: fmv.w.x fa5, a1
122+
; RV32IF-NEXT: fmv.w.x fa5, a0
123+
; RV32IF-NEXT: not a0, a1
125124
; RV32IF-NEXT: fmv.w.x fa4, a0
126-
; RV32IF-NEXT: fsgnj.s fa5, fa4, fa5
125+
; RV32IF-NEXT: fsgnj.s fa5, fa5, fa4
127126
; RV32IF-NEXT: fmv.x.w a0, fa5
128127
; RV32IF-NEXT: ret
129128
;
130129
; RV32IZFINX-LABEL: fcopysign_fneg:
131130
; RV32IZFINX: # %bb.0:
132-
; RV32IZFINX-NEXT: lui a2, 524288
133-
; RV32IZFINX-NEXT: xor a1, a1, a2
131+
; RV32IZFINX-NEXT: not a1, a1
134132
; RV32IZFINX-NEXT: fsgnj.s a0, a0, a1
135133
; RV32IZFINX-NEXT: ret
136134
;

0 commit comments

Comments
 (0)