DAG: Call SimplifyDemandedBits on fcopysign sign value

arsenm · arsenm · commit 627e1f1ffe69 · 2024-07-01T10:15:25.000Z
Math library code has quite a few places with complex bit
logic that are ultimately fed into a copysign. This helps
avoid some regressions in a future patch.

This assumes the position in the float type, which should
at least be valid for IEEE types. Not sure if we need to guard
against ppc_fp128 or anything else weird.

There appears to be some value in simplifying the value operand
as well, but I'll address that separately.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17565,6 +17565,12 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
 
+  // We only take the sign bit from the sign operand.
+  EVT SignVT = N1.getValueType();
+  if (SimplifyDemandedBits(N1,
+                           APInt::getSignMask(SignVT.getScalarSizeInBits())))
+    return SDValue(N, 0);
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll b/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
@@ -11,11 +11,10 @@ define half @test_pown_reduced_fast_f16_known_odd(half %x, i32 %y.arg) #0 {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_or_b32_e32 v1, 1, v1
 ; GFX9-NEXT:    v_cvt_f32_i32_e32 v1, v1
-; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff8000, v0
 ; GFX9-NEXT:    s_movk_i32 s4, 0x7fff
 ; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX9-NEXT:    v_mul_f16_e64 v0, |v0|, v1
-; GFX9-NEXT:    v_bfi_b32 v0, s4, v0, v2
+; GFX9-NEXT:    v_mul_f16_e64 v1, |v0|, v1
+; GFX9-NEXT:    v_bfi_b32 v0, s4, v1, v0
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %y = or i32 %y.arg, 1
   %fabs = call half @llvm.fabs.f16(half %x)
@@ -37,10 +36,9 @@ define <2 x half> @test_pown_reduced_fast_v2f16_known_odd(<2 x half> %x, <2 x i3
 ; GFX9-NEXT:    v_cvt_f32_i32_e32 v2, v2
 ; GFX9-NEXT:    v_cvt_f32_i32_e32 v1, v1
 ; GFX9-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v0
-; GFX9-NEXT:    v_and_b32_e32 v0, 0x80008000, v0
+; GFX9-NEXT:    s_movk_i32 s4, 0x7fff
 ; GFX9-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX9-NEXT:    s_movk_i32 s4, 0x7fff
 ; GFX9-NEXT:    v_pack_b32_f16 v1, v1, v2
 ; GFX9-NEXT:    v_pk_mul_f16 v1, v3, v1
 ; GFX9-NEXT:    v_bfi_b32 v2, s4, v1, v0
@@ -67,10 +65,9 @@ define float @test_pown_reduced_fast_f32_known_odd(float %x, i32 %y.arg) #0 {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_or_b32_e32 v1, 1, v1
 ; GFX9-NEXT:    v_cvt_f32_i32_e32 v1, v1
-; GFX9-NEXT:    v_and_b32_e32 v2, 0x80000000, v0
 ; GFX9-NEXT:    s_brev_b32 s4, -2
-; GFX9-NEXT:    v_mul_f32_e64 v0, |v0|, v1
-; GFX9-NEXT:    v_bfi_b32 v0, s4, v0, v2
+; GFX9-NEXT:    v_mul_f32_e64 v1, |v0|, v1
+; GFX9-NEXT:    v_bfi_b32 v0, s4, v1, v0
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %y = or i32 %y.arg, 1
   %fabs = call float @llvm.fabs.f32(float %x)
@@ -94,8 +91,6 @@ define <2 x float> @test_pown_reduced_fast_v2f32_known_odd(<2 x float> %x, <2 x
 ; GFX9-NEXT:    s_brev_b32 s4, -2
 ; GFX9-NEXT:    v_mul_f32_e64 v3, |v1|, v3
 ; GFX9-NEXT:    v_mul_f32_e64 v2, |v0|, v2
-; GFX9-NEXT:    v_and_b32_e32 v1, 0x80000000, v1
-; GFX9-NEXT:    v_and_b32_e32 v0, 0x80000000, v0
 ; GFX9-NEXT:    v_bfi_b32 v0, s4, v2, v0
 ; GFX9-NEXT:    v_bfi_b32 v1, s4, v3, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -118,8 +113,7 @@ define double @test_pown_reduced_fast_f64_known_odd(double %x, i32 %y.arg) #0 {
 ; GFX9-NEXT:    v_cvt_f64_i32_e32 v[2:3], v2
 ; GFX9-NEXT:    s_brev_b32 s4, -2
 ; GFX9-NEXT:    v_mul_f64 v[2:3], |v[0:1]|, v[2:3]
-; GFX9-NEXT:    v_and_b32_e32 v0, 0x80000000, v1
-; GFX9-NEXT:    v_bfi_b32 v1, s4, v3, v0
+; GFX9-NEXT:    v_bfi_b32 v1, s4, v3, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v0, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %y = or i32 %y.arg, 1
@@ -144,10 +138,8 @@ define <2 x double> @test_pown_reduced_fast_v2f64_known_odd(<2 x double> %x, <2
 ; GFX9-NEXT:    s_brev_b32 s4, -2
 ; GFX9-NEXT:    v_mul_f64 v[4:5], |v[0:1]|, v[4:5]
 ; GFX9-NEXT:    v_mul_f64 v[6:7], |v[2:3]|, v[6:7]
-; GFX9-NEXT:    v_and_b32_e32 v0, 0x80000000, v3
-; GFX9-NEXT:    v_and_b32_e32 v1, 0x80000000, v1
 ; GFX9-NEXT:    v_bfi_b32 v1, s4, v5, v1
-; GFX9-NEXT:    v_bfi_b32 v3, s4, v7, v0
+; GFX9-NEXT:    v_bfi_b32 v3, s4, v7, v3
 ; GFX9-NEXT:    v_mov_b32_e32 v0, v4
 ; GFX9-NEXT:    v_mov_b32_e32 v2, v6
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
@@ -152,41 +152,36 @@ define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind  {
 ; PPC32-LABEL: test_copysign:
 ; PPC32:       # %bb.0: # %entry
 ; PPC32-NEXT:    mflr 0
-; PPC32-NEXT:    stwu 1, -96(1)
-; PPC32-NEXT:    stw 0, 100(1)
-; PPC32-NEXT:    stfd 1, 40(1)
-; PPC32-NEXT:    lwz 3, 44(1)
-; PPC32-NEXT:    stfd 2, 32(1)
-; PPC32-NEXT:    stw 3, 60(1)
-; PPC32-NEXT:    lwz 3, 40(1)
-; PPC32-NEXT:    stfd 3, 72(1)
-; PPC32-NEXT:    stw 3, 56(1)
+; PPC32-NEXT:    stwu 1, -80(1)
+; PPC32-NEXT:    stw 0, 84(1)
+; PPC32-NEXT:    stfd 1, 32(1)
 ; PPC32-NEXT:    lwz 3, 36(1)
-; PPC32-NEXT:    stfd 4, 64(1)
+; PPC32-NEXT:    stfd 2, 24(1)
 ; PPC32-NEXT:    stw 3, 52(1)
 ; PPC32-NEXT:    lwz 3, 32(1)
-; PPC32-NEXT:    lfd 1, 56(1)
+; PPC32-NEXT:    stfd 3, 56(1)
 ; PPC32-NEXT:    stw 3, 48(1)
-; PPC32-NEXT:    lwz 3, 76(1)
-; PPC32-NEXT:    lfd 2, 48(1)
-; PPC32-NEXT:    stw 3, 92(1)
-; PPC32-NEXT:    lwz 3, 72(1)
-; PPC32-NEXT:    stw 3, 88(1)
-; PPC32-NEXT:    lwz 3, 68(1)
-; PPC32-NEXT:    lfd 3, 88(1)
-; PPC32-NEXT:    stw 3, 84(1)
-; PPC32-NEXT:    lwz 3, 64(1)
-; PPC32-NEXT:    stw 3, 80(1)
-; PPC32-NEXT:    lfd 4, 80(1)
+; PPC32-NEXT:    lwz 3, 28(1)
+; PPC32-NEXT:    lfd 4, 64(1)
+; PPC32-NEXT:    stw 3, 44(1)
+; PPC32-NEXT:    lwz 3, 24(1)
+; PPC32-NEXT:    lfd 1, 48(1)
+; PPC32-NEXT:    stw 3, 40(1)
+; PPC32-NEXT:    lwz 3, 60(1)
+; PPC32-NEXT:    lfd 2, 40(1)
+; PPC32-NEXT:    stw 3, 76(1)
+; PPC32-NEXT:    lwz 3, 56(1)
+; PPC32-NEXT:    stw 3, 72(1)
+; PPC32-NEXT:    lfd 3, 72(1)
 ; PPC32-NEXT:    bl copysignl
-; PPC32-NEXT:    stfd 1, 16(1)
-; PPC32-NEXT:    stfd 2, 24(1)
-; PPC32-NEXT:    lwz 3, 16(1)
-; PPC32-NEXT:    lwz 4, 20(1)
-; PPC32-NEXT:    lwz 5, 24(1)
-; PPC32-NEXT:    lwz 6, 28(1)
-; PPC32-NEXT:    lwz 0, 100(1)
-; PPC32-NEXT:    addi 1, 1, 96
+; PPC32-NEXT:    stfd 1, 8(1)
+; PPC32-NEXT:    stfd 2, 16(1)
+; PPC32-NEXT:    lwz 3, 8(1)
+; PPC32-NEXT:    lwz 4, 12(1)
+; PPC32-NEXT:    lwz 5, 16(1)
+; PPC32-NEXT:    lwz 6, 20(1)
+; PPC32-NEXT:    lwz 0, 84(1)
+; PPC32-NEXT:    addi 1, 1, 80
 ; PPC32-NEXT:    mtlr 0
 ; PPC32-NEXT:    blr
 entry:
diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll
@@ -320,9 +320,7 @@ define double @fsgnjn_d(double %a, double %b) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: fsgnjn_d:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    li a2, -1
-; RV64IZFINXZDINX-NEXT:    slli a2, a2, 63
-; RV64IZFINXZDINX-NEXT:    xor a1, a1, a2
+; RV64IZFINXZDINX-NEXT:    not a1, a1
 ; RV64IZFINXZDINX-NEXT:    fsgnj.d a0, a0, a1
 ; RV64IZFINXZDINX-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll
@@ -156,20 +156,16 @@ define double @fcopysign_fneg(double %a, double %b) nounwind {
 ;
 ; RV64IFD-LABEL: fcopysign_fneg:
 ; RV64IFD:       # %bb.0:
-; RV64IFD-NEXT:    li a2, -1
-; RV64IFD-NEXT:    slli a2, a2, 63
-; RV64IFD-NEXT:    xor a1, a1, a2
-; RV64IFD-NEXT:    fmv.d.x fa5, a1
+; RV64IFD-NEXT:    fmv.d.x fa5, a0
+; RV64IFD-NEXT:    not a0, a1
 ; RV64IFD-NEXT:    fmv.d.x fa4, a0
-; RV64IFD-NEXT:    fsgnj.d fa5, fa4, fa5
+; RV64IFD-NEXT:    fsgnj.d fa5, fa5, fa4
 ; RV64IFD-NEXT:    fmv.x.d a0, fa5
 ; RV64IFD-NEXT:    ret
 ;
 ; RV64IZFINXZDINX-LABEL: fcopysign_fneg:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    li a2, -1
-; RV64IZFINXZDINX-NEXT:    slli a2, a2, 63
-; RV64IZFINXZDINX-NEXT:    xor a1, a1, a2
+; RV64IZFINXZDINX-NEXT:    not a1, a1
 ; RV64IZFINXZDINX-NEXT:    fsgnj.d a0, a0, a1
 ; RV64IZFINXZDINX-NEXT:    ret
   %1 = fneg double %b
diff --git a/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll
@@ -119,18 +119,16 @@ define float @fcopysign_fneg(float %a, float %b) nounwind {
 ;
 ; RV32IF-LABEL: fcopysign_fneg:
 ; RV32IF:       # %bb.0:
-; RV32IF-NEXT:    lui a2, 524288
-; RV32IF-NEXT:    xor a1, a1, a2
-; RV32IF-NEXT:    fmv.w.x fa5, a1
+; RV32IF-NEXT:    fmv.w.x fa5, a0
+; RV32IF-NEXT:    not a0, a1
 ; RV32IF-NEXT:    fmv.w.x fa4, a0
-; RV32IF-NEXT:    fsgnj.s fa5, fa4, fa5
+; RV32IF-NEXT:    fsgnj.s fa5, fa5, fa4
 ; RV32IF-NEXT:    fmv.x.w a0, fa5
 ; RV32IF-NEXT:    ret
 ;
 ; RV32IZFINX-LABEL: fcopysign_fneg:
 ; RV32IZFINX:       # %bb.0:
-; RV32IZFINX-NEXT:    lui a2, 524288
-; RV32IZFINX-NEXT:    xor a1, a1, a2
+; RV32IZFINX-NEXT:    not a1, a1
 ; RV32IZFINX-NEXT:    fsgnj.s a0, a0, a1
 ; RV32IZFINX-NEXT:    ret
 ;

Original file line number	Diff line number	Diff line change
`@@ -320,9 +320,7 @@ define double @fsgnjn_d(double %a, double %b) nounwind {`
`320`	`320`	`;`
`321`	`321`	`; RV64IZFINXZDINX-LABEL: fsgnjn_d:`
`322`	`322`	`; RV64IZFINXZDINX: # %bb.0:`
`323`		`-; RV64IZFINXZDINX-NEXT: li a2, -1`
`324`		`-; RV64IZFINXZDINX-NEXT: slli a2, a2, 63`
`325`		`-; RV64IZFINXZDINX-NEXT: xor a1, a1, a2`
	`323`	`+; RV64IZFINXZDINX-NEXT: not a1, a1`
`326`	`324`	`; RV64IZFINXZDINX-NEXT: fsgnj.d a0, a0, a1`
`327`	`325`	`; RV64IZFINXZDINX-NEXT: ret`
`328`	`326`	`;`