Skip to content

Commit 2c7786e

Browse files
authored
Prefer use of 0.0 over -0.0 for fadd reductions w/nsz (in IR) (#106770)
This is a follow up to 924907b, and is mostly motivated by consistency but does include one additional optimization. In general, we prefer 0.0 over -0.0 as the identity value for an fadd. We use that value in several places, but don't in others. So, let's be consistent and use the same identity (when nsz allows) everywhere. This creates a bunch of test churn, but due to 924907b, most of that churn doesn't actually indicate a change in codegen. The exception is that this change enables the use of 0.0 for nsz, but *not* reasoc, fadd reductions. Or said differently, it allows the neutral value of an ordered fadd reduction to be 0.0.
1 parent df4746d commit 2c7786e

36 files changed

+101
-98
lines changed

llvm/lib/CodeGen/ExpandVectorPredication.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,9 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
408408
APFloat::getLargest(Semantics, Negative));
409409
}
410410
case Intrinsic::vp_reduce_fadd:
411-
return ConstantFP::getNegativeZero(EltTy);
411+
return ConstantExpr::getBinOpIdentity(
412+
Instruction::FAdd, EltTy, false,
413+
VPI.getFastMathFlags().noSignedZeros());
412414
case Intrinsic::vp_reduce_fmul:
413415
return ConstantFP::get(EltTy, 1.0);
414416
}

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1213,7 +1213,8 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src,
12131213
auto getIdentity = [&]() {
12141214
Intrinsic::ID ID = getReductionIntrinsicID(RdxKind);
12151215
unsigned Opc = getArithmeticReductionInstruction(ID);
1216-
return ConstantExpr::getBinOpIdentity(Opc, SrcVecEltTy);
1216+
bool NSZ = Builder.getFastMathFlags().noSignedZeros();
1217+
return ConstantExpr::getBinOpIdentity(Opc, SrcVecEltTy, false, NSZ);
12171218
};
12181219
switch (RdxKind) {
12191220
case RecurKind::Add:

llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias nocapture
2929
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP10]], i32 0
3030
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> poison)
3131
; CHECK-NEXT: [[TMP12:%.*]] = select fast <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> zeroinitializer
32-
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP12]])
32+
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> [[TMP12]])
3333
; CHECK-NEXT: [[TMP14]] = fadd fast float [[TMP13]], [[VEC_PHI]]
3434
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
3535
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]

llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ define float @fadd_fast(ptr noalias nocapture readonly %a, i64 %n) {
202202
; CHECK: %[[ADD2:.*]] = fadd fast <vscale x 8 x float> %[[LOAD2]]
203203
; CHECK: middle.block:
204204
; CHECK: %[[ADD:.*]] = fadd fast <vscale x 8 x float> %[[ADD2]], %[[ADD1]]
205-
; CHECK-NEXT: call fast float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> %[[ADD]])
205+
; CHECK-NEXT: call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> %[[ADD]])
206206
entry:
207207
br label %for.body
208208

@@ -231,7 +231,7 @@ define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) {
231231
; CHECK: %[[FADD2:.*]] = fadd fast <8 x bfloat> %[[LOAD2]]
232232
; CHECK: middle.block:
233233
; CHECK: %[[RDX:.*]] = fadd fast <8 x bfloat> %[[FADD2]], %[[FADD1]]
234-
; CHECK: call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR8000, <8 x bfloat> %[[RDX]])
234+
; CHECK: call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> %[[RDX]])
235235
entry:
236236
br label %for.body
237237

llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -821,7 +821,7 @@ define void @int_float_struct(ptr nocapture readonly %p) #0 {
821821
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
822822
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
823823
; CHECK: middle.block:
824-
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP7]])
824+
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> [[TMP7]])
825825
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP6]])
826826
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
827827
; CHECK: scalar.ph:

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -93,15 +93,15 @@ define float @fadd_red_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
9393
; CHECK-NOTF: %[[LOAD:.*]] = load <vscale x 4 x float>
9494
; CHECK-NOTF: %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
9595
; CHECK-NOTF: middle.block:
96-
; CHECK-NOTF-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> %[[ADD]])
96+
; CHECK-NOTF-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[ADD]])
9797

9898
; CHECK-TF-NORED-LABEL: @fadd_red_fast
9999
; CHECK-TF-NORED: vector.body:
100100
; CHECK-TF-NORED-NOT: %{{.*}} = phi <vscale x 4 x i1>
101101
; CHECK-TF-NORED: %[[LOAD:.*]] = load <vscale x 4 x float>
102102
; CHECK-TF-NORED: %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
103103
; CHECK-TF-NORED: middle.block:
104-
; CHECK-TF-NORED-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> %[[ADD]])
104+
; CHECK-TF-NORED-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[ADD]])
105105

106106
; CHECK-TF-NOREC-LABEL: @fadd_red_fast
107107
; CHECK-TF-NOREC: vector.body:
@@ -111,7 +111,7 @@ define float @fadd_red_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
111111
; CHECK-TF-NOREC: %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
112112
; CHECK-TF-NOREC: %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]
113113
; CHECK-TF-NOREC: middle.block:
114-
; CHECK-TF-NOREC-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> %[[SEL]])
114+
; CHECK-TF-NOREC-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[SEL]])
115115

116116
; CHECK-TF-NOREV-LABEL: @fadd_red_fast
117117
; CHECK-TF-NOREV: vector.body:
@@ -121,7 +121,7 @@ define float @fadd_red_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
121121
; CHECK-TF-NOREV: %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
122122
; CHECK-TF-NOREV: %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]
123123
; CHECK-TF-NOREV: middle.block:
124-
; CHECK-TF-NOREV-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> %[[SEL]])
124+
; CHECK-TF-NOREV-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[SEL]])
125125

126126
; CHECK-TF-LABEL: @fadd_red_fast
127127
; CHECK-TF: vector.body:
@@ -131,7 +131,7 @@ define float @fadd_red_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
131131
; CHECK-TF: %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
132132
; CHECK-TF: %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]
133133
; CHECK-TF: middle.block:
134-
; CHECK-TF-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> %[[SEL]])
134+
; CHECK-TF-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[SEL]])
135135

136136
; CHECK-TF-ONLYRED-LABEL: @fadd_red_fast
137137
; CHECK-TF-ONLYRED: vector.body:
@@ -141,15 +141,15 @@ define float @fadd_red_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
141141
; CHECK-TF-ONLYRED: %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
142142
; CHECK-TF-ONLYRED: %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]
143143
; CHECK-TF-ONLYRED: middle.block:
144-
; CHECK-TF-ONLYRED-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> %[[SEL]])
144+
; CHECK-TF-ONLYRED-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[SEL]])
145145

146146
; CHECK-NEOVERSE-V1-LABEL: @fadd_red_fast
147147
; CHECK-NEOVERSE-V1: vector.body:
148148
; CHECK-NEOVERSE-V1-NOT: %{{.*}} = phi <vscale x 4 x i1>
149149
; CHECK-NEOVERSE-V1: %[[LOAD:.*]] = load <vscale x 4 x float>
150150
; CHECK-NEOVERSE-V1: %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
151151
; CHECK-NEOVERSE-V1: middle.block:
152-
; CHECK-NEOVERSE-V1-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> %[[ADD]])
152+
; CHECK-NEOVERSE-V1-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[ADD]])
153153

154154
entry:
155155
br label %for.body

llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ define half @vectorize_v2f16_loop(ptr addrspace(1) noalias %s) {
2424
; GFX9-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
2525
; GFX9: middle.block:
2626
; GFX9-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x half> [[TMP3]], [[TMP2]]
27-
; GFX9-NEXT: [[TMP5:%.*]] = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH8000, <2 x half> [[BIN_RDX]])
27+
; GFX9-NEXT: [[TMP5:%.*]] = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> [[BIN_RDX]])
2828
; GFX9-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
2929
; GFX9: scalar.ph:
3030
; GFX9-NEXT: br label [[FOR_BODY:%.*]]
@@ -54,7 +54,7 @@ define half @vectorize_v2f16_loop(ptr addrspace(1) noalias %s) {
5454
; VI-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
5555
; VI: middle.block:
5656
; VI-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x half> [[TMP3]], [[TMP2]]
57-
; VI-NEXT: [[TMP5:%.*]] = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH8000, <2 x half> [[BIN_RDX]])
57+
; VI-NEXT: [[TMP5:%.*]] = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> [[BIN_RDX]])
5858
; VI-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
5959
; VI: scalar.ph:
6060
; VI-NEXT: br label [[FOR_BODY:%.*]]

llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
336336
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
337337
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
338338
; CHECK: middle.block:
339-
; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP4]])
339+
; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP4]])
340340
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
341341
; CHECK: scalar.ph:
342342
; CHECK-NEXT: br label [[FOR_BODY:%.*]]

llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,7 @@ define float @fadd_f32(ptr nocapture readonly %x, i32 %n) #0 {
531531
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
532532
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
533533
; CHECK: middle.block:
534-
; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP4]])
534+
; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP4]])
535535
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
536536
; CHECK: scalar.ph:
537537
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]

llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ define float @test(ptr nocapture readonly %pA, ptr nocapture readonly %pB, i32 %
5151
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
5252
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
5353
; CHECK: middle.block:
54-
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[PREDPHI]])
54+
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[PREDPHI]])
5555
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[BLOCKSIZE]], [[N_VEC]]
5656
; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[SCALAR_PH]]
5757
; CHECK: scalar.ph:

llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ define i32 @test(ptr nocapture readonly %x) {
6464
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
6565
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
6666
; CHECK: middle.block:
67-
; CHECK-NEXT: [[TMP15:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> [[TMP13]])
67+
; CHECK-NEXT: [[TMP15:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[TMP13]])
6868
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[T]], [[N_VEC]]
6969
; CHECK-NEXT: br i1 [[CMP_N]], label [[OUTEREND]], label [[SCALAR_PH]]
7070
; CHECK: scalar.ph:

llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-call.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ define dso_local double @test(ptr %Arr) {
2020
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
2121
; CHECK: middle.block:
2222
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi <2 x double> [ [[TMP3]], [[VECTOR_BODY]] ]
23-
; CHECK-NEXT: [[TMP5:%.*]] = tail call fast double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> [[DOTLCSSA]])
23+
; CHECK-NEXT: [[TMP5:%.*]] = tail call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[DOTLCSSA]])
2424
; CHECK-NEXT: ret double [[TMP5]]
2525
;
2626
entry:

llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ define dso_local double @test(ptr %Arr) {
1818
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128
1919
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
2020
; CHECK: middle.block:
21-
; CHECK-NEXT: [[TMP6:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> [[TMP4]])
21+
; CHECK-NEXT: [[TMP6:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[TMP4]])
2222
; CHECK-NEXT: ret double [[TMP6]]
2323
;
2424
entry:

llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ define float @fadd_fast(ptr noalias nocapture readonly %a, i64 %n) {
206206
; CHECK: %[[ADD2:.*]] = fadd fast <vscale x 8 x float> %[[LOAD2]]
207207
; CHECK: middle.block:
208208
; CHECK: %[[ADD:.*]] = fadd fast <vscale x 8 x float> %[[ADD2]], %[[ADD1]]
209-
; CHECK-NEXT: call fast float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> %[[ADD]])
209+
; CHECK-NEXT: call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> %[[ADD]])
210210
entry:
211211
br label %for.body
212212

@@ -235,7 +235,7 @@ define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) {
235235
; CHECK: %[[FADD2:.*]] = fadd fast <16 x bfloat> %[[LOAD2]]
236236
; CHECK: middle.block:
237237
; CHECK: %[[RDX:.*]] = fadd fast <16 x bfloat> %[[FADD2]], %[[FADD1]]
238-
; CHECK: call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR8000, <16 x bfloat> %[[RDX]])
238+
; CHECK: call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> %[[RDX]])
239239
entry:
240240
br label %for.body
241241

llvm/test/Transforms/LoopVectorize/X86/cost-model.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
215215
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP120]], [[TMP119]]
216216
; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <4 x float> [[TMP121]], [[BIN_RDX]]
217217
; CHECK-NEXT: [[BIN_RDX5:%.*]] = fadd fast <4 x float> [[TMP122]], [[BIN_RDX4]]
218-
; CHECK-NEXT: [[TMP124:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[BIN_RDX5]])
218+
; CHECK-NEXT: [[TMP124:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[BIN_RDX5]])
219219
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
220220
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
221221
; CHECK: scalar.ph:

llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ define double @sumIfVector(ptr nocapture readonly %arr) {
9393
; SSE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
9494
; SSE: middle.block:
9595
; SSE-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x double> [[PREDPHI3]], [[PREDPHI]]
96-
; SSE-NEXT: [[TMP11:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> [[BIN_RDX]])
96+
; SSE-NEXT: [[TMP11:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[BIN_RDX]])
9797
; SSE-NEXT: br i1 true, label [[DONE:%.*]], label [[SCALAR_PH]]
9898
; SSE: scalar.ph:
9999
; SSE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
@@ -166,7 +166,7 @@ define double @sumIfVector(ptr nocapture readonly %arr) {
166166
; AVX-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x double> [[PREDPHI7]], [[PREDPHI]]
167167
; AVX-NEXT: [[BIN_RDX10:%.*]] = fadd fast <4 x double> [[PREDPHI8]], [[BIN_RDX]]
168168
; AVX-NEXT: [[BIN_RDX11:%.*]] = fadd fast <4 x double> [[PREDPHI9]], [[BIN_RDX10]]
169-
; AVX-NEXT: [[TMP21:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[BIN_RDX11]])
169+
; AVX-NEXT: [[TMP21:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[BIN_RDX11]])
170170
; AVX-NEXT: br i1 true, label [[DONE:%.*]], label [[SCALAR_PH]]
171171
; AVX: scalar.ph:
172172
; AVX-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]

llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ define float @reduction_sum_float_fastmath(i32 %n, ptr %array) {
7474
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
7575
; CHECK: middle.block:
7676
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP7]], [[TMP6]]
77-
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[BIN_RDX]])
77+
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[BIN_RDX]])
7878
; CHECK-NEXT: br i1 true, label [[LOOP_EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
7979
; CHECK: scalar.ph:
8080
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]

llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ define float @multiple_fp_rdx(ptr %A, i64 %N) {
314314
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
315315
; CHECK: middle.block:
316316
; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP4]])
317-
; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
317+
; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
318318
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
319319
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
320320
; CHECK: vec.epilog.iter.check:
@@ -345,7 +345,7 @@ define float @multiple_fp_rdx(ptr %A, i64 %N) {
345345
; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
346346
; CHECK: vec.epilog.middle.block:
347347
; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP14]])
348-
; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP13]])
348+
; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP13]])
349349
; CHECK-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N]], [[N_VEC5]]
350350
; CHECK-NEXT: br i1 [[CMP_N11]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]
351351
; CHECK: vec.epilog.scalar.ph:

0 commit comments

Comments
 (0)