
Commit 90fd859

[x86] use instruction-level fast-math-flags to drive MachineCombiner
The code changes here are hopefully straightforward:

1. Use MachineInstruction flags to decide if FP ops can be reassociated (use both "reassoc" and "nsz" to be consistent with IR transforms; we probably don't need "nsz", but that's a safer interpretation of the FMF).

2. Check that both nodes allow reassociation to change instructions. This is a stronger requirement than we've usually implemented in IR/DAG, but it is needed to solve the motivating bug (see below), and it seems unlikely to impede optimization at this late stage.

3. Intersect/propagate MachineIR flags to enable further reassociation in MachineCombiner.

We managed to make MachineCombiner flexible enough that no changes are needed to that pass itself. So this patch should only affect x86 (assuming no other targets have implemented the hooks using MachineIR flags yet).

The motivating example in PR43609 is another case of fast-math transforms interacting badly with special FP ops created during lowering:
https://bugs.llvm.org/show_bug.cgi?id=43609

The special fadd ops used for converting int to FP assume that they will not be altered, so those are created without FMF. However, the MachineCombiner pass was being enabled for FP ops using the global/function-level TargetOption for "UnsafeFPMath". We managed to run instruction/node-level FMF all the way down to MachineIR sometime in the last 1-2 years though, so we can do better now.

The test diffs require some explanation:

1. llvm/test/CodeGen/X86/fmf-flags.ll - no target option for unsafe math was specified here, so MachineCombiner kicks in where it did not previously; to make it behave consistently, we need to specify a CPU schedule model, so use the default model, and there are no code diffs.

2. llvm/test/CodeGen/X86/machine-combiner.ll - replace the target option for unsafe math with the equivalent IR-level flags, and there are no code diffs; we can't remove the NaN/nsz options because those are still used to drive x86 fmin/fmax codegen (special SDAG opcodes).

3. llvm/test/CodeGen/X86/pow.ll - similar to #1.

4. llvm/test/CodeGen/X86/sqrt-fastmath.ll - similar to #1, but MachineCombiner does some reassociation of the estimate-sequence ops; presumably these are perf wins based on latency/throughput (and we get some reduction of move instructions too); I'm not sure how it affects numerical accuracy, but the test reflects reality better now because we would expect MachineCombiner to be enabled if the IR was generated via something like "-ffast-math" with clang.

5. llvm/test/CodeGen/X86/vec_int_to_fp.ll - this is the test added to model PR43609; the fadds are not reassociated now, so we should get the expected results.

6. llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll - similar to #1.

7. llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll - similar to #1.

Differential Revision: https://reviews.llvm.org/D74851
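As a rough illustration of points 1 and 2, the check now keys off the flags carried by each individual MachineInstr rather than a global target option. The helper below is a minimal sketch only (the name canReassociateFP is illustrative, not part of this patch); the real x86 change is in the X86InstrInfo.cpp diff further down.

#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// An FP op may be reassociated only if it carries both the "reassoc" and
// "nsz" fast-math flags. Because hasReassociableSibling() applies the same
// test to the neighboring instruction, both nodes must opt in, so a
// flag-free fadd created during lowering (the PR43609 case) never
// participates.
static bool canReassociateFP(const MachineInstr &MI) {
  return MI.getFlag(MachineInstr::MIFlag::FmReassoc) &&
         MI.getFlag(MachineInstr::MIFlag::FmNsz);
}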
1 parent 2f090ce commit 90fd859

9 files changed (+210, -207 lines)

llvm/lib/CodeGen/TargetInstrInfo.cpp (6 additions & 3 deletions)

@@ -699,10 +699,13 @@ bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
     std::swap(MI1, MI2);

   // 1. The previous instruction must be the same type as Inst.
-  // 2. The previous instruction must have virtual register definitions for its
+  // 2. The previous instruction must also be associative/commutative (this can
+  //    be different even for instructions with the same opcode if traits like
+  //    fast-math-flags are included).
+  // 3. The previous instruction must have virtual register definitions for its
   //    operands in the same basic block as Inst.
-  // 3. The previous instruction's result must only be used by Inst.
-  return MI1->getOpcode() == AssocOpcode &&
+  // 4. The previous instruction's result must only be used by Inst.
+  return MI1->getOpcode() == AssocOpcode && isAssociativeAndCommutative(*MI1) &&
          hasReassociableOperands(*MI1, MBB) &&
          MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg());
 }

llvm/lib/Target/X86/X86InstrInfo.cpp (16 additions & 1 deletion)

@@ -7657,7 +7657,8 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
   case X86::VMULSSrr:
   case X86::VMULSDZrr:
   case X86::VMULSSZrr:
-    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
+    return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+           Inst.getFlag(MachineInstr::MIFlag::FmNsz);
   default:
     return false;
   }
@@ -7843,6 +7844,20 @@ void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
                                          MachineInstr &OldMI2,
                                          MachineInstr &NewMI1,
                                          MachineInstr &NewMI2) const {
+  // Propagate FP flags from the original instructions.
+  // But clear poison-generating flags because those may not be valid now.
+  // TODO: There should be a helper function for copying only fast-math-flags.
+  uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
+  NewMI1.setFlags(IntersectedFlags);
+  NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
+  NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
+  NewMI1.clearFlag(MachineInstr::MIFlag::IsExact);
+
+  NewMI2.setFlags(IntersectedFlags);
+  NewMI2.clearFlag(MachineInstr::MIFlag::NoSWrap);
+  NewMI2.clearFlag(MachineInstr::MIFlag::NoUWrap);
+  NewMI2.clearFlag(MachineInstr::MIFlag::IsExact);
+
   // Integer instructions may define an implicit EFLAGS dest register operand.
   MachineOperand *OldFlagDef1 = OldMI1.findRegisterDefOperand(X86::EFLAGS);
   MachineOperand *OldFlagDef2 = OldMI2.findRegisterDefOperand(X86::EFLAGS);
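The TODO above hints at a cleaner factoring. The sketch below shows what such a helper could look like; the function name intersectFastMathMIFlags is hypothetical, and the mask is an assumption about which MIFlag bits count as fast-math flags, not code from this patch.

#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Intersect the two instructions' flags, then keep only the FP fast-math
// bits, so poison-generating integer flags (nsw/nuw/exact) are dropped in
// one step instead of being cleared individually on each new instruction.
static uint16_t intersectFastMathMIFlags(const MachineInstr &MI1,
                                         const MachineInstr &MI2) {
  const uint16_t FMFBits =
      MachineInstr::FmNoNans | MachineInstr::FmNoInfs | MachineInstr::FmNsz |
      MachineInstr::FmArcp | MachineInstr::FmContract | MachineInstr::FmAfn |
      MachineInstr::FmReassoc;
  return (MI1.getFlags() & MI2.getFlags()) & FMFBits;
}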

llvm/test/CodeGen/X86/fmf-flags.ll (1 addition & 1 deletion)

@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s -check-prefix=X64
 ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s -check-prefix=X86

 declare float @llvm.sqrt.f32(float %x);

llvm/test/CodeGen/X86/machine-combiner.ll (79 additions & 79 deletions)

@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX512

 ; Incremental updates of the instruction depths should be enough for this test
 ; case.
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=sse -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=avx -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=avx512vl -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX512

 ; Verify that the first two adds are independent regardless of how the inputs are
 ; commuted. The destination registers are used as source registers for the third add.
@@ -26,9 +26,9 @@ define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
 ; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %t0, %x2
-  %t2 = fadd float %t1, %x3
+  %t0 = fadd reassoc nsz float %x0, %x1
+  %t1 = fadd reassoc nsz float %t0, %x2
+  %t2 = fadd reassoc nsz float %t1, %x3
   ret float %t2
 }

@@ -46,9 +46,9 @@ define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
 ; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %x2, %t0
-  %t2 = fadd float %t1, %x3
+  %t0 = fadd reassoc nsz float %x0, %x1
+  %t1 = fadd reassoc nsz float %x2, %t0
+  %t2 = fadd reassoc nsz float %t1, %x3
   ret float %t2
 }

@@ -66,9 +66,9 @@ define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
 ; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %t0, %x2
-  %t2 = fadd float %x3, %t1
+  %t0 = fadd reassoc nsz float %x0, %x1
+  %t1 = fadd reassoc nsz float %t0, %x2
+  %t2 = fadd reassoc nsz float %x3, %t1
   ret float %t2
 }

@@ -86,9 +86,9 @@ define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
 ; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %x2, %t0
-  %t2 = fadd float %x3, %t1
+  %t0 = fadd reassoc nsz float %x0, %x1
+  %t1 = fadd reassoc nsz float %x2, %t0
+  %t2 = fadd reassoc nsz float %x3, %t1
   ret float %t2
 }

@@ -117,13 +117,13 @@ define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, floa
 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: vaddss %xmm7, %xmm0, %xmm0
 ; AVX-NEXT: retq
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %t0, %x2
-  %t2 = fadd float %t1, %x3
-  %t3 = fadd float %t2, %x4
-  %t4 = fadd float %t3, %x5
-  %t5 = fadd float %t4, %x6
-  %t6 = fadd float %t5, %x7
+  %t0 = fadd reassoc nsz float %x0, %x1
+  %t1 = fadd reassoc nsz float %t0, %x2
+  %t2 = fadd reassoc nsz float %t1, %x3
+  %t3 = fadd reassoc nsz float %t2, %x4
+  %t4 = fadd reassoc nsz float %t3, %x5
+  %t5 = fadd reassoc nsz float %t4, %x6
+  %t6 = fadd reassoc nsz float %t5, %x7
   ret float %t6
 }

@@ -146,9 +146,9 @@ define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
 ; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
-  %t0 = fdiv float %x0, %x1
-  %t1 = fadd float %x2, %t0
-  %t2 = fadd float %x3, %t1
+  %t0 = fdiv reassoc nsz float %x0, %x1
+  %t1 = fadd reassoc nsz float %x2, %t0
+  %t2 = fadd reassoc nsz float %x3, %t1
   ret float %t2
 }

@@ -168,9 +168,9 @@ define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
 ; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm1
 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
-  %t0 = fdiv float %x0, %x1
-  %t1 = fmul float %x2, %t0
-  %t2 = fmul float %x3, %t1
+  %t0 = fdiv reassoc nsz float %x0, %x1
+  %t1 = fmul reassoc nsz float %x2, %t0
+  %t2 = fmul reassoc nsz float %x3, %t1
   ret float %t2
 }

@@ -190,9 +190,9 @@ define double @reassociate_adds_double(double %x0, double %x1, double %x2, doubl
 ; AVX-NEXT: vaddsd %xmm3, %xmm2, %xmm1
 ; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
-  %t0 = fdiv double %x0, %x1
-  %t1 = fadd double %x2, %t0
-  %t2 = fadd double %x3, %t1
+  %t0 = fdiv reassoc nsz double %x0, %x1
+  %t1 = fadd reassoc nsz double %x2, %t0
+  %t2 = fadd reassoc nsz double %x3, %t1
   ret double %t2
 }

@@ -212,9 +212,9 @@ define double @reassociate_muls_double(double %x0, double %x1, double %x2, doubl
 ; AVX-NEXT: vmulsd %xmm3, %xmm2, %xmm1
 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
-  %t0 = fdiv double %x0, %x1
-  %t1 = fmul double %x2, %t0
-  %t2 = fmul double %x3, %t1
+  %t0 = fdiv reassoc nsz double %x0, %x1
+  %t1 = fmul reassoc nsz double %x2, %t0
+  %t2 = fmul reassoc nsz double %x3, %t1
   ret double %t2
 }

@@ -240,9 +240,9 @@ define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4
 ; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
 ; AVX512-NEXT: vaddps %xmm0, %xmm3, %xmm0
 ; AVX512-NEXT: retq
-  %t0 = fmul <4 x float> %x0, %x1
-  %t1 = fadd <4 x float> %x2, %t0
-  %t2 = fadd <4 x float> %x3, %t1
+  %t0 = fmul reassoc nsz <4 x float> %x0, %x1
+  %t1 = fadd reassoc nsz <4 x float> %x2, %t0
+  %t2 = fadd reassoc nsz <4 x float> %x3, %t1
   ret <4 x float> %t2
 }

@@ -268,9 +268,9 @@ define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1,
 ; AVX512-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
 ; AVX512-NEXT: vaddpd %xmm0, %xmm3, %xmm0
 ; AVX512-NEXT: retq
-  %t0 = fmul <2 x double> %x0, %x1
-  %t1 = fadd <2 x double> %x2, %t0
-  %t2 = fadd <2 x double> %x3, %t1
+  %t0 = fmul reassoc nsz <2 x double> %x0, %x1
+  %t1 = fadd reassoc nsz <2 x double> %x2, %t0
+  %t2 = fadd reassoc nsz <2 x double> %x3, %t1
   ret <2 x double> %t2
 }

@@ -290,9 +290,9 @@ define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4
 ; AVX-NEXT: vmulps %xmm3, %xmm2, %xmm1
 ; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
-  %t0 = fadd <4 x float> %x0, %x1
-  %t1 = fmul <4 x float> %x2, %t0
-  %t2 = fmul <4 x float> %x3, %t1
+  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+  %t1 = fmul reassoc nsz <4 x float> %x2, %t0
+  %t2 = fmul reassoc nsz <4 x float> %x3, %t1
   ret <4 x float> %t2
 }

@@ -312,9 +312,9 @@ define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1,
 ; AVX-NEXT: vmulpd %xmm3, %xmm2, %xmm1
 ; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
-  %t0 = fadd <2 x double> %x0, %x1
-  %t1 = fmul <2 x double> %x2, %t0
-  %t2 = fmul <2 x double> %x3, %t1
+  %t0 = fadd reassoc nsz <2 x double> %x0, %x1
+  %t1 = fmul reassoc nsz <2 x double> %x2, %t0
+  %t2 = fmul reassoc nsz <2 x double> %x3, %t1
   ret <2 x double> %t2
 }

@@ -343,9 +343,9 @@ define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8
 ; AVX512-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
 ; AVX512-NEXT: vaddps %ymm0, %ymm3, %ymm0
 ; AVX512-NEXT: retq
-  %t0 = fmul <8 x float> %x0, %x1
-  %t1 = fadd <8 x float> %x2, %t0
-  %t2 = fadd <8 x float> %x3, %t1
+  %t0 = fmul reassoc nsz <8 x float> %x0, %x1
+  %t1 = fadd reassoc nsz <8 x float> %x2, %t0
+  %t2 = fadd reassoc nsz <8 x float> %x3, %t1
   ret <8 x float> %t2
 }

@@ -374,9 +374,9 @@ define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1,
 ; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
 ; AVX512-NEXT: vaddpd %ymm0, %ymm3, %ymm0
 ; AVX512-NEXT: retq
-  %t0 = fmul <4 x double> %x0, %x1
-  %t1 = fadd <4 x double> %x2, %t0
-  %t2 = fadd <4 x double> %x3, %t1
+  %t0 = fmul reassoc nsz <4 x double> %x0, %x1
+  %t1 = fadd reassoc nsz <4 x double> %x2, %t0
+  %t2 = fadd reassoc nsz <4 x double> %x3, %t1
   ret <4 x double> %t2
 }

@@ -399,9 +399,9 @@ define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8
 ; AVX-NEXT: vmulps %ymm3, %ymm2, %ymm1
 ; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
 ; AVX-NEXT: retq
-  %t0 = fadd <8 x float> %x0, %x1
-  %t1 = fmul <8 x float> %x2, %t0
-  %t2 = fmul <8 x float> %x3, %t1
+  %t0 = fadd reassoc nsz <8 x float> %x0, %x1
+  %t1 = fmul reassoc nsz <8 x float> %x2, %t0
+  %t2 = fmul reassoc nsz <8 x float> %x3, %t1
   ret <8 x float> %t2
 }

@@ -424,9 +424,9 @@ define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1,
 ; AVX-NEXT: vmulpd %ymm3, %ymm2, %ymm1
 ; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
 ; AVX-NEXT: retq
-  %t0 = fadd <4 x double> %x0, %x1
-  %t1 = fmul <4 x double> %x2, %t0
-  %t2 = fmul <4 x double> %x3, %t1
+  %t0 = fadd reassoc nsz <4 x double> %x0, %x1
+  %t1 = fmul reassoc nsz <4 x double> %x2, %t0
+  %t2 = fmul reassoc nsz <4 x double> %x3, %t1
   ret <4 x double> %t2
 }

@@ -464,9 +464,9 @@ define <16 x float> @reassociate_adds_v16f32(<16 x float> %x0, <16 x float> %x1,
 ; AVX512-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
 ; AVX512-NEXT: vaddps %zmm0, %zmm3, %zmm0
 ; AVX512-NEXT: retq
-  %t0 = fmul <16 x float> %x0, %x1
-  %t1 = fadd <16 x float> %x2, %t0
-  %t2 = fadd <16 x float> %x3, %t1
+  %t0 = fmul reassoc nsz <16 x float> %x0, %x1
+  %t1 = fadd reassoc nsz <16 x float> %x2, %t0
+  %t2 = fadd reassoc nsz <16 x float> %x3, %t1
   ret <16 x float> %t2
 }

@@ -504,9 +504,9 @@ define <8 x double> @reassociate_adds_v8f64(<8 x double> %x0, <8 x double> %x1,
 ; AVX512-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
 ; AVX512-NEXT: vaddpd %zmm0, %zmm3, %zmm0
 ; AVX512-NEXT: retq
-  %t0 = fmul <8 x double> %x0, %x1
-  %t1 = fadd <8 x double> %x2, %t0
-  %t2 = fadd <8 x double> %x3, %t1
+  %t0 = fmul reassoc nsz <8 x double> %x0, %x1
+  %t1 = fadd reassoc nsz <8 x double> %x2, %t0
+  %t2 = fadd reassoc nsz <8 x double> %x3, %t1
   ret <8 x double> %t2
 }

@@ -545,9 +545,9 @@ define <16 x float> @reassociate_muls_v16f32(<16 x float> %x0, <16 x float> %x1,
 ; AVX512-NEXT: vmulps %zmm3, %zmm2, %zmm1
 ; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT: retq
-  %t0 = fadd <16 x float> %x0, %x1
-  %t1 = fmul <16 x float> %x2, %t0
-  %t2 = fmul <16 x float> %x3, %t1
+  %t0 = fadd reassoc nsz <16 x float> %x0, %x1
+  %t1 = fmul reassoc nsz <16 x float> %x2, %t0
+  %t2 = fmul reassoc nsz <16 x float> %x3, %t1
   ret <16 x float> %t2
 }

@@ -586,9 +586,9 @@ define <8 x double> @reassociate_muls_v8f64(<8 x double> %x0, <8 x double> %x1,
 ; AVX512-NEXT: vmulpd %zmm3, %zmm2, %zmm1
 ; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT: retq
-  %t0 = fadd <8 x double> %x0, %x1
-  %t1 = fmul <8 x double> %x2, %t0
-  %t2 = fmul <8 x double> %x3, %t1
+  %t0 = fadd reassoc nsz <8 x double> %x0, %x1
+  %t1 = fmul reassoc nsz <8 x double> %x2, %t0
+  %t2 = fmul reassoc nsz <8 x double> %x3, %t1
   ret <8 x double> %t2
 }

@@ -1114,9 +1114,9 @@ define double @reassociate_adds_from_calls() {
   %x1 = call double @bar()
   %x2 = call double @bar()
   %x3 = call double @bar()
-  %t0 = fadd double %x0, %x1
-  %t1 = fadd double %t0, %x2
-  %t2 = fadd double %t1, %x3
+  %t0 = fadd reassoc nsz double %x0, %x1
+  %t1 = fadd reassoc nsz double %t0, %x2
+  %t2 = fadd reassoc nsz double %t1, %x3
   ret double %t2
 }

@@ -1165,9 +1165,9 @@ define double @already_reassociated() {
   %x1 = call double @bar()
   %x2 = call double @bar()
   %x3 = call double @bar()
-  %t0 = fadd double %x0, %x1
-  %t1 = fadd double %x2, %x3
-  %t2 = fadd double %t0, %t1
+  %t0 = fadd reassoc nsz double %x0, %x1
+  %t1 = fadd reassoc nsz double %x2, %x3
+  %t2 = fadd reassoc nsz double %t0, %t1
   ret double %t2
 }

llvm/test/CodeGen/X86/pow.ll (1 addition & 1 deletion)

@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s

 declare float @llvm.pow.f32(float, float)
 declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
