Skip to content

[VectorCombine][X86] Use updated getVectorInstrCost hook #137823

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 35 additions & 25 deletions llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetTransformInfo.h"
Expand Down Expand Up @@ -1080,14 +1081,6 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
VecTy1->getElementCount().getKnownMinValue() <= Index1)
return false;

// Bail for single insertion if it is a load.
// TODO: Handle this once getVectorInstrCost can cost for load/stores.
auto *I0 = dyn_cast_or_null<Instruction>(V0);
auto *I1 = dyn_cast_or_null<Instruction>(V1);
if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
(IsConst1 && I0 && I0->mayReadFromMemory()))
return false;

uint64_t Index = IsConst0 ? Index1 : Index0;
Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
Type *VecTy = I.getType();
Expand Down Expand Up @@ -1120,16 +1113,31 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
VectorOpCost = TTI.getIntrinsicInstrCost(VectorICA, CostKind);
}

// Fold the vector constants in the original vectors into a new base vector to
// get more accurate cost modelling.
Value *NewVecC;
if (isa<CmpInst>(I))
NewVecC = ConstantFoldCompareInstOperands(Pred, VecC0, VecC1, *DL);
else if (isa<BinaryOperator>(I))
NewVecC = ConstantFoldBinaryOpOperands((Instruction::BinaryOps)Opcode,
VecC0, VecC1, *DL);
else
NewVecC = ConstantFoldBinaryIntrinsic(
cast<IntrinsicInst>(I).getIntrinsicID(), VecC0, VecC1, I.getType(), &I);

// Get cost estimate for the insert element. This cost will factor into
// both sequences.
InstructionCost InsertCost = TTI.getVectorInstrCost(
Instruction::InsertElement, VecTy, CostKind, Index);
InstructionCost OldCost =
(IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
InstructionCost NewCost = ScalarOpCost + InsertCost +
(IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
(IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);

InstructionCost InsertCostNewVecC = TTI.getVectorInstrCost(
Instruction::InsertElement, VecTy, CostKind, Index, NewVecC);
InstructionCost InsertCostV0 = TTI.getVectorInstrCost(
Instruction::InsertElement, VecTy, CostKind, Index, VecC0, V0);
InstructionCost InsertCostV1 = TTI.getVectorInstrCost(
Instruction::InsertElement, VecTy, CostKind, Index, VecC1, V1);
InstructionCost OldCost = (IsConst0 ? 0 : InsertCostV0) +
(IsConst1 ? 0 : InsertCostV1) + VectorOpCost;
InstructionCost NewCost = ScalarOpCost + InsertCostNewVecC +
(IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCostV0) +
(IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCostV1);
// We want to scalarize unless the vector variant actually has lower cost.
if (OldCost < NewCost || !NewCost.isValid())
return false;
Expand Down Expand Up @@ -1165,15 +1173,17 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
ScalarInst->copyIRFlags(&I);

// Fold the vector constants in the original vectors into a new base vector.
Value *NewVecC;
if (isa<CmpInst>(I))
NewVecC = Builder.CreateCmp(Pred, VecC0, VecC1);
else if (isa<BinaryOperator>(I))
NewVecC = Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
else
NewVecC = Builder.CreateIntrinsic(
VecTy, cast<IntrinsicInst>(I).getIntrinsicID(), {VecC0, VecC1});
// Create a new base vector if the constant folding failed.
if (!NewVecC) {
if (isa<CmpInst>(I))
NewVecC = Builder.CreateCmp(Pred, VecC0, VecC1);
else if (isa<BinaryOperator>(I))
NewVecC =
Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
else
NewVecC = Builder.CreateIntrinsic(
VecTy, cast<IntrinsicInst>(I).getIntrinsicID(), {VecC0, VecC1});
}
Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
replaceValue(I, *Insert);
return true;
Expand Down
16 changes: 16 additions & 0 deletions llvm/test/Transforms/VectorCombine/RISCV/binop-scalarize.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -S -p vector-combine -mtriple=riscv64 -mattr=+v | FileCheck %s

define <4 x i32> @add_constant_load(ptr %p) {
; CHECK-LABEL: define <4 x i32> @add_constant_load(
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4
; CHECK-NEXT: [[V_SCALAR:%.*]] = add i32 [[X]], 42
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> poison, i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x i32> [[V]]
;
%x = load i32, ptr %p
%ins = insertelement <4 x i32> poison, i32 %x, i32 0
%v = add <4 x i32> %ins, splat (i32 42)
ret <4 x i32> %v
}
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) {
define <2 x i64> @shl_constant_op0_load(ptr %p) {
; CHECK-LABEL: @shl_constant_op0_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 1
; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> <i64 undef, i64 2>, [[INS]]
; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[LD]]
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1
; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ld = load i64, ptr %p
Expand Down Expand Up @@ -204,8 +204,8 @@ define <2 x i64> @shl_constant_op1_not_undef_lane(i64 %x) {
define <2 x i64> @shl_constant_op1_load(ptr %p) {
; CHECK-LABEL: @shl_constant_op1_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 0
; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], <i64 5, i64 2>
; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[LD]], 5
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ld = load i64, ptr %p
Expand Down
102 changes: 66 additions & 36 deletions llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) {
define <2 x i64> @shl_constant_op0_load(ptr %p) {
; CHECK-LABEL: @shl_constant_op0_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 1
; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> <i64 undef, i64 2>, [[INS]]
; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[LD]]
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1
; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ld = load i64, ptr %p
Expand Down Expand Up @@ -204,8 +204,8 @@ define <2 x i64> @shl_constant_op1_not_undef_lane(i64 %x) {
define <2 x i64> @shl_constant_op1_load(ptr %p) {
; CHECK-LABEL: @shl_constant_op1_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 0
; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], <i64 5, i64 2>
; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[LD]], 5
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ld = load i64, ptr %p
Expand Down Expand Up @@ -479,21 +479,31 @@ define <2 x i64> @sdiv_constant_op1_not_undef_lane(i64 %x) {
}

define <2 x i64> @and_constant(i64 %x) {
; CHECK-LABEL: @and_constant(
; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[BO_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x i64> [[BO]]
; SSE-LABEL: @and_constant(
; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
; SSE-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], <i64 42, i64 undef>
; SSE-NEXT: ret <2 x i64> [[BO]]
;
; AVX-LABEL: @and_constant(
; AVX-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[BO_SCALAR]], i64 0
; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = and <2 x i64> %ins, <i64 42, i64 undef>
ret <2 x i64> %bo
}

define <2 x i64> @and_constant_not_undef_lane(i64 %x) {
; CHECK-LABEL: @and_constant_not_undef_lane(
; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x i64> [[BO]]
; SSE-LABEL: @and_constant_not_undef_lane(
; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
; SSE-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], <i64 42, i64 -42>
; SSE-NEXT: ret <2 x i64> [[BO]]
;
; AVX-LABEL: @and_constant_not_undef_lane(
; AVX-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0
; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = and <2 x i64> %ins, <i64 42, i64 -42>
Expand Down Expand Up @@ -523,10 +533,15 @@ define <2 x i64> @or_constant_not_undef_lane(i64 %x) {
}

define <2 x i64> @xor_constant(i64 %x) {
; CHECK-LABEL: @xor_constant(
; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x i64> [[BO]]
; SSE-LABEL: @xor_constant(
; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
; SSE-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], <i64 42, i64 undef>
; SSE-NEXT: ret <2 x i64> [[BO]]
;
; AVX-LABEL: @xor_constant(
; AVX-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 0
; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = xor <2 x i64> %ins, <i64 42, i64 undef>
Expand All @@ -546,8 +561,8 @@ define <2 x i64> @xor_constant_not_undef_lane(i64 %x) {

define <2 x double> @fadd_constant(double %x) {
; CHECK-LABEL: @fadd_constant(
; CHECK-NEXT: [[BO_SCALAR:%.*]] = fadd double [[X:%.*]], 4.200000e+01
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = fadd <2 x double> [[INS]], <double 4.200000e+01, double undef>
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the reasoning behind this no longer being scalarized is that, at SSE2 and above, double and <2 x double> have the same fadd cost:

; SSE2-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F64 = fadd double undef, undef
; SSE2-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fadd <2 x double> undef, undef

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Leaving a note here that this still ultimately generates the same machine code, because DAGCombine will scalarize it either way.

; CHECK-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
Expand All @@ -568,8 +583,8 @@ define <2 x double> @fadd_constant_not_undef_lane(double %x) {

define <2 x double> @fsub_constant_op0(double %x) {
; CHECK-LABEL: @fsub_constant_op0(
; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub fast double 4.200000e+01, [[X:%.*]]
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = fsub fast <2 x double> <double 4.200000e+01, double undef>, [[INS]]
; CHECK-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
Expand Down Expand Up @@ -601,8 +616,8 @@ define <2 x double> @fsub_constant_op1(double %x) {

define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) {
; CHECK-LABEL: @fsub_constant_op1_not_undef_lane(
; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub double [[X:%.*]], 4.200000e+01
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = fsub <2 x double> [[INS]], <double 4.200000e+01, double -4.200000e+01>
; CHECK-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
Expand All @@ -612,8 +627,8 @@ define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) {

define <2 x double> @fmul_constant(double %x) {
; CHECK-LABEL: @fmul_constant(
; CHECK-NEXT: [[BO_SCALAR:%.*]] = fmul reassoc double [[X:%.*]], 4.200000e+01
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
; CHECK-NEXT: [[BO:%.*]] = fmul reassoc <2 x double> [[INS]], <double 4.200000e+01, double undef>
; CHECK-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
Expand Down Expand Up @@ -644,32 +659,47 @@ define <2 x double> @fdiv_constant_op0(double %x) {
}

define <2 x double> @fdiv_constant_op0_not_undef_lane(double %x) {
; CHECK-LABEL: @fdiv_constant_op0_not_undef_lane(
; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv ninf double 4.200000e+01, [[X:%.*]]
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x double> [[BO]]
; SSE-LABEL: @fdiv_constant_op0_not_undef_lane(
; SSE-NEXT: [[BO_SCALAR:%.*]] = fdiv ninf double 4.200000e+01, [[X:%.*]]
; SSE-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
; SSE-NEXT: ret <2 x double> [[BO]]
;
; AVX-LABEL: @fdiv_constant_op0_not_undef_lane(
; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
; AVX-NEXT: [[BO:%.*]] = fdiv ninf <2 x double> <double 4.200000e+01, double -4.200000e+01>, [[INS]]
; AVX-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
%bo = fdiv ninf <2 x double> <double 42.0, double -42.0>, %ins
ret <2 x double> %bo
}

define <2 x double> @fdiv_constant_op1(double %x) {
; CHECK-LABEL: @fdiv_constant_op1(
; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x double> [[BO]]
; SSE-LABEL: @fdiv_constant_op1(
; SSE-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01
; SSE-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
; SSE-NEXT: ret <2 x double> [[BO]]
;
; AVX-LABEL: @fdiv_constant_op1(
; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
; AVX-NEXT: [[BO:%.*]] = fdiv <2 x double> [[INS]], <double 4.200000e+01, double undef>
; AVX-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
%bo = fdiv <2 x double> %ins, <double 42.0, double undef>
ret <2 x double> %bo
}

define <2 x double> @fdiv_constant_op1_not_undef_lane(double %x) {
; CHECK-LABEL: @fdiv_constant_op1_not_undef_lane(
; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x double> [[BO]]
; SSE-LABEL: @fdiv_constant_op1_not_undef_lane(
; SSE-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01
; SSE-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
; SSE-NEXT: ret <2 x double> [[BO]]
;
; AVX-LABEL: @fdiv_constant_op1_not_undef_lane(
; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
; AVX-NEXT: [[BO:%.*]] = fdiv <2 x double> [[INS]], <double 4.200000e+01, double -4.200000e+01>
; AVX-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
%bo = fdiv <2 x double> %ins, <double 42.0, double -42.0>
Expand Down
19 changes: 13 additions & 6 deletions llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -155,12 +155,19 @@ define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) {
; Extra use is accounted for in cost calculation.

define <4 x i32> @ins0_ins0_xor(i32 %x, i32 %y) {
; CHECK-LABEL: @ins0_ins0_xor(
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
; CHECK-NEXT: call void @use(<4 x i32> [[I0]])
; CHECK-NEXT: [[R_SCALAR:%.*]] = xor i32 [[X]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[R_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x i32> [[R]]
; SSE-LABEL: @ins0_ins0_xor(
; SSE-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
; SSE-NEXT: call void @use(<4 x i32> [[I0]])
; SSE-NEXT: [[I1:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0
; SSE-NEXT: [[R:%.*]] = xor <4 x i32> [[I0]], [[I1]]
; SSE-NEXT: ret <4 x i32> [[R]]
;
; AVX-LABEL: @ins0_ins0_xor(
; AVX-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
; AVX-NEXT: call void @use(<4 x i32> [[I0]])
; AVX-NEXT: [[R_SCALAR:%.*]] = xor i32 [[X]], [[Y:%.*]]
; AVX-NEXT: [[R:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[R_SCALAR]], i64 0
; AVX-NEXT: ret <4 x i32> [[R]]
;
%i0 = insertelement <4 x i32> undef, i32 %x, i32 0
call void @use(<4 x i32> %i0)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX

declare void @use(<4 x i32>)
declare void @usef(<4 x float>)
Expand Down Expand Up @@ -165,14 +165,18 @@ define <2 x i1> @constant_op1_i64_not_undef_lane(i64 %x) {
ret <2 x i1> %r
}

; negative test - load prevents the transform

define <2 x i1> @constant_op1_i64_load(ptr %p) {
; CHECK-LABEL: @constant_op1_i64_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 0
; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i64> [[INS]], <i64 42, i64 -42>
; CHECK-NEXT: ret <2 x i1> [[R]]
; SSE-LABEL: @constant_op1_i64_load(
; SSE-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
; SSE-NEXT: [[R_SCALAR:%.*]] = icmp eq i64 [[LD]], 42
; SSE-NEXT: [[R:%.*]] = insertelement <2 x i1> poison, i1 [[R_SCALAR]], i64 0
; SSE-NEXT: ret <2 x i1> [[R]]
;
; AVX-LABEL: @constant_op1_i64_load(
; AVX-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
; AVX-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 0
; AVX-NEXT: [[R:%.*]] = icmp eq <2 x i64> [[INS]], <i64 42, i64 -42>
; AVX-NEXT: ret <2 x i1> [[R]]
;
%ld = load i64, ptr %p
%ins = insertelement <2 x i64> poison, i64 %ld, i32 0
Expand Down
Loading
Loading