@@ -58352,14 +58352,17 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
58352
58352
break;
58353
58353
case X86ISD::PCMPEQ:
58354
58354
case X86ISD::PCMPGT:
58355
- if (!IsSplat && VT.is256BitVector() &&
58356
- (Subtarget.hasInt256() || VT == MVT::v8i32) &&
58357
- (IsConcatFree(VT, Ops, 0) || IsConcatFree (VT, Ops, 1))) {
58358
- if (Subtarget.hasInt256() )
58355
+ if (!IsSplat && VT.is256BitVector() && Subtarget.hasInt256()) {
58356
+ SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
58357
+ SDValue Concat1 = CombineSubOperand (VT, Ops, 1);
58358
+ if (Concat0 || Concat1 )
58359
58359
return DAG.getNode(Op0.getOpcode(), DL, VT,
58360
- ConcatSubOperand(VT, Ops, 0),
58361
- ConcatSubOperand(VT, Ops, 1));
58360
+ Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
58361
+ Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));
58362
+ break;
58363
+ }
58362
58364
58365
+ if (!IsSplat && VT == MVT::v8i32) {
58363
58366
// Without AVX2, see if we can cast the values to v8f32 and use fcmp.
58364
58367
// TODO: Handle v4f64 as well?
58365
58368
unsigned MaxSigBitsLHS = 0, MaxSigBitsRHS = 0;
@@ -58384,8 +58387,10 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
58384
58387
58385
58388
if (std::optional<unsigned> CastOpc =
58386
58389
CastIntSETCCtoFP(FpVT, ICC, MaxSigBitsLHS, MaxSigBitsRHS)) {
58387
- SDValue LHS = ConcatSubOperand(VT, Ops, 0);
58388
- SDValue RHS = ConcatSubOperand(VT, Ops, 1);
58390
+ SDValue LHS = CombineSubOperand(VT, Ops, 0);
58391
+ SDValue RHS = CombineSubOperand(VT, Ops, 1);
58392
+ LHS = LHS ? LHS : ConcatSubOperand(VT, Ops, 0);
58393
+ RHS = RHS ? RHS : ConcatSubOperand(VT, Ops, 1);
58389
58394
LHS = DAG.getNode(*CastOpc, DL, FpVT, LHS);
58390
58395
RHS = DAG.getNode(*CastOpc, DL, FpVT, RHS);
58391
58396
0 commit comments