Skip to content

Commit b80e292

Browse files
committed
DAGCombine lastb + csel into clastb
1 parent 30222c6 commit b80e292

File tree

2 files changed

+55
-25
lines changed

2 files changed

+55
-25
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24416,6 +24416,50 @@ static SDValue foldCSELOfCSEL(SDNode *Op, SelectionDAG &DAG) {
2441624416
return DAG.getNode(AArch64ISD::CSEL, DL, VT, L, R, CCValue, Cond);
2441724417
}
2441824418

24419+
/// Try to fold a CSEL whose "true" value is a LASTB, and whose flags come from
/// a PTEST_ANY of the LASTB's own predicate, into a single CLASTB_N:
///
///   csel (lastb Pred, Vec), Fallback, ne, (ptest_any PG, Pred)
///     --> clastb_n Pred, Fallback, Vec
///
/// The fold is only performed when PG is either Pred itself or a PTRUE that
/// is known to cover every lane of Pred, so that the flag test answers
/// exactly "is any lane of Pred active?".
///
/// \param N    The CSEL node. Operand 0 is the value selected when the
///             condition holds, operand 1 the alternative, operand 2 the
///             AArch64 condition code and operand 3 the flag-producing node.
/// \param DAG  The SelectionDAG used to build the replacement node.
/// \returns The new CLASTB_N node, or an empty SDValue if the pattern does
///          not match.
static SDValue foldCSELOfLASTB(SDNode *N, SelectionDAG &DAG) {
  // Only the NE form is handled; bail out before inspecting anything else.
  const auto CC =
      static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
  if (CC != AArch64CC::NE)
    return SDValue();

  // FIXME: Handle the inverse (LASTB as the "false" operand with the
  // opposite condition)?
  SDValue LastB = N->getOperand(0);
  if (LastB.getOpcode() != AArch64ISD::LASTB)
    return SDValue();

  SDValue PTAny = N->getOperand(3);
  if (PTAny.getOpcode() != AArch64ISD::PTEST_ANY)
    return SDValue();

  SDValue Fallback = N->getOperand(1);
  SDValue LBPred = LastB.getOperand(0);

  // The PTEST operands may have been reinterpreted to another predicate
  // type; look through a single cast to recover the underlying values.
  auto LookThroughCast = [](SDValue V) {
    return V.getOpcode() == AArch64ISD::REINTERPRET_CAST ? V.getOperand(0) : V;
  };
  SDValue PTestPG = LookThroughCast(PTAny.getOperand(0));
  SDValue PTestOp = LookThroughCast(PTAny.getOperand(1));

  // The predicate being tested must be the one that governs the LASTB.
  if (PTestOp != LBPred)
    return SDValue();

  // The governing predicate of the test must not hide any lane of LBPred.
  // That is trivially true when it is LBPred itself. Otherwise only accept
  // "ptrue <all>" with at least as many lanes as LBPred: a partial pattern
  // (vl4, pow2, ...) or a ptrue of a wider element type would let the test
  // report "no active lane" while CLASTB still finds one, changing the
  // selected value.
  if (PTestPG != LBPred) {
    if (PTestPG.getOpcode() != AArch64ISD::PTRUE ||
        PTestPG.getConstantOperandVal(0) != AArch64SVEPredPattern::all)
      return SDValue();

    if (PTestPG.getValueType().getVectorMinNumElements() <
        LBPred.getValueType().getVectorMinNumElements())
      return SDValue();
  }

  return DAG.getNode(AArch64ISD::CLASTB_N, SDLoc(N), N->getValueType(0),
                     LBPred, Fallback, LastB.getOperand(1));
}
24462+
2441924463
// Optimize CSEL instructions
2442024464
static SDValue performCSELCombine(SDNode *N,
2442124465
TargetLowering::DAGCombinerInfo &DCI,
@@ -24432,6 +24476,9 @@ static SDValue performCSELCombine(SDNode *N,
2443224476
if (SDValue Folded = foldCSELofCTTZ(N, DAG))
2443324477
return Folded;
2443424478

24479+
if (SDValue CLastB = foldCSELOfLASTB(N, DAG))
24480+
return CLastB;
24481+
2443524482
return performCONDCombine(N, DCI, DAG, 2, 3);
2443624483
}
2443724484

llvm/test/CodeGen/AArch64/sve-clastb.ll

Lines changed: 8 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,7 @@
44
define i8 @clastb_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8 %existing) {
55
; CHECK-LABEL: clastb_i8:
66
; CHECK: // %bb.0:
7-
; CHECK-NEXT: lastb w8, p0, z0.b
8-
; CHECK-NEXT: ptest p0, p0.b
9-
; CHECK-NEXT: csel w0, w8, w0, ne
7+
; CHECK-NEXT: clastb w0, p0, w0, z0.b
108
; CHECK-NEXT: ret
119
%rev.pg = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> %pg)
1210
%tz.cnt = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %rev.pg, i1 false)
@@ -23,10 +21,7 @@ define i8 @clastb_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8 %exist
2321
define i16 @clastb_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pg, i16 %existing) {
2422
; CHECK-LABEL: clastb_i16:
2523
; CHECK: // %bb.0:
26-
; CHECK-NEXT: lastb w8, p0, z0.h
27-
; CHECK-NEXT: ptrue p1.h
28-
; CHECK-NEXT: ptest p1, p0.b
29-
; CHECK-NEXT: csel w0, w8, w0, ne
24+
; CHECK-NEXT: clastb w0, p0, w0, z0.h
3025
; CHECK-NEXT: ret
3126
%rev.pg = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> %pg)
3227
%tz.cnt = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> %rev.pg, i1 false)
@@ -43,10 +38,7 @@ define i16 @clastb_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pg, i16 %exi
4338
define i32 @clastb_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32 %existing) {
4439
; CHECK-LABEL: clastb_i32:
4540
; CHECK: // %bb.0:
46-
; CHECK-NEXT: lastb w8, p0, z0.s
47-
; CHECK-NEXT: ptrue p1.s
48-
; CHECK-NEXT: ptest p1, p0.b
49-
; CHECK-NEXT: csel w0, w8, w0, ne
41+
; CHECK-NEXT: clastb w0, p0, w0, z0.s
5042
; CHECK-NEXT: ret
5143
%rev.pg = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> %pg)
5244
%tz.cnt = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> %rev.pg, i1 false)
@@ -63,10 +55,7 @@ define i32 @clastb_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32 %exi
6355
define i64 @clastb_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64 %existing) {
6456
; CHECK-LABEL: clastb_i64:
6557
; CHECK: // %bb.0:
66-
; CHECK-NEXT: lastb x8, p0, z0.d
67-
; CHECK-NEXT: ptrue p1.d
68-
; CHECK-NEXT: ptest p1, p0.b
69-
; CHECK-NEXT: csel x0, x8, x0, ne
58+
; CHECK-NEXT: clastb x0, p0, x0, z0.d
7059
; CHECK-NEXT: ret
7160
%rev.pg = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> %pg)
7261
%tz.cnt = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> %rev.pg, i1 false)
@@ -80,13 +69,10 @@ define i64 @clastb_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64 %exi
8069
ret i64 %res
8170
}
8271

83-
define float @clastb_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float %existing) {
72+
define float @clastb_float(float %existing, <vscale x 4 x float> %data, <vscale x 4 x i1> %pg) {
8473
; CHECK-LABEL: clastb_float:
8574
; CHECK: // %bb.0:
86-
; CHECK-NEXT: lastb s0, p0, z0.s
87-
; CHECK-NEXT: ptrue p1.s
88-
; CHECK-NEXT: ptest p1, p0.b
89-
; CHECK-NEXT: fcsel s0, s0, s1, ne
75+
; CHECK-NEXT: clastb s0, p0, s0, z1.s
9076
; CHECK-NEXT: ret
9177
%rev.pg = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> %pg)
9278
%tz.cnt = call i32 @llvm.experimental.cttz.elts.float.nxv4i1(<vscale x 4 x i1> %rev.pg, i1 false)
@@ -100,13 +86,10 @@ define float @clastb_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, fl
10086
ret float %res
10187
}
10288

103-
define double @clastb_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double %existing) {
89+
define double @clastb_double(double %existing, <vscale x 2 x double> %data, <vscale x 2 x i1> %pg) {
10490
; CHECK-LABEL: clastb_double:
10591
; CHECK: // %bb.0:
106-
; CHECK-NEXT: lastb d0, p0, z0.d
107-
; CHECK-NEXT: ptrue p1.d
108-
; CHECK-NEXT: ptest p1, p0.b
109-
; CHECK-NEXT: fcsel d0, d0, d1, ne
92+
; CHECK-NEXT: clastb d0, p0, d0, z1.d
11093
; CHECK-NEXT: ret
11194
%rev.pg = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> %pg)
11295
%tz.cnt = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> %rev.pg, i1 false)

0 commit comments

Comments
 (0)