Skip to content

Commit 0f13170

Browse files
[SPIR-V] Implement intrinsics llvm.scmp.* and llvm.ucmp.* (#117341)
This PR adds translation of the intrinsics `llvm.scmp.*` and `llvm.ucmp.*`.
1 parent 243c979 commit 0f13170

File tree

4 files changed

+370
-3
lines changed

4 files changed

+370
-3
lines changed

llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
210210
bool selectTrunc(Register ResVReg, const SPIRVType *ResType,
211211
MachineInstr &I) const;
212212

213+
// Selects a three-way integer comparison into ResVReg. spvSelect calls this
// with IsSigned = true for G_SCMP and IsSigned = false for G_UCMP.
bool selectSUCmp(Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
214+
bool IsSigned) const;
215+
213216
bool selectIntToBool(Register IntReg, Register ResVReg, MachineInstr &I,
214217
const SPIRVType *intTy, const SPIRVType *boolTy) const;
215218

@@ -241,8 +244,10 @@ class SPIRVInstructionSelector : public InstructionSelector {
241244
bool selectPhi(Register ResVReg, const SPIRVType *ResType,
242245
MachineInstr &I) const;
243246

244-
bool selectExtInst(Register ResVReg, const SPIRVType *RestType,
245-
MachineInstr &I, GL::GLSLExtInst GLInst) const;
247+
[[maybe_unused]] bool selectExtInst(Register ResVReg,
248+
const SPIRVType *RestType,
249+
MachineInstr &I,
250+
GL::GLSLExtInst GLInst) const;
246251
bool selectExtInst(Register ResVReg, const SPIRVType *ResType,
247252
MachineInstr &I, CL::OpenCLExtInst CLInst) const;
248253
bool selectExtInst(Register ResVReg, const SPIRVType *ResType,
@@ -533,6 +538,11 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg,
533538
case TargetOpcode::G_UMAX:
534539
return selectExtInst(ResVReg, ResType, I, CL::u_max, GL::UMax);
535540

541+
case TargetOpcode::G_SCMP:
542+
return selectSUCmp(ResVReg, ResType, I, true);
543+
case TargetOpcode::G_UCMP:
544+
return selectSUCmp(ResVReg, ResType, I, false);
545+
536546
case TargetOpcode::G_FMA:
537547
return selectExtInst(ResVReg, ResType, I, CL::fma, GL::Fma);
538548

@@ -2398,6 +2408,67 @@ bool SPIRVInstructionSelector::selectExt(Register ResVReg,
23982408
return selectUnOp(ResVReg, ResType, I, Opcode);
23992409
}
24002410

2411+
// Lowers a three-way integer comparison of operands 1 and 2 of I into
// ResVReg. Two compare instructions (less-than-or-equal and less-than, signed
// or unsigned depending on IsSigned) feed two chained selects that yield
// -1, 0 or 1. Works for scalar and vector result types.
// Returns true only if constrainAllUses() succeeded for all four emitted
// instructions.
bool SPIRVInstructionSelector::selectSUCmp(Register ResVReg,
2412+
const SPIRVType *ResType,
2413+
MachineInstr &I,
2414+
bool IsSigned) const {
2415+
MachineIRBuilder MIRBuilder(I);
2416+
MachineRegisterInfo *MRI = MIRBuilder.getMRI();
2417+
MachineBasicBlock &BB = *I.getParent();
2418+
// Get the SPIR-V bool type; for a vector result (N > 1) use a bool vector with N components.
2419+
SPIRVType *BoolType = GR.getOrCreateSPIRVBoolType(I, TII);
2420+
unsigned N = GR.getScalarOrVectorComponentCount(ResType);
2421+
if (N > 1)
2422+
BoolType = GR.getOrCreateSPIRVVectorType(BoolType, N, I, TII);
2423+
Register BoolTypeReg = GR.getSPIRVTypeID(BoolType);
2424+
// Build less-than-equal and less-than; both compare operand 1 against operand 2 with bool result type.
2425+
// TODO: replace with one-liner createVirtualRegister() from
2426+
// llvm/lib/Target/SPIRV/SPIRVUtils.cpp when PR #116609 is merged.
2427+
// NOTE(review): the compare result registers below are registered with
// ResType (and LLT::scalar(64)) although the emitted compare instructions use
// BoolTypeReg as their result type -- confirm this is intended.
Register IsLessEqReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
2428+
MRI->setType(IsLessEqReg, LLT::scalar(64));
2429+
GR.assignSPIRVTypeToVReg(ResType, IsLessEqReg, MIRBuilder.getMF());
2430+
bool Result = BuildMI(BB, I, I.getDebugLoc(),
2431+
TII.get(IsSigned ? SPIRV::OpSLessThanEqual
2432+
: SPIRV::OpULessThanEqual))
2433+
.addDef(IsLessEqReg)
2434+
.addUse(BoolTypeReg)
2435+
.addUse(I.getOperand(1).getReg())
2436+
.addUse(I.getOperand(2).getReg())
2437+
.constrainAllUses(TII, TRI, RBI);
2438+
Register IsLessReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
2439+
MRI->setType(IsLessReg, LLT::scalar(64));
2440+
GR.assignSPIRVTypeToVReg(ResType, IsLessReg, MIRBuilder.getMF());
2441+
Result &= BuildMI(BB, I, I.getDebugLoc(),
2442+
TII.get(IsSigned ? SPIRV::OpSLessThan : SPIRV::OpULessThan))
2443+
.addDef(IsLessReg)
2444+
.addUse(BoolTypeReg)
2445+
.addUse(I.getOperand(1).getReg())
2446+
.addUse(I.getOperand(2).getReg())
2447+
.constrainAllUses(TII, TRI, RBI);
2448+
// Build selects: the inner one picks -1 (less-than) or 0; the outer one keeps that value when less-than-or-equal holds, else picks 1.
2449+
Register ResTypeReg = GR.getSPIRVTypeID(ResType);
2450+
Register NegOneOrZeroReg =
2451+
MRI->createVirtualRegister(GR.getRegClass(ResType));
2452+
MRI->setType(NegOneOrZeroReg, LLT::scalar(64));
2453+
GR.assignSPIRVTypeToVReg(ResType, NegOneOrZeroReg, MIRBuilder.getMF());
2454+
// Vector results use the vector-condition select opcode, scalars the scalar one.
unsigned SelectOpcode =
2455+
N > 1 ? SPIRV::OpSelectVIVCond : SPIRV::OpSelectSISCond;
2456+
Result &= BuildMI(BB, I, I.getDebugLoc(), TII.get(SelectOpcode))
2457+
.addDef(NegOneOrZeroReg)
2458+
.addUse(ResTypeReg)
2459+
.addUse(IsLessReg)
2460+
.addUse(buildOnesVal(true, ResType, I)) // -1
2461+
.addUse(buildZerosVal(ResType, I))
2462+
.constrainAllUses(TII, TRI, RBI);
2463+
// Non-short-circuiting '&' so the final select is always emitted, even if an earlier step failed.
return Result & BuildMI(BB, I, I.getDebugLoc(), TII.get(SelectOpcode))
2464+
.addDef(ResVReg)
2465+
.addUse(ResTypeReg)
2466+
.addUse(IsLessEqReg)
2467+
.addUse(NegOneOrZeroReg) // -1 or 0
2468+
.addUse(buildOnesVal(false, ResType, I)) // 1
2469+
.constrainAllUses(TII, TRI, RBI);
2470+
}
2471+
24012472
bool SPIRVInstructionSelector::selectIntToBool(Register IntReg,
24022473
Register ResVReg,
24032474
MachineInstr &I,

llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
212212

213213
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX, G_ABS,
214214
G_BITREVERSE, G_SADDSAT, G_UADDSAT, G_SSUBSAT,
215-
G_USUBSAT})
215+
G_USUBSAT, G_SCMP, G_UCMP})
216216
.legalFor(allIntScalarsAndVectors)
217217
.legalIf(extendedScalarsAndVectors);
218218

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s
2+
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
3+
4+
; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
5+
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
6+
7+
; CHECK-DAG: %[[TypeI8:.*]] = OpTypeInt 8 0
8+
; CHECK-DAG: %[[TypeI16:.*]] = OpTypeInt 16 0
9+
; CHECK-DAG: %[[TypeI32:.*]] = OpTypeInt 32 0
10+
; CHECK-DAG: %[[TypeI64:.*]] = OpTypeInt 64 0
11+
12+
; CHECK-DAG: %[[CmpI64ConstMinusOne:.*]] = OpConstant %[[TypeI64]] 18446744073709551615
13+
14+
; CHECK-DAG: %[[CmpI8ConstOne:.*]] = OpConstant %[[TypeI8]] 1
15+
; CHECK-DAG: %[[CmpI8ConstZero:.*]] = OpConstantNull %[[TypeI8]]
16+
; CHECK-DAG: %[[CmpI8ConstMinusOne:.*]] = OpConstant %[[TypeI8]] 255
17+
18+
; CHECK-DAG: %[[CmpI16ConstOne:.*]] = OpConstant %[[TypeI16]] 1
19+
; CHECK-DAG: %[[CmpI16ConstZero:.*]] = OpConstantNull %[[TypeI16]]
20+
; CHECK-DAG: %[[CmpI16ConstMinusOne:.*]] = OpConstant %[[TypeI16]] 65535
21+
22+
; CHECK-DAG: %[[CmpI32ConstOne:.*]] = OpConstant %[[TypeI32]] 1
23+
; CHECK-DAG: %[[CmpI32ConstZero:.*]] = OpConstantNull %[[TypeI32]]
24+
; CHECK-DAG: %[[CmpI32ConstMinusOne:.*]] = OpConstant %[[TypeI32]] 4294967295
25+
26+
; CHECK-DAG: %[[CmpI64ConstOne:.*]] = OpConstant %[[TypeI64]] 1
27+
; CHECK-DAG: %[[CmpI64ConstZero:.*]] = OpConstantNull %[[TypeI64]]
28+
29+
; CHECK-DAG: %[[TypeBool:.*]] = OpTypeBool
30+
; CHECK-DAG: %[[TypeVBool:.*]] = OpTypeVector %[[TypeBool]] 4
31+
32+
; CHECK-DAG: %[[TypeV4I8:.*]] = OpTypeVector %[[TypeI8]] 4
33+
; CHECK-DAG: %[[TypeV4I16:.*]] = OpTypeVector %[[TypeI16]] 4
34+
; CHECK-DAG: %[[TypeV4I32:.*]] = OpTypeVector %[[TypeI32]] 4
35+
; CHECK-DAG: %[[TypeV4I64:.*]] = OpTypeVector %[[TypeI64]] 4
36+
37+
; CHECK-DAG: %[[V4I8ConstOne:.*]] = OpConstantComposite %[[TypeV4I8]] %[[CmpI8ConstOne]] %[[CmpI8ConstOne]] %[[CmpI8ConstOne]] %[[CmpI8ConstOne]]
38+
; CHECK-DAG: %[[V4I8ConstZero:.*]] = OpConstantNull %[[TypeV4I8]]
39+
; CHECK-DAG: %[[V4I8ConstMinusOne:.*]] = OpConstantComposite %[[TypeV4I8]] %[[CmpI8ConstMinusOne]] %[[CmpI8ConstMinusOne]] %[[CmpI8ConstMinusOne]] %[[CmpI8ConstMinusOne]]
40+
41+
; CHECK-DAG: %[[V4I16ConstOne:.*]] = OpConstantComposite %[[TypeV4I16]] %[[CmpI16ConstOne]] %[[CmpI16ConstOne]] %[[CmpI16ConstOne]] %[[CmpI16ConstOne]]
42+
; CHECK-DAG: %[[V4I16ConstZero:.*]] = OpConstantNull %[[TypeV4I16]]
43+
; CHECK-DAG: %[[V4I16ConstMinusOne:.*]] = OpConstantComposite %[[TypeV4I16]] %[[CmpI16ConstMinusOne]] %[[CmpI16ConstMinusOne]] %[[CmpI16ConstMinusOne]] %[[CmpI16ConstMinusOne]]
44+
45+
; CHECK-DAG: %[[V4I32ConstOne:.*]] = OpConstantComposite %[[TypeV4I32]] %[[CmpI32ConstOne]] %[[CmpI32ConstOne]] %[[CmpI32ConstOne]] %[[CmpI32ConstOne]]
46+
; CHECK-DAG: %[[V4I32ConstZero:.*]] = OpConstantNull %[[TypeV4I32]]
47+
; CHECK-DAG: %[[V4I32ConstMinusOne:.*]] = OpConstantComposite %[[TypeV4I32]] %[[CmpI32ConstMinusOne]] %[[CmpI32ConstMinusOne]] %[[CmpI32ConstMinusOne]] %[[CmpI32ConstMinusOne]]
48+
49+
; CHECK-DAG: %[[V4I64ConstOne:.*]] = OpConstantComposite %[[TypeV4I64]] %[[CmpI64ConstOne]] %[[CmpI64ConstOne]] %[[CmpI64ConstOne]] %[[CmpI64ConstOne]]
50+
; CHECK-DAG: %[[V4I64ConstZero:.*]] = OpConstantNull %[[TypeV4I64]]
51+
; CHECK-DAG: %[[V4I64ConstMinusOne:.*]] = OpConstantComposite %[[TypeV4I64]] %[[CmpI64ConstMinusOne]] %[[CmpI64ConstMinusOne]] %[[CmpI64ConstMinusOne]] %[[CmpI64ConstMinusOne]]
52+
53+
; CHECK: OpFunction
54+
; CHECK: %[[CmpI8R1:.*]] = OpSLessThanEqual %[[TypeBool]] %[[#]] %[[#]]
55+
; CHECK: %[[CmpI8R2:.*]] = OpSLessThan %[[TypeBool]] %[[#]] %[[#]]
56+
; CHECK: %[[SelI8R1:.*]] = OpSelect %[[TypeI8]] %[[CmpI8R2]] %[[CmpI8ConstMinusOne]] %[[CmpI8ConstZero]]
57+
; CHECK: %[[SelI8R2:.*]] = OpSelect %[[TypeI8]] %[[CmpI8R1]] %[[SelI8R1]] %[[CmpI8ConstOne]]
58+
; CHECK: OpReturnValue %[[SelI8R2]]
59+
; Scalar i8 case: returns the result of the signed three-way comparison
; intrinsic @llvm.scmp.i8.i8 applied to the two arguments.
define range(i8 -1, 2) i8 @test_i8(i8 noundef %0, i8 noundef %1) {
60+
%3 = tail call i8 @llvm.scmp.i8.i8(i8 %0, i8 %1)
61+
ret i8 %3
62+
}
63+
64+
; CHECK: OpFunction
65+
; CHECK: %[[CmpI16R1:.*]] = OpSLessThanEqual %[[TypeBool]] %[[#]] %[[#]]
66+
; CHECK: %[[CmpI16R2:.*]] = OpSLessThan %[[TypeBool]] %[[#]] %[[#]]
67+
; CHECK: %[[SelI16R1:.*]] = OpSelect %[[TypeI16]] %[[CmpI16R2]] %[[CmpI16ConstMinusOne]] %[[CmpI16ConstZero]]
68+
; CHECK: %[[SelI16R2:.*]] = OpSelect %[[TypeI16]] %[[CmpI16R1]] %[[SelI16R1]] %[[CmpI16ConstOne]]
69+
; CHECK: OpReturnValue %[[SelI16R2]]
70+
; Scalar i16 case: returns the result of the signed three-way comparison
; intrinsic @llvm.scmp.i16.i16 applied to the two arguments.
define range(i16 -1, 2) i16 @test_i16(i16 noundef %0, i16 noundef %1) {
71+
%3 = tail call i16 @llvm.scmp.i16.i16(i16 %0, i16 %1)
72+
ret i16 %3
73+
}
74+
75+
; CHECK: OpFunction
76+
; CHECK: %[[CmpI32R1:.*]] = OpSLessThanEqual %[[TypeBool]] %[[#]] %[[#]]
77+
; CHECK: %[[CmpI32R2:.*]] = OpSLessThan %[[TypeBool]] %[[#]] %[[#]]
78+
; CHECK: %[[SelI32R1:.*]] = OpSelect %[[TypeI32]] %[[CmpI32R2]] %[[CmpI32ConstMinusOne]] %[[CmpI32ConstZero]]
79+
; CHECK: %[[SelI32R2:.*]] = OpSelect %[[TypeI32]] %[[CmpI32R1]] %[[SelI32R1]] %[[CmpI32ConstOne]]
80+
; CHECK: OpReturnValue %[[SelI32R2]]
81+
; Scalar i32 case: returns the result of the signed three-way comparison
; intrinsic @llvm.scmp.i32.i32 applied to the two arguments.
define range(i32 -1, 2) i32 @test_i32(i32 noundef %0, i32 noundef %1) {
82+
%3 = tail call i32 @llvm.scmp.i32.i32(i32 %0, i32 %1)
83+
ret i32 %3
84+
}
85+
86+
; CHECK: OpFunction
87+
; CHECK: %[[CmpI64R1:.*]] = OpSLessThanEqual %[[TypeBool]] %[[#]] %[[#]]
88+
; CHECK: %[[CmpI64R2:.*]] = OpSLessThan %[[TypeBool]] %[[#]] %[[#]]
89+
; CHECK: %[[SelI64R1:.*]] = OpSelect %[[TypeI64]] %[[CmpI64R2]] %[[CmpI64ConstMinusOne]] %[[CmpI64ConstZero]]
90+
; CHECK: %[[SelI64R2:.*]] = OpSelect %[[TypeI64]] %[[CmpI64R1]] %[[SelI64R1]] %[[CmpI64ConstOne]]
91+
; CHECK: OpReturnValue %[[SelI64R2]]
92+
; Scalar i64 case: returns the result of the signed three-way comparison
; intrinsic @llvm.scmp.i64.i64 applied to the two arguments.
define range(i64 -1, 2) i64 @test_i64(i64 noundef %0, i64 noundef %1) {
93+
%3 = tail call i64 @llvm.scmp.i64.i64(i64 %0, i64 %1)
94+
ret i64 %3
95+
}
96+
97+
; CHECK: OpFunction
98+
; CHECK: %[[V4I8R1:.*]] = OpSLessThanEqual %[[TypeVBool]] %[[#]] %[[#]]
99+
; CHECK: %[[V4I8R2:.*]] = OpSLessThan %[[TypeVBool]] %[[#]] %[[#]]
100+
; CHECK: %[[SelectV4I8R1:.*]] = OpSelect %[[TypeV4I8]] %[[V4I8R2]] %[[V4I8ConstMinusOne]] %[[V4I8ConstZero]]
101+
; CHECK: %[[SelectV4I8R2:.*]] = OpSelect %[[TypeV4I8]] %[[V4I8R1]] %[[SelectV4I8R1]] %[[V4I8ConstOne]]
102+
; CHECK: OpReturnValue %[[SelectV4I8R2]]
103+
; Vector <4 x i8> case: returns the result of the signed three-way comparison
; intrinsic @llvm.scmp.v4i8.v4i8 applied to the two vector arguments.
define range(i8 -1, 2) <4 x i8> @test_v4i8(<4 x i8> noundef %0, <4 x i8> noundef %1) {
104+
%3 = tail call <4 x i8> @llvm.scmp.v4i8.v4i8(<4 x i8> %0, <4 x i8> %1)
105+
ret <4 x i8> %3
106+
}
107+
108+
; CHECK: OpFunction
109+
; CHECK: %[[V4I16R1:.*]] = OpSLessThanEqual %[[TypeVBool]] %[[#]] %[[#]]
110+
; CHECK: %[[V4I16R2:.*]] = OpSLessThan %[[TypeVBool]] %[[#]] %[[#]]
111+
; CHECK: %[[SelectV4I16R1:.*]] = OpSelect %[[TypeV4I16]] %[[V4I16R2]] %[[V4I16ConstMinusOne]] %[[V4I16ConstZero]]
112+
; CHECK: %[[SelectV4I16R2:.*]] = OpSelect %[[TypeV4I16]] %[[V4I16R1]] %[[SelectV4I16R1]] %[[V4I16ConstOne]]
113+
; CHECK: OpReturnValue %[[SelectV4I16R2]]
114+
; Vector <4 x i16> case: returns the result of the signed three-way comparison
; intrinsic @llvm.scmp.v4i16.v4i16 applied to the two vector arguments.
define range(i16 -1, 2) <4 x i16> @test_v4i16(<4 x i16> noundef %0, <4 x i16> noundef %1) {
115+
%3 = tail call <4 x i16> @llvm.scmp.v4i16.v4i16(<4 x i16> %0, <4 x i16> %1)
116+
ret <4 x i16> %3
117+
}
118+
119+
; CHECK: OpFunction
120+
; CHECK: %[[V4I32R1:.*]] = OpSLessThanEqual %[[TypeVBool]] %[[#]] %[[#]]
121+
; CHECK: %[[V4I32R2:.*]] = OpSLessThan %[[TypeVBool]] %[[#]] %[[#]]
122+
; CHECK: %[[SelectV4I32R1:.*]] = OpSelect %[[TypeV4I32]] %[[V4I32R2]] %[[V4I32ConstMinusOne]] %[[V4I32ConstZero]]
123+
; CHECK: %[[SelectV4I32R2:.*]] = OpSelect %[[TypeV4I32]] %[[V4I32R1]] %[[SelectV4I32R1]] %[[V4I32ConstOne]]
124+
; CHECK: OpReturnValue %[[SelectV4I32R2]]
125+
; Vector <4 x i32> case: returns the result of the signed three-way comparison
; intrinsic @llvm.scmp.v4i32.v4i32 applied to the two vector arguments.
define range(i32 -1, 2) <4 x i32> @test_v4i32(<4 x i32> noundef %0, <4 x i32> noundef %1) {
126+
%3 = tail call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %0, <4 x i32> %1)
127+
ret <4 x i32> %3
128+
}
129+
130+
; CHECK: OpFunction
131+
; CHECK: %[[V4I64R1:.*]] = OpSLessThanEqual %[[TypeVBool]] %[[#]] %[[#]]
132+
; CHECK: %[[V4I64R2:.*]] = OpSLessThan %[[TypeVBool]] %[[#]] %[[#]]
133+
; CHECK: %[[SelectV4I64R1:.*]] = OpSelect %[[TypeV4I64]] %[[V4I64R2]] %[[V4I64ConstMinusOne]] %[[V4I64ConstZero]]
134+
; CHECK: %[[SelectV4I64R2:.*]] = OpSelect %[[TypeV4I64]] %[[V4I64R1]] %[[SelectV4I64R1]] %[[V4I64ConstOne]]
135+
; CHECK: OpReturnValue %[[SelectV4I64R2]]
136+
; Vector <4 x i64> case: returns the result of the signed three-way comparison
; intrinsic @llvm.scmp.v4i64.v4i64 applied to the two vector arguments.
define range(i64 -1, 2) <4 x i64> @test_v4i64(<4 x i64> noundef %0, <4 x i64> noundef %1) {
137+
%3 = tail call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %0, <4 x i64> %1)
138+
ret <4 x i64> %3
139+
}
140+
141+
; Declarations of the signed three-way comparison intrinsics called by the
; test functions in this file: four scalar widths, then four 4-element vectors.
declare i8 @llvm.scmp.i8.i8(i8, i8)
142+
declare i16 @llvm.scmp.i16.i16(i16, i16)
143+
declare i32 @llvm.scmp.i32.i32(i32, i32)
144+
declare i64 @llvm.scmp.i64.i64(i64, i64)
145+
declare <4 x i8> @llvm.scmp.v4i8.v4i8(<4 x i8>, <4 x i8>)
146+
declare <4 x i16> @llvm.scmp.v4i16.v4i16(<4 x i16>, <4 x i16>)
147+
declare <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32>, <4 x i32>)
148+
declare <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64>, <4 x i64>)

0 commit comments

Comments
 (0)